Spaces:

yagnik12
/

AI_Text_Detecter

Running

App Files Files Community

yagnik12 committed on Sep 12

Commit

6c5caaa

verified ·

1 Parent(s): d930eea

Update ai_text_detector_valid_final.py

Browse files

Files changed (1) hide show

ai_text_detector_valid_final.py +52 -58

ai_text_detector_valid_final.py CHANGED Viewed

@@ -28,40 +28,21 @@ model_3 = AutoModelForSequenceClassification.from_pretrained("answerdotai/Modern
 model_3.load_state_dict(torch.hub.load_state_dict_from_url(model3_path, map_location=device))
 model_3.to(device).eval()
-label_mapping = {
-    0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
-    6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
-    11: 'flan_t5_base', 12: 'flan_t5_large', 13: 'flan_t5_small',
-    14: 'flan_t5_xl', 15: 'flan_t5_xxl', 16: 'gemma-7b-it', 17: 'gemma2-9b-it',
-    18: 'gpt-3.5-turbo', 19: 'gpt-35', 20: 'gpt4', 21: 'gpt4o',
-    22: 'gpt_j', 23: 'gpt_neox', 24: 'human', 25: 'llama3-70b', 26: 'llama3-8b',
-    27: 'mixtral-8x7b', 28: 'opt_1.3b', 29: 'opt_125m', 30: 'opt_13b',
-    31: 'opt_2.7b', 32: 'opt_30b', 33: 'opt_350m', 34: 'opt_6.7b',
-    35: 'opt_iml_30b', 36: 'opt_iml_max_1.3b', 37: 't0_11b', 38: 't0_3b',
-    39: 'text-davinci-002', 40: 'text-davinci-003'
-}
-# def clean_text(text: str) -> str:
-#     text = re.sub(r"\s{2,}", " ", text)
-#     text = re.sub(r"\s+([,.;:?!])", r"\1", text)
-#     return text
 def clean_text(text: str) -> str:
-    # Normalize non-breaking spaces to normal space
     text = text.replace("\xa0", " ").replace("\u200b", "")
-    # Collapse multiple spaces
     text = re.sub(r"\s{2,}", " ", text)
-    # Remove space before punctuation
     text = re.sub(r"\s+([,.;:?!])", r"\1", text)
-    # Trim leading/trailing spaces
     return text.strip()
 def classify_szegedai(text: str):
-    """ModernBERT ensemble detector (replaces SzegedAI Space call)."""
     cleaned_text = clean_text(text)
     if not cleaned_text.strip():
         return {"error": "Empty text"}
@@ -79,7 +60,7 @@ def classify_szegedai(text: str):
     probs = probs[0]
     ai_probs = probs.clone()
-    ai_probs[24] = 0  # "human" label index
     ai_total_prob = ai_probs.sum().item() * 100
     human_prob = 100 - ai_total_prob
@@ -93,10 +74,9 @@ def classify_szegedai(text: str):
     }
 # ---------------------------
-# Your Other Detectors
 # ---------------------------
 MODELS = {
-    # "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",
     "MonkeyDAnh": "MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
     # "Andreas122001": "andreas122001/roberta-academic-detector",
 }
@@ -114,10 +94,9 @@ def run_hf_model(model_id, text):
         return {"error": str(e)}
 # ---------------------------
-# Main Detector
 # ---------------------------
 def verdict(ai_prob):
-    """Return a human-readable verdict based on AI probability"""
     if ai_prob < 20:
         return "Most likely human-written."
     elif 20 <= ai_prob < 40:
@@ -126,46 +105,61 @@ def verdict(ai_prob):
         return "Unclear – could be either human or AI-assisted."
     elif 60 <= ai_prob < 80:
         return "Possibly AI-generated, or a human using AI assistance."
-    else:  # ai_prob >= 80
         return "Likely AI-generated or heavily AI-assisted."
-def detect_text(text):
-    results = {}
-    # HuggingFace transformer models
-    for name, model_id in MODELS.items():
-        results[name] = run_hf_model(model_id, text)
-    # SzegedAI ModernBERT ensemble
-    results["SzegedAI Detector"] = classify_szegedai(text)
-    # ---------------------------
-    # Final Verdict (Hybrid Rule)
-    # ---------------------------
-    ai_probs = []
-    strong_ai_detector = None
-    for k, v in results.items():
-        if "AI Probability" in v:
-            ai_probs.append(v["AI Probability"])
-            if v["AI Probability"] > 90:   # strong AI flag
-                strong_ai_detector = v
-    avg_ai = np.mean(ai_probs) if ai_probs else 0
-    if strong_ai_detector:
-        final_verdict = verdict(strong_ai_detector["AI Probability"])
-        if "Identified LLM" in strong_ai_detector:
-            final_verdict += f" (Identified: {strong_ai_detector['Identified LLM']})"
     else:
-        final_verdict = verdict(avg_ai)
     results["Final Score"] = {
-        "Verdict": final_verdict,
-        # "Average AI Probability": round(avg_ai, 2)
     }
     return results
 if __name__ == "__main__":
     sample = "This is a test sentence written by AI or human."
     print(detect_text(sample))

 model_3.load_state_dict(torch.hub.load_state_dict_from_url(model3_path, map_location=device))
 model_3.to(device).eval()
# Class index -> source label for the classifier output.
# The previous `{ ... }` placeholder evaluated to a set containing Ellipsis,
# so any indexed lookup (label_mapping[idx]) raised TypeError; the full table
# is restored here.
# Index 24 ("human") is the only non-AI class: classify_szegedai zeroes that
# slot before summing the AI probability.
label_mapping = {
    0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
    6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
    11: 'flan_t5_base', 12: 'flan_t5_large', 13: 'flan_t5_small',
    14: 'flan_t5_xl', 15: 'flan_t5_xxl', 16: 'gemma-7b-it', 17: 'gemma2-9b-it',
    18: 'gpt-3.5-turbo', 19: 'gpt-35', 20: 'gpt4', 21: 'gpt4o',
    22: 'gpt_j', 23: 'gpt_neox', 24: 'human', 25: 'llama3-70b', 26: 'llama3-8b',
    27: 'mixtral-8x7b', 28: 'opt_1.3b', 29: 'opt_125m', 30: 'opt_13b',
    31: 'opt_2.7b', 32: 'opt_30b', 33: 'opt_350m', 34: 'opt_6.7b',
    35: 'opt_iml_30b', 36: 'opt_iml_max_1.3b', 37: 't0_11b', 38: 't0_3b',
    39: 'text-davinci-002', 40: 'text-davinci-003',
}
+# ---------------------------
+# Text Cleaning
+# ---------------------------
 def clean_text(text: str) -> str:
     """Return *text* with whitespace normalized for the detectors.

     Non-breaking spaces (U+00A0) become regular spaces, zero-width spaces
     (U+200B) are dropped, every run of two or more whitespace characters
     collapses to a single space, whitespace directly before one of
     ``, . ; : ? !`` is removed, and the result is stripped at both ends.
     """
     normalized = text.replace("\xa0", " ")
     normalized = normalized.replace("\u200b", "")
     # Order matters: collapse whitespace runs first, then tighten punctuation.
     substitutions = (
         (r"\s{2,}", " "),
         (r"\s+([,.;:?!])", r"\1"),
     )
     for pattern, replacement in substitutions:
         normalized = re.sub(pattern, replacement, normalized)
     return normalized.strip()
+# ---------------------------
+# SzegedAI Detector
+# ---------------------------
 def classify_szegedai(text: str):
     cleaned_text = clean_text(text)
     if not cleaned_text.strip():
         return {"error": "Empty text"}
     probs = probs[0]
     ai_probs = probs.clone()
+    ai_probs[24] = 0  # "human"
     ai_total_prob = ai_probs.sum().item() * 100
     human_prob = 100 - ai_total_prob
     }
 # ---------------------------
+# HuggingFace Detectors
 # ---------------------------
 # Display name -> Hugging Face model id. detect_text passes each id to
 # run_hf_model and stores the result under the display name.
 MODELS = {
     "MonkeyDAnh": "MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
     # Disabled detector (commented out, so it is never run):
     # "Andreas122001": "andreas122001/roberta-academic-detector",
 }
         return {"error": str(e)}
 # ---------------------------
+# Verdict Logic
 # ---------------------------
 def verdict(ai_prob):
     if ai_prob < 20:
         return "Most likely human-written."
     elif 20 <= ai_prob < 40:
         return "Unclear – could be either human or AI-assisted."
     elif 60 <= ai_prob < 80:
         return "Possibly AI-generated, or a human using AI assistance."
+    else:
         return "Likely AI-generated or heavily AI-assisted."
+# ---------------------------
+# Weighted Final Score
+# ---------------------------
def compute_final_score(results: dict) -> dict:
    """Add a weighted "Final Score" entry to *results* and return it.

    Every detector entry that reports an "AI Probability" contributes to a
    weighted mean. Entries without that key (for example ``{"error": ...}``)
    and any pre-existing "Final Score" entry are ignored. The
    "SzegedAI Detector" entry counts half as much as every other detector.

    Args:
        results: Mapping of detector name to that detector's result dict.
            Probabilities are on a 0-100 scale. The mapping is modified in
            place.

    Returns:
        The same ``results`` mapping, with "Final Score" set to a dict holding
        "Human Probability" and "AI Probability" (both rounded to two
        decimals) and the "Verdict" text for the averaged AI probability.
        When no detector reported a score the result is 0 % AI / 100 % human.
    """
    ai_sum = 0.0
    human_sum = 0.0
    weight_sum = 0.0
    for model, scores in results.items():
        if model == "Final Score" or "AI Probability" not in scores:
            continue
        ai_prob = scores["AI Probability"]
        # A detector that reports only the AI share gets the complement as its
        # human share; counting it as 0 would make the two final percentages
        # sum to less than 100.
        human_prob = scores.get("Human Probability", 100 - ai_prob)
        weight = 0.5 if model == "SzegedAI Detector" else 1.0
        ai_sum += ai_prob * weight
        human_sum += human_prob * weight
        weight_sum += weight

    if weight_sum:
        avg_ai_prob = ai_sum / weight_sum
        avg_human_prob = human_sum / weight_sum
    else:
        # No usable detector output: fall back to "human".
        avg_ai_prob = 0
        avg_human_prob = 100

    results["Final Score"] = {
        "Human Probability": round(avg_human_prob, 2),
        "AI Probability": round(avg_ai_prob, 2),
        "Verdict": verdict(avg_ai_prob),
    }
    return results
+# ---------------------------
+# Main Detector
+# ---------------------------
def detect_text(text):
    """Run every configured detector on *text* and return the combined results.

    The returned dict has one entry per name in ``MODELS``, then a
    "SzegedAI Detector" entry, plus whatever ``compute_final_score`` adds.
    """
    # Hugging Face classifiers first, keyed by their display name.
    detector_outputs = {
        label: run_hf_model(hf_id, text) for label, hf_id in MODELS.items()
    }
    detector_outputs["SzegedAI Detector"] = classify_szegedai(text)
    # compute weighted final score
    return compute_final_score(detector_outputs)
 # Script entry point: run the detectors on a fixed sample and print the result.
 if __name__ == "__main__":
     sample = "This is a test sentence written by AI or human."
     print(detect_text(sample))