Spaces:

yagnik12
/

AI_Text_Detecter

Running

App Files Files Community

yagnik12 committed on Sep 12

Commit

13a1290

verified ·

1 Parent(s): 6c5caaa

Update ai_text_detector_valid_final.py

Browse files

Files changed (1) hide show

ai_text_detector_valid_final.py +56 -52

ai_text_detector_valid_final.py CHANGED Viewed

@@ -28,21 +28,39 @@ model_3 = AutoModelForSequenceClassification.from_pretrained("answerdotai/Modern
 model_3.load_state_dict(torch.hub.load_state_dict_from_url(model3_path, map_location=device))
 model_3.to(device).eval()
-label_mapping = { ... }  # keep as is
-# ---------------------------
-# Text Cleaning
-# ---------------------------
 # Normalize whitespace in `text` and return the cleaned string.
 def clean_text(text: str) -> str:
     # Replace non-breaking spaces with regular spaces and remove zero-width spaces.
     text = text.replace("\xa0", " ").replace("\u200b", "")
     # Collapse any run of two or more whitespace characters into a single space.
     text = re.sub(r"\s{2,}", " ", text)
     # Remove whitespace that directly precedes , . ; : ? or !
     text = re.sub(r"\s+([,.;:?!])", r"\1", text)
     # Trim leading and trailing whitespace.
     return text.strip()
-# ---------------------------
-# SzegedAI Detector
-# ---------------------------
 def classify_szegedai(text: str):
     cleaned_text = clean_text(text)
     if not cleaned_text.strip():
         return {"error": "Empty text"}
@@ -60,7 +78,7 @@ def classify_szegedai(text: str):
     probs = probs[0]
     ai_probs = probs.clone()
-    ai_probs[24] = 0  # "human"
     ai_total_prob = ai_probs.sum().item() * 100
     human_prob = 100 - ai_total_prob
@@ -74,9 +92,10 @@ def classify_szegedai(text: str):
     }
 # ---------------------------
-# HuggingFace Detectors
 # ---------------------------
 # Detector display name -> model id; detect_text passes each id to run_hf_model
 # and stores the result under the display name.
 MODELS = {
     "MonkeyDAnh": "MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
     # Disabled entry:
     # "Andreas122001": "andreas122001/roberta-academic-detector",
 }
@@ -94,9 +113,10 @@ def run_hf_model(model_id, text):
         return {"error": str(e)}
 # ---------------------------
-# Verdict Logic
 # ---------------------------
 def verdict(ai_prob):
     if ai_prob < 20:
         return "Most likely human-written."
     elif 20 <= ai_prob < 40:
@@ -105,61 +125,45 @@ def verdict(ai_prob):
         return "Unclear – could be either human or AI-assisted."
     elif 60 <= ai_prob < 80:
         return "Possibly AI-generated, or a human using AI assistance."
-    else:
         return "Likely AI-generated or heavily AI-assisted."
-# ---------------------------
-# Weighted Final Score
-# ---------------------------
-def compute_final_score(results: dict) -> dict:
-    # Add a "Final Score" entry to `results` (mutated in place and returned):
-    # the weighted mean of the detectors' AI / human probabilities, rounded to
-    # two decimals, plus the verdict string for the averaged AI probability.
-    weighted_ai_probs = []
-    weighted_human_probs = []
-    weights = []
-    for model, scores in results.items():
-        # Skip an existing aggregate entry and any result without an
-        # "AI Probability" key (for example an {"error": ...} dict).
-        if model == "Final Score" or "AI Probability" not in scores:
-            continue
-        ai_prob = scores.get("AI Probability", 0.0)
-        human_prob = scores.get("Human Probability", 0.0)
-        # The SzegedAI detector counts half as much as any other detector.
-        weight = 0.5 if model == "SzegedAI Detector" else 1.0
-        weighted_ai_probs.append(ai_prob * weight)
-        weighted_human_probs.append(human_prob * weight)
-        weights.append(weight)
-    if not weights:
-        # No detector produced a score: fall back to 0% AI / 100% human.
-        avg_ai_prob = 0
-        avg_human_prob = 100
     else:
-        avg_ai_prob = sum(weighted_ai_probs) / sum(weights)
-        avg_human_prob = sum(weighted_human_probs) / sum(weights)
-    verdict_text = verdict(avg_ai_prob)
     results["Final Score"] = {
-        "Human Probability": round(avg_human_prob, 2),
-        "AI Probability": round(avg_ai_prob, 2),
-        "Verdict": verdict_text
     }
     return results
-# ---------------------------
-# Main Detector
-# ---------------------------
-def detect_text(text):
-    # Run every detector on `text` and return a dict keyed by detector name,
-    # with the aggregate stored under "Final Score".
-    results = {}
-    # One result per entry in MODELS.
-    for name, model_id in MODELS.items():
-        results[name] = run_hf_model(model_id, text)
-    results["SzegedAI Detector"] = classify_szegedai(text)
-    # Append the weighted "Final Score" entry.
-    results = compute_final_score(results)
-    return results
 if __name__ == "__main__":
     # Smoke test: run all detectors on one sample sentence and print the results dict.
     sample = "This is a test sentence written by AI or human."
     print(detect_text(sample))

 model_3.load_state_dict(torch.hub.load_state_dict_from_url(model3_path, map_location=device))
 model_3.to(device).eval()
+# Class index -> source label (41 entries, indices 0-40). Index 24 is "human";
+# classify_szegedai zeroes that index and counts every other class as AI.
+label_mapping = {
+    0: '13B', 1: '30B', 2: '65B', 3: '7B', 4: 'GLM130B', 5: 'bloom_7b',
+    6: 'bloomz', 7: 'cohere', 8: 'davinci', 9: 'dolly', 10: 'dolly-v2-12b',
+    11: 'flan_t5_base', 12: 'flan_t5_large', 13: 'flan_t5_small',
+    14: 'flan_t5_xl', 15: 'flan_t5_xxl', 16: 'gemma-7b-it', 17: 'gemma2-9b-it',
+    18: 'gpt-3.5-turbo', 19: 'gpt-35', 20: 'gpt4', 21: 'gpt4o',
+    22: 'gpt_j', 23: 'gpt_neox', 24: 'human', 25: 'llama3-70b', 26: 'llama3-8b',
+    27: 'mixtral-8x7b', 28: 'opt_1.3b', 29: 'opt_125m', 30: 'opt_13b',
+    31: 'opt_2.7b', 32: 'opt_30b', 33: 'opt_350m', 34: 'opt_6.7b',
+    35: 'opt_iml_30b', 36: 'opt_iml_max_1.3b', 37: 't0_11b', 38: 't0_3b',
+    39: 'text-davinci-002', 40: 'text-davinci-003'
+}
+# def clean_text(text: str) -> str:
+#     text = re.sub(r"\s{2,}", " ", text)
+#     text = re.sub(r"\s+([,.;:?!])", r"\1", text)
+#     return text
 def clean_text(text: str) -> str:
+    # Replace non-breaking spaces with regular spaces and remove zero-width spaces
     text = text.replace("\xa0", " ").replace("\u200b", "")
+    # Collapse any run of two or more whitespace characters into a single space
     text = re.sub(r"\s{2,}", " ", text)
+    # Remove whitespace that directly precedes , . ; : ? or !
     text = re.sub(r"\s+([,.;:?!])", r"\1", text)
+    # Trim leading/trailing whitespace
     return text.strip()
 def classify_szegedai(text: str):
+    """ModernBERT ensemble detector (replaces SzegedAI Space call)."""
     cleaned_text = clean_text(text)
     if not cleaned_text.strip():
         return {"error": "Empty text"}
     probs = probs[0]
     ai_probs = probs.clone()
+    ai_probs[24] = 0  # "human" label index
     ai_total_prob = ai_probs.sum().item() * 100
     human_prob = 100 - ai_total_prob
     }
 # ---------------------------
+# Other Detectors (models run through run_hf_model)
 # ---------------------------
 MODELS = {
+    # NOTE(review): disabled entry; the id below names a DistilBERT SST-2 checkpoint,
+    # which does not match the "DeBERTa Detector" label - confirm before re-enabling.
+    # "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",
     "MonkeyDAnh": "MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
     # "Andreas122001": "andreas122001/roberta-academic-detector",
 }
         return {"error": str(e)}
 # ---------------------------
+# Verdict Logic and Main Detector
 # ---------------------------
 def verdict(ai_prob):
+    """Return a human-readable verdict based on AI probability"""
     if ai_prob < 20:
         return "Most likely human-written."
     elif 20 <= ai_prob < 40:
         return "Unclear – could be either human or AI-assisted."
     elif 60 <= ai_prob < 80:
         return "Possibly AI-generated, or a human using AI assistance."
+    else:  # ai_prob >= 80
         return "Likely AI-generated or heavily AI-assisted."
+def detect_text(text):
+    # Run every detector on `text` and return a dict keyed by detector name,
+    # plus a "Final Score" entry that holds only the combined verdict string.
+    results = {}
+    # One result per entry in MODELS
+    for name, model_id in MODELS.items():
+        results[name] = run_hf_model(model_id, text)
+    # SzegedAI ModernBERT ensemble
+    results["SzegedAI Detector"] = classify_szegedai(text)
+    # ---------------------------
+    # Final Verdict (Hybrid Rule)
+    # If any detector reports an AI Probability above 90, the verdict comes from
+    # that detector alone; otherwise it comes from the unweighted mean of all
+    # reported AI probabilities. Results without an "AI Probability" key
+    # (for example {"error": ...} dicts) are ignored.
+    # ---------------------------
+    ai_probs = []
+    strong_ai_detector = None
+    for k, v in results.items():
+        if "AI Probability" in v:
+            ai_probs.append(v["AI Probability"])
+            if v["AI Probability"] > 90:   # strong AI flag; a later detector above 90 overwrites an earlier one
+                strong_ai_detector = v
+    # NOTE(review): relies on `np` (numpy) being imported outside this view - confirm.
+    # With no usable scores the average is 0, which verdict() maps to its lowest band.
+    avg_ai = np.mean(ai_probs) if ai_probs else 0
+    if strong_ai_detector:
+        final_verdict = verdict(strong_ai_detector["AI Probability"])
+        # Append the model name when the strong detector reported one.
+        if "Identified LLM" in strong_ai_detector:
+            final_verdict += f" (Identified: {strong_ai_detector['Identified LLM']})"
     else:
+        final_verdict = verdict(avg_ai)
     results["Final Score"] = {
+        "Verdict": final_verdict,
+        # Averaged probability is currently not reported:
+        # "Average AI Probability": round(avg_ai, 2)
     }
     return results
 if __name__ == "__main__":
     # Smoke test: run all detectors on one sample sentence and print the results dict.
     sample = "This is a test sentence written by AI or human."
     print(detect_text(sample))