Spaces:

yagnik12
/

AI_Text_Detecter

Running

App Files Files Community

yagnik12 committed on Sep 12

Commit

d4db00e

verified ·

1 Parent(s): e7d2c96

Update ai_text_detector_valid_final.py

Browse files

Files changed (1) hide show

ai_text_detector_valid_final.py +72 -67

ai_text_detector_valid_final.py CHANGED Viewed

@@ -1,86 +1,91 @@
-import os
 import torch
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import requests
 import numpy as np
-# Hugging Face token and request settings for the remote SzegedAI detector
-HF_TOKEN = os.getenv("HF_TOKEN")  # Hugging Face token (optional if space is public)
-SZEGEDAI_URL = "https://hf.space/embed/SzegedAI/AI_Detector/api/predict/"  # endpoint that szegedai_predict POSTs to
-HEADERS = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}  # bearer-auth header, or empty when HF_TOKEN is unset
-# Headers for API
-headers = {"Authorization": f"Bearer {HF_TOKEN}"} if HF_TOKEN else {}  # NOTE(review): same value as HEADERS; the visible code only reads HEADERS
 # Multiple AI text detection models
-MODELS = {  # display name -> Hugging Face model id passed to run_hf_model
     "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",  # NOTE(review): label says DeBERTa but the id names a DistilBERT SST-2 checkpoint; confirm this is the intended detector
     "MonkeyDAnh":"MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
-    "Andreas122001":"andreas122001/roberta-academic-detector"
-    # SzegedAI handled separately since it's a Space
 }
# Tokenizer/model pairs already loaded in this process, keyed by model id.
_HF_MODEL_CACHE = {}


def run_hf_model(model_id, text):
    """Score *text* with a Hugging Face sequence-classification model.

    The tokenizer and model for *model_id* are loaded on first use and then
    reused, so repeated calls do not reload the weights.

    Args:
        model_id: Hugging Face model identifier.
        text: Text to classify.

    Returns:
        ``{"Human Probability": float, "AI Probability": float}`` with values
        in percent, or ``{"error": str}`` if loading or inference fails.
    """
    try:
        if model_id not in _HF_MODEL_CACHE:
            _HF_MODEL_CACHE[model_id] = (
                AutoTokenizer.from_pretrained(model_id),
                AutoModelForSequenceClassification.from_pretrained(model_id),
            )
        tokenizer, model = _HF_MODEL_CACHE[model_id]

        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            logits = model(**inputs).logits
        probs = torch.softmax(logits, dim=-1).cpu().numpy()[0]
        # NOTE(review): assumes class index 0 is "human" and index 1 is "AI";
        # confirm against each model's label mapping.
        return {
            "Human Probability": float(probs[0] * 100),
            "AI Probability": float(probs[1] * 100),
        }
    except Exception as e:
        # Best-effort contract: one failing detector must not abort the others.
        return {"error": str(e)}
def szegedai_predict(text):
    """Query the SzegedAI detector Space and parse its percentage scores.

    Posts *text* to ``SZEGEDAI_URL`` and extracts the two percentages from a
    reply such as ``"Human Probability: 99.83% | AI Probability: 0.17%"``.

    Args:
        text: Text to classify.

    Returns:
        ``{"Human Probability": float, "AI Probability": float}`` rounded to
        two decimals, or ``{"error": str}`` if the request fails or the reply
        does not contain both percentages.
    """
    # Local import: the file's import block does not bring `re` into scope, so
    # without this every call ended in a swallowed NameError.
    import re

    try:
        response = requests.post(
            SZEGEDAI_URL, json={"data": [text]}, headers=HEADERS, timeout=30
        )
        response.raise_for_status()
        raw = response.json()["data"][0]

        human_match = re.search(r"Human[^0-9]*([\d.]+)%", raw)
        ai_match = re.search(r"AI[^0-9]*([\d.]+)%", raw)
        if not (human_match and ai_match):
            return {"error": f"Unexpected response: {raw}"}

        return {
            "Human Probability": round(float(human_match.group(1)), 2),
            "AI Probability": round(float(ai_match.group(1)), 2),
        }
    except Exception as e:
        # Best-effort contract: network/parse failures become an error entry.
        return {"error": str(e)}
 def detect_text(text):
     results = {}
-    # Transformers models
     for name, model_id in MODELS.items():
-        results[name] = run_hf_model(model_id, text)
-    # SzegedAI (Space)
-    results["SzegedAI Detector"] = szegedai_predict(text)
-    # Final verdict (simple rule-based)
-    ai_probs = []
-    for v in results.values():
-        if "AI Probability" in v:
-            ai_probs.append(v["AI Probability"])
-    avg_ai = np.mean(ai_probs) if ai_probs else 0
-    if avg_ai > 80:
-        verdict = "Likely AI-generated"
-    elif avg_ai > 40:
-        verdict = "Possibly human-written with AI assistance"
-    else:
-        verdict = "Likely human-written"
-    results["Final Score"] = {"Verdict": verdict}
     return results
 if __name__ == "__main__":  # manual smoke test; runs only when the file is executed as a script
-    sample = "This is a test sentence written by AI or human."
-    print(detect_text(sample))  # prints the raw results dict returned by detect_text

 import torch
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 import numpy as np
 # Multiple AI text detection models
+MODELS = {  # display name -> Hugging Face model id loaded by detect_text
     "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",  # NOTE(review): label says DeBERTa but the id names a DistilBERT SST-2 checkpoint; confirm this is the intended detector
     "MonkeyDAnh":"MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
+    "Andreas122001":"andreas122001/roberta-academic-detector",
+    "roberta-mnli": "roberta-large-mnli"  # NOTE(review): detect_text unpacks exactly two probabilities; confirm this checkpoint has two labels, otherwise it ends up as an error entry
 }
# Tokenizer/model pairs already loaded in this process, keyed by model id.
_LOADED_MODELS = {}


def load_model(model_id):
    """Return the ``(tokenizer, model)`` pair for *model_id*.

    The pair is loaded with ``from_pretrained`` on first request and reused
    afterwards, so repeated calls for the same id do not reload the weights.

    Args:
        model_id: Hugging Face model identifier.

    Returns:
        Tuple ``(tokenizer, model)``.

    Raises:
        Whatever ``from_pretrained`` raises when the id cannot be loaded;
        failed loads are not cached.
    """
    if model_id not in _LOADED_MODELS:
        tokenizer = AutoTokenizer.from_pretrained(model_id)
        model = AutoModelForSequenceClassification.from_pretrained(model_id)
        _LOADED_MODELS[model_id] = (tokenizer, model)
    return _LOADED_MODELS[model_id]
def predict(text, tokenizer, model):
    """Return the softmax class probabilities the model assigns to *text*.

    Args:
        text: Text to classify.
        tokenizer: Callable that turns *text* into keyword tensors for *model*
            (called with ``return_tensors="pt"``, truncation and padding).
        model: Callable accepting those keyword tensors and returning an
            object with a ``logits`` attribute.

    Returns:
        1-D NumPy array with the probabilities of the first input row, one
        value per class.
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
        probs = torch.nn.functional.softmax(outputs.logits, dim=-1)
    # .cpu() keeps the NumPy conversion valid when the logits live on another device.
    # NOTE(review): callers treat the result as [human_prob, ai_prob]; that
    # order depends on each model's label mapping and should be confirmed.
    return probs[0].cpu().numpy()
def verdict(ai_prob: float) -> str:
    """Return a human-readable verdict based on AI probability.

    Args:
        ai_prob: AI probability in percent.

    Returns:
        One of five fixed sentences; the bands switch at 20, 40, 60 and 80,
        each lower bound inclusive.
    """
    # Each guard only needs the upper bound: earlier returns exclude lower values.
    if ai_prob < 20:
        return "Most likely human-written."
    if ai_prob < 40:
        return "Possibly human-written with minimal AI assistance."
    if ai_prob < 60:
        return "Unclear – could be either human or AI-assisted."
    if ai_prob < 80:
        return "Possibly AI-generated, or a human using AI assistance."
    return "Likely AI-generated or heavily AI-assisted."
 def detect_text(text):
     results = {}
+    ai_scores = []
     for name, model_id in MODELS.items():
+        try:
+            tokenizer, model = load_model(model_id)
+            probs = predict(text, tokenizer, model)
+            human_score, ai_score = probs
+            results[name] = {
+                "Human Probability": round(float(human_score) * 100, 2),
+                "AI Probability": round(float(ai_score) * 100, 2),
+            }
+            ai_scores.append(ai_score)
+        except Exception as e:
+            results[name] = {"error": str(e)}
+    # ------------------ Final Score (Average) ------------------
+    try:
+        ai_scores, human_scores = [], []
+        for r in results.values():
+            if isinstance(r, dict) and "AI Probability" in r and "Human Probability" in r:
+                ai_scores.append(r["AI Probability"])
+                human_scores.append(r["Human Probability"])
+        if ai_scores and human_scores:
+            avg_ai = sum(ai_scores) / len(ai_scores)
+            avg_human = sum(human_scores) / len(human_scores)
+            results["Final Score"] = {
+                # "Human Probability (average)": float(round(avg_human, 2)),
+                # "AI Probability (average)": float(round(avg_ai, 2))
+                # "Verdict": verdict(avg_ai)
+                verdict(avg_ai)
+            }
+    except Exception as e:
+        results["Final Score"] = {"error": str(e)}
     return results
 if __name__ == "__main__":
+    text = input("Enter text to analyze:\n")
+    output = detect_text(text)
+    print("\n--- Detection Results ---")
+    for model, scores in output.items():
+        print(f"\n[{model}]")
+        for k, v in scores.items():
+            if isinstance(v, (int, float)):  # only add % for numeric values
+                print(f"{k}: {v}%")
+            else:
+                print(f"{k}: {v}")