yagnik12 committed on
Commit
d4db00e
·
verified ·
1 Parent(s): e7d2c96

Update ai_text_detector_valid_final.py

Browse files
Files changed (1) hide show
  1. ai_text_detector_valid_final.py +72 -67
ai_text_detector_valid_final.py CHANGED
@@ -1,86 +1,91 @@
1
- import os
2
  import torch
3
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
4
- import requests
5
  import numpy as np
6
 
7
# Optional Hugging Face access token, read from the environment
# (not needed when the target Space is public).
HF_TOKEN = os.getenv("HF_TOKEN")

# Prediction endpoint of the SzegedAI AI_Detector Space.
SZEGEDAI_URL = "https://hf.space/embed/SzegedAI/AI_Detector/api/predict/"

# Authorization headers for API requests; empty when no token is configured.
HEADERS = {} if not HF_TOKEN else {"Authorization": f"Bearer {HF_TOKEN}"}

# Lowercase twin of HEADERS: same content, but a separate dict object.
headers = dict(HEADERS)
14
-
15
  # Multiple AI text detection models
16
# Maps each detector's display name to its Hugging Face model ID.
MODELS = {
    "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",
    "MonkeyDAnh": "MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
    "Andreas122001": "andreas122001/roberta-academic-detector",
    # SzegedAI is handled separately since it's a Space, not a model checkpoint.
}
 
 
 
 
 
22
 
23
def run_hf_model(model_id, text):
    """Score ``text`` with one Hugging Face sequence-classification model.

    The tokenizer and model are loaded from ``model_id`` on every call. The
    softmax over the logits of the first (only) sequence is reported as
    percentages, reading index 0 as the human score and index 1 as the AI
    score.

    Returns:
        ``{"Human Probability": float, "AI Probability": float}`` on success,
        or ``{"error": <message>}`` if any step raises.
    """
    try:
        tok = AutoTokenizer.from_pretrained(model_id)
        classifier = AutoModelForSequenceClassification.from_pretrained(model_id)
        encoded = tok(text, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            raw_logits = classifier(**encoded).logits
            row = torch.softmax(raw_logits, dim=-1).cpu().numpy()[0]
        human_pct = float(row[0] * 100)
        ai_pct = float(row[1] * 100)
        return {"Human Probability": human_pct, "AI Probability": ai_pct}
    except Exception as exc:
        # Best-effort: report the failure as data instead of raising.
        return {"error": str(exc)}
35
def szegedai_predict(text):
    """Query the SzegedAI AI_Detector Space and parse its probability string.

    Posts ``{"data": [text]}`` to ``SZEGEDAI_URL`` (30 s timeout) and extracts
    the two percentages from the first element of the response's ``"data"``
    list.

    Returns:
        ``{"Human Probability": float, "AI Probability": float}`` (rounded to
        two decimals) when both percentages are found;
        ``{"error": "Unexpected response: ..."}`` when the text cannot be
        parsed; ``{"error": <message>}`` if the request or parsing raises.
    """
    # Imported here because the regex parsing below needs `re`, which the
    # module's import block does not provide; without it every call ended in
    # a swallowed NameError.
    import re

    try:
        response = requests.post(
            SZEGEDAI_URL, json={"data": [text]}, headers=HEADERS, timeout=30
        )
        response.raise_for_status()

        # e.g. "Human Probability: 99.83% | AI Probability: 0.17%"
        raw = response.json()["data"][0]

        human_match = re.search(r"Human[^0-9]*([\d.]+)%", raw)
        ai_match = re.search(r"AI[^0-9]*([\d.]+)%", raw)
        if not (human_match and ai_match):
            return {"error": f"Unexpected response: {raw}"}

        return {
            "Human Probability": round(float(human_match.group(1)), 2),
            "AI Probability": round(float(ai_match.group(1)), 2),
        }
    except Exception as e:
        # Deliberate best-effort boundary: network, HTTP, JSON and parsing
        # failures are all reported as data so one detector cannot abort a run.
        return {"error": str(e)}
59
-
60
def detect_text(text):
    """Run all detectors on ``text`` and attach a rule-based final verdict.

    Each entry of ``MODELS`` is scored with ``run_hf_model``; the SzegedAI
    Space is queried separately via ``szegedai_predict``. The verdict is
    derived from the mean of every available "AI Probability" (0 when no
    detector produced one).

    Returns:
        Dict of per-detector result dicts plus
        ``"Final Score": {"Verdict": <text>}``.
    """
    # Transformers models
    results = {
        name: run_hf_model(model_id, text) for name, model_id in MODELS.items()
    }

    # SzegedAI (Space)
    results["SzegedAI Detector"] = szegedai_predict(text)

    # Final verdict (simple rule-based): entries that failed carry no
    # "AI Probability" key and are left out of the average.
    ai_probs = [
        entry["AI Probability"]
        for entry in results.values()
        if "AI Probability" in entry
    ]
    avg_ai = np.mean(ai_probs) if ai_probs else 0

    if avg_ai > 80:
        label = "Likely AI-generated"
    elif avg_ai > 40:
        label = "Possibly human-written with AI assistance"
    else:
        label = "Likely human-written"

    results["Final Score"] = {"Verdict": label}
    return results
83
 
 
84
if __name__ == "__main__":
    # Smoke run: score one fixed sentence and print the raw result dict.
    print(detect_text("This is a test sentence written by AI or human."))
 
 
 
 
 
 
 
 
 
 
1
  import torch
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
 
3
  import numpy as np
4
 
 
 
 
 
 
 
 
 
5
  # Multiple AI text detection models
6
# Maps each detector's display name to its Hugging Face model ID.
MODELS = {
    # NOTE(review): labelled "DeBERTa" but the ID names a DistilBERT SST-2
    # checkpoint — confirm this is the intended detector.
    "DeBERTa Detector": "distilbert-base-uncased-finetuned-sst-2-english",
    "MonkeyDAnh": "MonkeyDAnh/my-awesome-ai-detector-roberta-base-v4-human-vs-machine-finetune",
    "Andreas122001": "andreas122001/roberta-academic-detector",
    # NOTE(review): presumably an MNLI (3-class) checkpoint, while detect_text
    # unpacks exactly two probabilities per model — verify this entry can
    # ever produce a score.
    "roberta-mnli": "roberta-large-mnli",
}
12
+
13
def load_model(model_id):
    """Load the tokenizer and sequence-classification model for ``model_id``.

    This function keeps no cache of its own: each call goes through
    ``from_pretrained`` again for both objects.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    return (
        AutoTokenizer.from_pretrained(model_id),
        AutoModelForSequenceClassification.from_pretrained(model_id),
    )
17
 
18
def predict(text, tokenizer, model):
    """Return the softmax class probabilities for ``text`` as a NumPy array.

    ``tokenizer`` is called with truncation and padding enabled and must
    return PyTorch tensors usable as keyword arguments to ``model``; the model
    output must expose ``.logits``. Inference runs under ``torch.no_grad()``.

    Returns:
        1-D array of probabilities for the first (only) sequence. Callers read
        it as ``[human_prob, ai_prob]`` — the label order depends on the
        checkpoint, so confirm it per model.
    """
    encoded = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        logits = model(**encoded).logits
        distribution = torch.softmax(logits, dim=-1)
    return distribution[0].numpy()
 
 
 
 
 
 
 
 
 
 
 
 
24
 
 
25
 
26
def verdict(ai_prob):
    """Return a human-readable verdict based on AI probability.

    ``ai_prob`` is a percentage. Bands are half-open: [.., 20), [20, 40),
    [40, 60), [60, 80); anything that is not below 80 gets the final message.
    """
    # (exclusive upper bound, message), checked in ascending order.
    bands = (
        (20, "Most likely human-written."),
        (40, "Possibly human-written with minimal AI assistance."),
        (60, "Unclear – could be either human or AI-assisted."),
        (80, "Possibly AI-generated, or a human using AI assistance."),
    )
    for upper_bound, message in bands:
        if ai_prob < upper_bound:
            return message
    return "Likely AI-generated or heavily AI-assisted."
38
 
 
 
 
 
 
 
 
 
 
 
 
 
 
39
def detect_text(text):
    """Run every detector in ``MODELS`` on ``text`` and add an averaged verdict.

    Each model is loaded with ``load_model`` and scored with ``predict``; the
    two returned probabilities are read as (human, AI) and stored as
    percentages rounded to two decimals. A model that fails for any reason
    gets ``{"error": <message>}`` and does not stop the remaining models.

    Returns:
        Dict keyed by model display name. When at least one model produced
        scores, a ``"Final Score"`` entry is added: ``{"Verdict": <text>}``
        from ``verdict`` applied to the mean AI percentage, or
        ``{"error": <message>}`` if computing the verdict raises.
    """
    results = {}

    for name, model_id in MODELS.items():
        try:
            tokenizer, model = load_model(model_id)
            probs = predict(text, tokenizer, model)
            # Only a two-way output can be read as (human, AI); say so
            # explicitly instead of failing with an opaque unpacking error.
            if len(probs) != 2:
                raise ValueError(
                    f"expected 2 class probabilities, got {len(probs)}"
                )
            human_score, ai_score = probs
            results[name] = {
                "Human Probability": round(float(human_score) * 100, 2),
                "AI Probability": round(float(ai_score) * 100, 2),
            }
        except Exception as e:
            # Per-model boundary: one broken checkpoint must not abort the run.
            results[name] = {"error": str(e)}

    # ------------------ Final Score (Average) ------------------
    # Failed models carry no "AI Probability" key and are left out.
    ai_scores = [
        r["AI Probability"]
        for r in results.values()
        if isinstance(r, dict) and "AI Probability" in r and "Human Probability" in r
    ]
    if ai_scores:
        try:
            avg_ai = sum(ai_scores) / len(ai_scores)
            # Must be a dict, like every other entry: consumers iterate each
            # entry with .items(), and a set is not JSON-serializable.
            results["Final Score"] = {"Verdict": verdict(avg_ai)}
        except Exception as e:
            results["Final Score"] = {"error": str(e)}

    return results
79
 
80
+
81
if __name__ == "__main__":
    # Interactive entry point: read one text from stdin, run all detectors,
    # and print each entry of the result under its own heading.
    text = input("Enter text to analyze:\n")
    output = detect_text(text)
    print("\n--- Detection Results ---")
    for model, scores in output.items():
        print(f"\n[{model}]")
        if not isinstance(scores, dict):
            # An entry that is not a mapping (e.g. a bare set of messages) has
            # no .items(); print its members instead of crashing.
            members = scores if isinstance(scores, (set, frozenset, list, tuple)) else [scores]
            for member in members:
                print(member)
            continue
        for k, v in scores.items():
            if isinstance(v, (int, float)):  # only add % for numeric values
                print(f"{k}: {v}%")
            else:
                print(f"{k}: {v}")