-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathapp.py
More file actions
90 lines (71 loc) · 3.32 KB
/
app.py
File metadata and controls
90 lines (71 loc) · 3.32 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import streamlit as st
from transformers import GPT2LMHeadModel, GPT2Tokenizer
from collections import OrderedDict
import re
import nltk
from nltk.tokenize import sent_tokenize
import joblib
import matplotlib.pyplot as plt
#activate virtual environment first
# for perplexity score
model_name="gpt2" #or any gpt variant
model=GPT2LMHeadModel.from_pretrained(model_name)
tokenizer=GPT2Tokenizer.from_pretrained(model_name)
def calculate_perplexity(line):
tokens = tokenizer.encode(line, return_tensors='pt')
perplexity=model.forward(tokens,labels=tokens)["loss"].exp().item()
return perplexity
def getResult(perplexity):
threshold1=60
threshold2=80
if perplexity<threshold1:
st.markdown("The Text is generated by AI with [perplexity](https://en.wikipedia.org/wiki/Perplexity#:~:text=In%20information%20theory%2C%20perplexity%20is,be%20drawn%20from%20the%20distribution.) score of {:.2f}".format(perplexity))
return
elif perplexity<threshold2:
"The Text is most probably contain parts which are generated by AI. (require more text for better Judgement)"
return
else:
st.markdown("The Text is written by Human with [perplexity](https://en.wikipedia.org/wiki/Perplexity#:~:text=In%20information%20theory%2C%20perplexity%20is,be%20drawn%20from%20the%20distribution.) score of {:.2f}".format(perplexity))
return
def predict(sentence):
result=OrderedDict()
lines = sent_tokenize(sentence)
Perplexity_per_line=[]
for line in lines:
ppl=calculate_perplexity(line)
Perplexity_per_line.append(ppl)
result["perplexity"]=sum(Perplexity_per_line)/len(Perplexity_per_line)
out=getResult(result["perplexity"])
#from trained model
ensemble_model=joblib.load(r'C:\Users\sumit\Desktop\projects\detectAItext\ensemble_model.joblib')
tfidf_vectorizer=joblib.load(r'C:\Users\sumit\Desktop\projects\detectAItext\tfidf_vectorizer.joblib')
def main():
nltk.download('punkt')
st.title("Detect AI generated Text")
user_input = st.text_area("Enter your text here:",height=300)
if st.button("Check"):
if user_input:
predict(user_input)
test_x=tfidf_vectorizer.transform([user_input])
result=ensemble_model.predict_proba(test_x)[:,1]
ai=result[0]*100
human=100-ai
labels = ['Human Generated', 'AI Generated',]
sizes = [human,ai]
fig, ax = plt.subplots()
wedges, texts, autotexts = ax.pie(sizes, labels=labels, autopct='%1.2f%%', startangle=90,
textprops=dict(color="w"))
# Set the background color to be transparent
fig.patch.set_facecolor('none')
ax.set_facecolor('none')
# Set the text color of the autopct labels to be white
for autotext in autotexts:
autotext.set_color('white')
# Display the pie chart using st.pyplot()
st.pyplot(fig)
# st.write(f"Probability (AI generated)= {ai:.2f} %")
# st.write(f"Probability (Human generated)= {human:.2f} %")
else:
st.warning("Please enter text before pressing the 'Check' button.")
if __name__ == "__main__":
main()