Skip to content

Commit da8ceea

Browse files
authored
Add files via upload
1 parent a7a4f5b commit da8ceea

16 files changed

+15459
-0
lines changed

stack_overflow_security_questions_analysis/IoT-Security-Dataset.csv

Lines changed: 13404 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
import streamlit as st
2+
import pandas as pd
3+
import joblib
4+
import re
5+
from sklearn.feature_extraction.text import TfidfVectorizer
6+
7+
# Streamlit re-executes this whole script on every widget interaction, so the
# expensive start-up work (CSV parse, model unpickle, TF-IDF fit) is wrapped in
# cached loaders and only runs once per server process.


@st.cache_data
def _load_dataset():
    """Read the labelled dataset shipped next to the app."""
    return pd.read_csv('IoT-Security-Dataset.csv')


@st.cache_resource
def _load_model():
    """Load the persisted Random Forest classifier.

    joblib.load unpickles the file, which can execute arbitrary code; only
    ever point this at a model file you produced or otherwise trust.
    """
    return joblib.load('random_forest_model.pkl')


@st.cache_resource
def _fit_vectorizer():
    """Fit a TF-IDF vectorizer on the dataset's 'Cleaned Sentence' column.

    NOTE(review): the vectorizer is refit here rather than loaded from the
    training run. The resulting vocabulary must match the one the model was
    trained with -- confirm training used the same data and settings, or
    persist the training vectorizer alongside the model and load it instead.
    """
    vectorizer = TfidfVectorizer(max_features=5000)
    vectorizer.fit(_load_dataset()['Cleaned Sentence'])
    return vectorizer


# Load the dataset
df = _load_dataset()

# Load the saved Random Forest model
rf_model_loaded = _load_model()

# Load and fit the TF-IDF vectorizer on the dataset
tfidf_vectorizer = _fit_vectorizer()
16+
17+
# Function to preprocess the input text
def preprocess_text(text):
    """Normalise raw text before it is vectorised.

    The text is lower-cased, then each pattern below is replaced by a single
    space, in order: non-word characters, digits, a lone ASCII letter that has
    whitespace on both sides, and finally any run of whitespace. Leading and
    trailing whitespace is stripped from the result.

    Because the lone-letter pattern needs whitespace on both sides, a single
    letter at the very start or end of the text is kept, and of two adjacent
    lone letters only the first is removed.
    """
    cleaned = text.lower()
    # Order matters: punctuation and digits become spaces first, which can
    # expose new lone letters for the third pattern to remove.
    for pattern in (r'\W', r'\d', r'\s+[a-z]\s+', r'\s+'):
        cleaned = re.sub(pattern, ' ', cleaned)
    return cleaned.strip()
25+
26+
# Function to predict if a question is security-related
def predict_security(question, model, vectorizer):
    """Classify a single question with the given model.

    Args:
        question: Raw question text; it is cleaned with preprocess_text first.
        model: Object exposing ``predict`` for the vectorised input.
        vectorizer: Object exposing ``transform`` for a list of strings.

    Returns:
        The first (and only) element of the model's prediction output.
    """
    # The vectorizer expects a collection of documents, hence the one-item list.
    features = vectorizer.transform([preprocess_text(question)])
    return model.predict(features)[0]
32+
33+
# Streamlit app: page header, free-text input and a button that triggers
# classification of the entered question.
st.title("Security text Predictor")
st.write("Enter your question below to determine if it is related to security.")

user_question = st.text_area("Your Question")

if st.button("Predict"):
    if not user_question.strip():
        # Nothing but whitespace was entered; ask for real input.
        st.error("Please enter a question.")
    else:
        prediction = predict_security(
            user_question, rf_model_loaded, tfidf_vectorizer
        )
        # NOTE(review): label 0 is reported as "security-related" here --
        # confirm this matches the label encoding used when the model was trained.
        if prediction == 0:
            st.success("This question is security-related.")
        else:
            st.info("This question is not security-related.")
33 KB
Loading
30.1 KB
Loading
20.5 KB
Loading
20.8 KB
Loading
22 KB
Loading
21.5 KB
Loading
53.9 KB
Loading
18.7 KB
Loading

0 commit comments

Comments
 (0)