33from sklearn .decomposition import TruncatedSVD
44from utils import *
55import matplotlib .pyplot as plt
6+ import matplotlib
7+ matplotlib .use ('QtAgg' )
68
79class AI (object ):
8- def __init__ (self , OPENAI_API_KEY , model_type ):
10+ def __init__ (self , OPENAI_API_KEY , model_type , llm_model ):
911 # Initialize OpenAI client and model
10- self .client , self .model = initialize_openai_and_embedding (OPENAI_API_KEY , model_type )
11-
12+ self .client , self .model = initialize_openai_and_embedding (OPENAI_API_KEY , model_type , llm_model )
13+ self .llm_model = llm_model
14+
1215 def content_embedding (self , content ):
1316 return self .model .encode (content ).tolist ()
1417
1518 def clustering (self , allowed_type , txt_files , pdf_files , docx_files , num_clusters ):
1619 # Clustering logic
1720 contents = []
21+ C_path = []
1822 if allowed_type == "txt" or allowed_type == "all" :
1923 for file in txt_files :
2024 with open (file , 'r' , encoding = 'utf-8' ) as f :
2125 contents .append (f .read ())
26+ C_path .append (basename (file ))
2227
2328 if allowed_type == "pdf" or allowed_type == "all" :
2429 for file in pdf_files :
2530 try :
2631 pdf_text = read_pdf (file )
2732 contents .append (pdf_text )
33+ C_path .append (basename (file ))
2834 except Exception as e :
2935 write_log (f"Warning: Error reading PDF file: { e } " )
3036
@@ -33,6 +39,7 @@ def clustering(self, allowed_type, txt_files, pdf_files, docx_files, num_cluster
3339 try :
3440 docx_text = read_docx (file )
3541 contents .append (docx_text )
42+ C_path .append (basename (file ))
3643 except Exception as e :
3744 write_log (f"Warning: Error reading DOCX file: { e } " )
3845
@@ -51,12 +58,9 @@ def clustering(self, allowed_type, txt_files, pdf_files, docx_files, num_cluster
5158 for i in range (num_clusters ):
5259 plt .scatter (X_svd [y_kmeans == i , 0 ], X_svd [y_kmeans == i , 1 ], label = f'Cluster { i + 1 } ' )
5360
54- '''for i, txt_file in enumerate(txt_files + pdf_files):
55- plt.annotate(basename(txt_file), (X_svd[i, 0], X_svd[i, 1]))'''
56-
57- for i in range (len (X_svd )):
58- plt .annotate (basename (txt_files [i % len (txt_files )]), (X_svd [i , 0 ], X_svd [i , 1 ])) # Use modulo operator to loop over txt_files
59-
61+ for i in range (len (contents )):
62+ plt .annotate (C_path [i ], (X_svd [i , 0 ], X_svd [i , 1 ]))
63+
6064 plt .title ('Clustering of Text Files Content' )
6165 plt .xlabel ('Feature 1' )
6266 plt .ylabel ('Feature 2' )
@@ -94,18 +98,22 @@ def keyword_search(self, allowed_type, txt_files, pdf_files, docx_files, keyword
9498
9599 return results
96100
97- def chat_interaction (self , openaiclient , msg , selected_note_files , chat_history ):
101+ def chat_interaction (self , msg , selected_note_files , chat_history ):
98102 # Chat interaction logic
99103 notes_text = ""
100104 for file in selected_note_files :
101105 with open (file , 'r' , encoding = 'utf-8' ) as f :
102106 notes_text += f .read () + "\n "
103-
104- response = openaiclient .chat .completions .create (
105- model = "gpt-3.5-turbo" ,
106- messages = [
107- {"role" : "system" , "content" : "Answer the users question based on the notes.(answer only one sentence(15-20 words))" },
108- {"role" : "user" , "content" : f"Notes: \n { notes_text } " }] + chat_history + [{"role" : "user" , "content" : msg }], max_tokens = 41 )
109-
110- chatbot_response = response .choices [0 ].message .content .strip ()
111- return chatbot_response
107+ if self .llm_model in ["Tinyllama(Q5)" , "Llama2-7B(Q4)" ]:
108+ history_str = '\n ' .join ([f"{ c ['role' ]} : { c ['content' ]} " for c in chat_history ])
109+ prompt = f"""Answer the user question based on the following notes.(answer max one sentence(15-20 words)\n \n Notes: \n { notes_text } \n \n { history_str } \n user: { msg } \n assistant: """
110+ chatbot_response = self .client (prompt , temperature = 0.7 , max_new_tokens = 41 , stop = ['assistant:' ,'user:' ], threads = 4 )
111+ else :
112+ response = self .client .chat .completions .create (
113+ model = "gpt-3.5-turbo" ,
114+ messages = [
115+ {"role" : "system" , "content" : "Answer the user question based on the notes.(answer max one sentence(15-20 words)" },
116+ {"role" : "user" , "content" : f"Notes: \n { notes_text } " }] + chat_history + [{"role" : "user" , "content" : msg }], max_tokens = 41 )
117+
118+ chatbot_response = response .choices [0 ].message .content
119+ return chatbot_response .strip ()
# 0 commit comments