1- from flask import Flask , request , jsonify , render_template , Response , stream_with_context , session , send_from_directory
1+ from flask import (
2+ Flask ,
3+ request ,
4+ jsonify ,
5+ render_template ,
6+ Response ,
7+ stream_with_context ,
8+ session ,
9+ send_from_directory ,
10+ )
211from flask_wtf .csrf import CSRFProtect
312from rag_system import RAGSystem
413import hashlib
716import segment .analytics as analytics
817import uuid
918
10- from werkzeug .test import EnvironBuilder
11- from werkzeug .wrappers import Request
12-
1319import logging
1420import redis
15- from intercom import parse_html_to_text , set_conversation_human_replied , is_conversation_human_replied , answer_intercom_conversation , check_intercom_ip
21+ from intercom import (
22+ parse_html_to_text ,
23+ set_conversation_human_replied ,
24+ is_conversation_human_replied ,
25+ answer_intercom_conversation ,
26+ check_intercom_ip ,
27+ )
1628from utils import generate
1729
# Logging: DEBUG verbosity when the DEBUG environment variable is set to a
# non-empty value, INFO otherwise.
logging.basicConfig(
    level=logging.INFO if not os.getenv("DEBUG") else logging.DEBUG,
    format="%(asctime)s %(levelname)s %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
)
logger = logging.getLogger(__name__)

# Segment analytics write key comes from the environment.
analytics.write_key = os.getenv("SEGMENT_WRITE_KEY")

# Flask application; static assets live under templates/static.
app = Flask(__name__, static_folder="templates/static")
app.config.update(
    SECRET_KEY=os.getenv("SECRET_KEY"),
    SESSION_COOKIE_HTTPONLY=True,
    # Any non-empty SESSION_COOKIE_SECURE value switches the flag on.
    SESSION_COOKIE_SECURE=bool(os.getenv("SESSION_COOKIE_SECURE")),
)

# The RAG system is attached to the app so request handlers can reach it.
app.rag_system = RAGSystem()

csrf = CSRFProtect(app)

# Redis connection (string responses), configured through REDIS_URL.
r = redis.from_url(os.getenv("REDIS_URL"), decode_responses=True)
51+
3952
def validate_pow(nonce, data, difficulty):
    """Check a client-supplied proof-of-work nonce against the raw request body.

    The hash is the SHA-256 of the nonce, encoded as a 32-bit little-endian
    integer, concatenated with the raw body. This calculation has to match
    the code on the client side, in index.html.

    Args:
        nonce: The X-Nonce header value (a string or int). May be None when
            the header is absent.
        data: The raw request body as bytes.
        difficulty: Inclusive upper bound for the first 32 bits of the digest,
            read as a big-endian unsigned integer.

    Returns:
        True when the first 32 bits of the digest are <= difficulty. False
        otherwise, including when the nonce is missing, is not an integer, or
        does not fit in an unsigned 32-bit value.
    """
    try:
        nonce_bytes = int(nonce).to_bytes(4, byteorder="little")  # 32-bit = 4 bytes
    except (TypeError, ValueError, OverflowError):
        # Missing header (None), non-numeric text, or a value outside
        # 0..2**32-1: treat as a failed proof instead of crashing the request.
        return False

    calculated_hash = hashlib.sha256(nonce_bytes + data).digest()
    first_uint32 = int.from_bytes(calculated_hash[:4], byteorder="big")
    return first_uint32 <= difficulty
4760
4861
49-
def handle_ask_request(request, session):
    """Answer an "ask" request with a streamed markdown response.

    Args:
        request: The incoming Flask request; its JSON body is expected to be
            an object with a "query" key.
        session: The Flask session, used to persist an anonymous analytics id.

    Returns:
        A streaming Response with content type text/markdown wrapping the
        output of generate(), or a (json, 400) tuple when no query was
        provided.
    """
    data = request.get_json()
    # A syntactically valid JSON body can still be null, a list, or a scalar;
    # only an object can carry "query", so anything else counts as "no query"
    # rather than raising AttributeError on .get().
    query = data.get("query") if isinstance(data, dict) else None

    if not query:
        return jsonify({"error": "No query provided"}), 400

    # For analytics tracking, generates an anonymous id and uses it for the session
    if "anonymous_id" not in session:
        session["anonymous_id"] = str(uuid.uuid4())
    anonymous_id = session["anonymous_id"]

    # Determine the source based on the user agent
    user_agent = request.headers.get("User-Agent", "")
    source = (
        "Ask Defang Discord Bot"
        if "Discord Bot" in user_agent
        else "Ask Defang Website"
    )

    # Use the shared generate function directly
    return Response(
        stream_with_context(generate(app.rag_system, query, source, anonymous_id)),
        content_type="text/markdown",
    )
87+
6888
@app.route("/", methods=["GET", "POST"])
def index():
    """Render the index.html template for GET and POST requests to the root URL."""
    page = render_template("index.html")
    return page
7292
@app.route("/ask", methods=["POST"])
def ask():
    """Handle POST /ask: verify the proof of work, then answer the query.

    Returns a 400 JSON error when the X-Nonce header does not satisfy the
    proof-of-work check against the raw body; otherwise returns whatever
    handle_ask_request produces.
    """
    nonce = request.headers.get("X-Nonce")
    raw_body = request.get_data()

    if validate_pow(nonce, raw_body, 0x50000):
        return handle_ask_request(request, session)

    return jsonify({"error": "Invalid Proof of Work"}), 400
80101
102+
# /v1/ask allows bypassing of CSRF and PoW for clients with a valid Ask Token
@app.route("/v1/ask", methods=["POST"])
@csrf.exempt
def v1_ask():
    """Handle POST /v1/ask for clients presenting a valid bearer Ask Token.

    The token is read from an "Authorization: Bearer <token>" header and
    compared against the ASK_TOKEN environment variable.

    Returns:
        The result of handle_ask_request when the token matches, otherwise a
        (json, 401) tuple.
    """
    import hmac  # function-scope import: only needed for the comparison below

    prefix = "Bearer "
    auth_header = request.headers.get("Authorization") or ""
    # Slice off the prefix instead of splitting, so a token that happens to
    # contain "Bearer " is kept intact rather than truncated.
    ask_token = auth_header[len(prefix):] if auth_header.startswith(prefix) else ""
    expected_token = os.getenv("ASK_TOKEN") or ""

    # Both sides must be non-empty; compare in constant time so response
    # timing does not reveal how much of the token matched.
    if (
        ask_token
        and expected_token
        and hmac.compare_digest(
            ask_token.encode("utf-8"), expected_token.encode("utf-8")
        )
    ):
        return handle_ask_request(request, session)

    return jsonify({"error": "Invalid or missing Ask Token"}), 401
92118
def _run_rebuild_script(script_name):
    """Run one data-refresh script with python3 and report failure.

    Args:
        script_name: File name of the script to execute.

    Returns:
        None when the script exits with status 0, otherwise a (json, 500)
        tuple carrying the script's stderr in "details".
    """
    logger.info("Running %s script...", script_name)
    result = subprocess.run(
        ["python3", script_name], capture_output=True, text=True
    )
    if result.returncode != 0:
        logger.error("Error running %s script: %s", script_name, result.stderr)
        return jsonify(
            {
                "error": f"Error running {script_name} script",
                "details": result.stderr,
            }
        ), 500

    logger.info("Finished running %s script.", script_name)
    return None


@app.route("/trigger-rebuild", methods=["POST"])
@csrf.exempt
def trigger_rebuild():
    """Handle POST /trigger-rebuild: refresh source data and rebuild embeddings.

    The caller must pass ?token=<REBUILD_TOKEN>. The endpoint then runs
    get_knowledge_base.py and get_samples_examples.py in order and calls
    app.rag_system.rebuild().

    Returns:
        (json, 200) on success, (json, 401) when the token is missing or
        wrong (or REBUILD_TOKEN is not configured), and (json, 500) when a
        script, the rebuild, or anything else fails.
    """
    import hmac  # function-scope import: only needed for the comparison below

    expected_token = os.getenv("REBUILD_TOKEN")
    token = request.args.get("token")
    # Reject when either side is missing: with a plain `!=`, an unset
    # REBUILD_TOKEN and an absent ?token would compare equal (None == None)
    # and let the request through. Compare in constant time otherwise.
    if (
        not expected_token
        or not token
        or not hmac.compare_digest(
            token.encode("utf-8"), expected_token.encode("utf-8")
        )
    ):
        return jsonify({"error": "Unauthorized"}), 401

    try:
        # Knowledge base first, then Dockerfiles and compose files from the
        # samples repo; stop at the first script that fails.
        for script_name in ("get_knowledge_base.py", "get_samples_examples.py"):
            failure = _run_rebuild_script(script_name)
            if failure is not None:
                return failure

        logger.info("Rebuilding embeddings...")
        try:
            app.rag_system.rebuild()
        except Exception:
            # Log the traceback server-side; do not return details to the caller.
            logger.exception("Error rebuilding embeddings")
            return jsonify({"error": "Error rebuilding embeddings"}), 500

        logger.info("Finished rebuilding embeddings.")
        return jsonify({"status": "Rebuild triggered successfully"}), 200

    except Exception:
        logger.exception("Error in /trigger-rebuild endpoint")
        return jsonify({"error": "Internal Server Error"}), 500
130171
172+
@app.route("/data/<path:name>")
@csrf.exempt
def download_file(name):
    """Send the file at path `name` from the "data" directory as an attachment."""
    directory = "data"
    return send_from_directory(directory, name, as_attachment=True)
177+
137178
138179# Handle incoming webhooks from Intercom
139- @app .route (' /intercom-webhook' , methods = [' POST' ])
180+ @app .route (" /intercom-webhook" , methods = [" POST" ])
140181@csrf .exempt
141182def handle_webhook ():
142183 if not check_intercom_ip (request ):
@@ -145,53 +186,77 @@ def handle_webhook():
145186 data = request .json
146187
147188 logger .info (f"Received Intercom webhook: { data } " )
148- conversation_id = data .get (' data' , {}).get (' item' , {}).get ('id' )
189+ conversation_id = data .get (" data" , {}).get (" item" , {}).get ("id" )
149190
150191 # Check for the type of the webhook event
151- topic = data .get (' topic' )
192+ topic = data .get (" topic" )
152193 logger .info (f"Webhook topic: { topic } " )
153- if topic == ' conversation.admin.replied' :
194+ if topic == " conversation.admin.replied" :
154195 # In this case, the webhook event is an admin reply
155196 # Check if the admin is a bot or human based on presence of a message marker (e.g., "🤖") in the last message
156- last_message = data .get ('data' , {}).get ('item' , {}).get ('conversation_parts' , {}).get ('conversation_parts' , [])[- 1 ].get ('body' , '' )
197+ last_message = (
198+ data .get ("data" , {})
199+ .get ("item" , {})
200+ .get ("conversation_parts" , {})
201+ .get ("conversation_parts" , [])[- 1 ]
202+ .get ("body" , "" )
203+ )
157204 last_message_text = parse_html_to_text (last_message )
158205
159206 logger .info (f"Parsed last message text: { last_message_text } " )
160207 if last_message_text and last_message_text .endswith ("🤖" ):
161208 # If the last message ends with the marker, it indicates a bot reply
162- logger .info (f"Last message in conversation { conversation_id } ends with the marker 🤖" )
163- logger .info (f"Detected bot admin reply in conversation { conversation_id } ; no action taken." )
209+ logger .info (
210+ f"Last message in conversation { conversation_id } ends with the marker 🤖"
211+ )
212+ logger .info (
213+ f"Detected bot admin reply in conversation { conversation_id } ; no action taken."
214+ )
164215 else :
165216 # If the last message does not end with the marker, it indicates a human reply
166- logger .info (f"Detected human admin reply in conversation { conversation_id } ; marking as human admin-replied..." )
217+ logger .info (
218+ f"Detected human admin reply in conversation { conversation_id } ; marking as human admin-replied..."
219+ )
167220 # Mark the conversation as replied by a human admin to skip LLM responses in the future
168221 set_conversation_human_replied (conversation_id , r )
169- logger .info (f"Successfully marked conversation { conversation_id } as human admin-replied." )
222+ logger .info (
223+ f"Successfully marked conversation { conversation_id } as human admin-replied."
224+ )
170225
171- elif topic == ' conversation.user.replied' or topic == ' conversation.user.created' :
226+ elif topic == " conversation.user.replied" or topic == " conversation.user.created" :
172227 # In this case, the webhook event is a user reply or a new user conversation
173228 # Check if the conversation is of type email, and skip processing if so
174- conversation_type = data .get ('data' , {}).get ('item' , {}).get ('source' , {}).get ('type' )
175- if conversation_type == 'email' :
176- logger .info (f"Conversation { conversation_id } is of type email; no action taken." )
177- return 'OK'
229+ conversation_type = (
230+ data .get ("data" , {}).get ("item" , {}).get ("source" , {}).get ("type" )
231+ )
232+ if conversation_type == "email" :
233+ logger .info (
234+ f"Conversation { conversation_id } is of type email; no action taken."
235+ )
236+ return "OK"
178237
179238 # Check if it is a user reply and do the admin-replied checks if so
180239 # For new user conversations, we will skip admin-replied check to avoid false positives from Intercom auto-replies
181- if topic == ' conversation.user.replied' :
240+ if topic == " conversation.user.replied" :
182241 # Check if the conversation was replied previously by a human admin and skip processing if so
183242 if is_conversation_human_replied (conversation_id , r ):
184- logger .info (f"Conversation { conversation_id } already marked as human admin-replied; no action taken." )
185- return 'OK'
243+ logger .info (
244+ f"Conversation { conversation_id } already marked as human admin-replied; no action taken."
245+ )
246+ return "OK"
186247
187248 # Fetch the conversation and generate an LLM answer for the user
188- logger .info (f"Detected a user reply in conversation { conversation_id } ; fetching an answer from LLM..." )
249+ logger .info (
250+ f"Detected a user reply in conversation { conversation_id } ; fetching an answer from LLM..."
251+ )
189252 answer_intercom_conversation (app .rag_system , conversation_id , topic )
190253
191254 else :
192- logger .info (f"Received webhook for unsupported topic: { topic } ; no action taken." )
193- return 'OK'
255+ logger .info (
256+ f"Received webhook for unsupported topic: { topic } ; no action taken."
257+ )
258+ return "OK"
194259
195260
# When executed as a script, start Flask's built-in server on port 5050,
# listening on all interfaces.
if __name__ == "__main__":
    app.run(port=5050, host="0.0.0.0")
0 commit comments