Commit 4af0f15

Merge pull request #90 from DefangLabs/eric/add-linter
add linter and formatter to CI
2 parents acc4c71 + 5197232 commit 4af0f15

13 files changed: +588 -281 lines changed

.github/workflows/build.yaml

Lines changed: 13 additions & 0 deletions

```diff
@@ -13,8 +13,21 @@ jobs:
     steps:
       - name: Install Docker Buildx
         uses: docker/setup-buildx-action@v2
+
       - name: Checkout Repo
         uses: actions/checkout@v4
+
+      # Ruff linting steps
+      - name: Run Ruff linter
+        uses: astral-sh/ruff-action@v1
+        with:
+          args: check ./app
+
+      - name: Run Ruff formatter check
+        uses: astral-sh/ruff-action@v1
+        with:
+          args: "format --check"
+
       - name: Build Docker image
         working-directory: ./app
         run: |
```
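The two Ruff steps invoke the same CLI a developer can run locally. A minimal sketch of the equivalent commands, assuming `ruff` is installed (e.g. via the Makefile added in this commit):

```sh
ruff check ./app      # lint step: mirrors `args: check ./app`
ruff format --check   # formatter step: report files that would be reformatted
```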

.pre-commit-config.yaml

Lines changed: 10 additions & 0 deletions

```diff
@@ -0,0 +1,10 @@
+repos:
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.9.10
+    hooks:
+      # Run the linter.
+      - id: ruff
+        types_or: [python, pyi]
+      # Run the formatter.
+      - id: ruff-format
```
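For local development the hooks are installed and exercised with the pre-commit CLI; the commands below are a minimal sketch of that setup, matching the `pre-commit-install` and `check` targets in the Makefile further down:

```sh
pip install ruff pre-commit   # the Makefile installs these separately
pre-commit install            # register the git hook defined in .pre-commit-config.yaml
pre-commit run --all-files    # run ruff and ruff-format against the whole repo once
```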

Makefile

Lines changed: 21 additions & 0 deletions

```diff
@@ -0,0 +1,21 @@
+.PHONY: tests
+tests:
+	PYTHONPATH=src pytest
+
+.PHONY: lint
+lint: # Run pre-commit on staged/changed files
+	pre-commit run
+
+.PHONY: check
+check: # Run all pre-commit hooks on all files (useful for CI or full check)
+	pre-commit run --all-files
+
+.PHONY: format
+format: # Manually run ruff formatter on all files
+	ruff format .
+
+.PHONY: pre-commit-install
+pre-commit-install: # Install pre-commit hooks changes
+	pip install ruff
+	pip install pre-commit
+	pre-commit install
```

app/app.py

Lines changed: 131 additions & 66 deletions

```diff
@@ -1,4 +1,13 @@
-from flask import Flask, request, jsonify, render_template, Response, stream_with_context, session, send_from_directory
+from flask import (
+    Flask,
+    request,
+    jsonify,
+    render_template,
+    Response,
+    stream_with_context,
+    session,
+    send_from_directory,
+)
 from flask_wtf.csrf import CSRFProtect
 from rag_system import RAGSystem
 import hashlib
@@ -7,136 +16,168 @@
 import segment.analytics as analytics
 import uuid
 
-from werkzeug.test import EnvironBuilder
-from werkzeug.wrappers import Request
-
 import logging
 import redis
-from intercom import parse_html_to_text, set_conversation_human_replied, is_conversation_human_replied, answer_intercom_conversation, check_intercom_ip
+from intercom import (
+    parse_html_to_text,
+    set_conversation_human_replied,
+    is_conversation_human_replied,
+    answer_intercom_conversation,
+    check_intercom_ip,
+)
 from utils import generate
 
 # Configure logging
 logging.basicConfig(
-    level=logging.DEBUG if os.getenv('DEBUG') else logging.INFO,
+    level=logging.DEBUG if os.getenv("DEBUG") else logging.INFO,
     format="%(asctime)s %(levelname)s %(message)s",
-    datefmt="%Y-%m-%d %H:%M:%S"
+    datefmt="%Y-%m-%d %H:%M:%S",
 )
 logger = logging.getLogger(__name__)
 
-analytics.write_key = os.getenv('SEGMENT_WRITE_KEY')
+analytics.write_key = os.getenv("SEGMENT_WRITE_KEY")
 
-app = Flask(__name__, static_folder='templates/static')
-app.config['SECRET_KEY'] = os.getenv('SECRET_KEY')
-app.config['SESSION_COOKIE_HTTPONLY'] = True
-app.config['SESSION_COOKIE_SECURE'] = bool(os.getenv('SESSION_COOKIE_SECURE'))
+app = Flask(__name__, static_folder="templates/static")
+app.config["SECRET_KEY"] = os.getenv("SECRET_KEY")
+app.config["SESSION_COOKIE_HTTPONLY"] = True
+app.config["SESSION_COOKIE_SECURE"] = bool(os.getenv("SESSION_COOKIE_SECURE"))
 
 app.rag_system = RAGSystem()
 
 csrf = CSRFProtect(app)
 
 # Initialize Redis connection
-r = redis.from_url(os.getenv('REDIS_URL'), decode_responses=True)
+r = redis.from_url(os.getenv("REDIS_URL"), decode_responses=True)
+
 
 def validate_pow(nonce, data, difficulty):
     # Calculate the sha256 of the concatenated string of 32-bit X-Nonce header and raw body.
     # This calculation has to match the code on the client side, in index.html.
-    nonce_bytes = int(nonce).to_bytes(4, byteorder='little')  # 32-bit = 4 bytes
+    nonce_bytes = int(nonce).to_bytes(4, byteorder="little")  # 32-bit = 4 bytes
     calculated_hash = hashlib.sha256(nonce_bytes + data).digest()
-    first_uint32 = int.from_bytes(calculated_hash[:4], byteorder='big')
+    first_uint32 = int.from_bytes(calculated_hash[:4], byteorder="big")
     return first_uint32 <= difficulty
 
 
-
 def handle_ask_request(request, session):
     data = request.get_json()
-    query = data.get('query')
+    query = data.get("query")
 
     if not query:
         return jsonify({"error": "No query provided"}), 400
 
     # For analytics tracking, generates an anonymous id and uses it for the session
-    if 'anonymous_id' not in session:
-        session['anonymous_id'] = str(uuid.uuid4())
-    anonymous_id = session['anonymous_id']
+    if "anonymous_id" not in session:
+        session["anonymous_id"] = str(uuid.uuid4())
+    anonymous_id = session["anonymous_id"]
 
     # Determine the source based on the user agent
-    user_agent = request.headers.get('User-Agent', '')
-    source = 'Ask Defang Discord Bot' if 'Discord Bot' in user_agent else 'Ask Defang Website'
+    user_agent = request.headers.get("User-Agent", "")
+    source = (
+        "Ask Defang Discord Bot"
+        if "Discord Bot" in user_agent
+        else "Ask Defang Website"
+    )
 
     # Use the shared generate function directly
-    return Response(stream_with_context(generate(app.rag_system, query, source, anonymous_id)), content_type='text/markdown')
+    return Response(
+        stream_with_context(generate(app.rag_system, query, source, anonymous_id)),
+        content_type="text/markdown",
+    )
+
 
-@app.route('/', methods=['GET', 'POST'])
+@app.route("/", methods=["GET", "POST"])
 def index():
-    return render_template('index.html')
+    return render_template("index.html")
 
-@app.route('/ask', methods=['POST'])
+
+@app.route("/ask", methods=["POST"])
 def ask():
-    if not validate_pow(request.headers.get('X-Nonce'), request.get_data(), 0x50000):
+    if not validate_pow(request.headers.get("X-Nonce"), request.get_data(), 0x50000):
         return jsonify({"error": "Invalid Proof of Work"}), 400
 
     response = handle_ask_request(request, session)
     return response
 
+
 # /v1/ask allows bypassing of CSRF and PoW for clients with a valid Ask Token
-@app.route('/v1/ask', methods=['POST'])
+@app.route("/v1/ask", methods=["POST"])
 @csrf.exempt
 def v1_ask():
-    auth_header = request.headers.get('Authorization')
-    ask_token = auth_header.split("Bearer ")[1] if auth_header and auth_header.startswith("Bearer ") else None
-    if ask_token and ask_token == os.getenv('ASK_TOKEN'):
+    auth_header = request.headers.get("Authorization")
+    ask_token = (
+        auth_header.split("Bearer ")[1]
+        if auth_header and auth_header.startswith("Bearer ")
+        else None
+    )
+    if ask_token and ask_token == os.getenv("ASK_TOKEN"):
         response = handle_ask_request(request, session)
         return response
     else:
         return jsonify({"error": "Invalid or missing Ask Token"}), 401
 
-@app.route('/trigger-rebuild', methods=['POST'])
+
+@app.route("/trigger-rebuild", methods=["POST"])
 @csrf.exempt
 def trigger_rebuild():
-    token = request.args.get('token')
-    if token != os.getenv('REBUILD_TOKEN'):
+    token = request.args.get("token")
+    if token != os.getenv("REBUILD_TOKEN"):
         return jsonify({"error": "Unauthorized"}), 401
     try:
         print("Running get_knowledge_base.py script...")
-        result = subprocess.run(["python3", "get_knowledge_base.py"], capture_output=True, text=True)
+        result = subprocess.run(
+            ["python3", "get_knowledge_base.py"], capture_output=True, text=True
+        )
         if result.returncode != 0:
             print(f"Error running get_knowledge_base.py script: {result.stderr}")
-            return jsonify({"error": "Error running get_knowledge_base.py script", "details": result.stderr}), 500
+            return jsonify(
+                {
+                    "error": "Error running get_knowledge_base.py script",
+                    "details": result.stderr,
+                }
+            ), 500
 
         print("Finished running get_knowledge_base.py script.")
 
         # get Dockerfiles and compose files from samples repo
         print("Running get_samples_examples.py script...")
-        result = subprocess.run(["python3", "get_samples_examples.py"], capture_output=True, text=True)
+        result = subprocess.run(
+            ["python3", "get_samples_examples.py"], capture_output=True, text=True
+        )
         if result.returncode != 0:
             print(f"Error running get_samples_examples.py script: {result.stderr}")
-            return jsonify({"error": "Error running get_samples_examples.py script", "details": result.stderr}), 500
+            return jsonify(
+                {
+                    "error": "Error running get_samples_examples.py script",
+                    "details": result.stderr,
+                }
+            ), 500
 
         print("Finished running get_samples_examples.py script.")
 
         print("Rebuilding embeddings...")
         try:
             app.rag_system.rebuild()
         except Exception as e:
-            print(f"Error rebuilding embeddings: {str(e)}")
-            return jsonify({"error": "Error rebuilding embeddings", "details": str(e)}), 500
+            logging.error(f"Error rebuilding embeddings: {str(e)}")
+            return jsonify({"error": "Error rebuilding embeddings"}), 500
 
-        print("Finished rebuilding embeddings.")
+        logging.info("Finished rebuilding embeddings.")
         return jsonify({"status": "Rebuild triggered successfully"}), 200
 
     except Exception as e:
         print(f"Error in /trigger-rebuild endpoint: {e}")
         return jsonify({"error": "Internal Server Error"}), 500
 
+
 @app.route("/data/<path:name>")
 @csrf.exempt
 def download_file(name):
-    return send_from_directory(
-        "data", name, as_attachment=True
-    )
+    return send_from_directory("data", name, as_attachment=True)
+
 
 # Handle incoming webhooks from Intercom
-@app.route('/intercom-webhook', methods=['POST'])
+@app.route("/intercom-webhook", methods=["POST"])
 @csrf.exempt
 def handle_webhook():
     if not check_intercom_ip(request):
@@ -145,53 +186,77 @@ def handle_webhook():
     data = request.json
 
     logger.info(f"Received Intercom webhook: {data}")
-    conversation_id = data.get('data', {}).get('item', {}).get('id')
+    conversation_id = data.get("data", {}).get("item", {}).get("id")
 
     # Check for the type of the webhook event
-    topic = data.get('topic')
+    topic = data.get("topic")
     logger.info(f"Webhook topic: {topic}")
-    if topic == 'conversation.admin.replied':
+    if topic == "conversation.admin.replied":
         # In this case, the webhook event is an admin reply
         # Check if the admin is a bot or human based on presence of a message marker (e.g., "🤖") in the last message
-        last_message = data.get('data', {}).get('item', {}).get('conversation_parts', {}).get('conversation_parts', [])[-1].get('body', '')
+        last_message = (
+            data.get("data", {})
+            .get("item", {})
+            .get("conversation_parts", {})
+            .get("conversation_parts", [])[-1]
+            .get("body", "")
+        )
         last_message_text = parse_html_to_text(last_message)
 
         logger.info(f"Parsed last message text: {last_message_text}")
         if last_message_text and last_message_text.endswith("🤖"):
             # If the last message ends with the marker, it indicates a bot reply
-            logger.info(f"Last message in conversation {conversation_id} ends with the marker 🤖")
-            logger.info(f"Detected bot admin reply in conversation {conversation_id}; no action taken.")
+            logger.info(
+                f"Last message in conversation {conversation_id} ends with the marker 🤖"
+            )
+            logger.info(
+                f"Detected bot admin reply in conversation {conversation_id}; no action taken."
+            )
         else:
             # If the last message does not end with the marker, it indicates a human reply
-            logger.info(f"Detected human admin reply in conversation {conversation_id}; marking as human admin-replied...")
+            logger.info(
+                f"Detected human admin reply in conversation {conversation_id}; marking as human admin-replied..."
+            )
             # Mark the conversation as replied by a human admin to skip LLM responses in the future
             set_conversation_human_replied(conversation_id, r)
-            logger.info(f"Successfully marked conversation {conversation_id} as human admin-replied.")
+            logger.info(
+                f"Successfully marked conversation {conversation_id} as human admin-replied."
+            )
 
-    elif topic == 'conversation.user.replied' or topic == 'conversation.user.created':
+    elif topic == "conversation.user.replied" or topic == "conversation.user.created":
         # In this case, the webhook event is a user reply or a new user conversation
         # Check if the conversation is of type email, and skip processing if so
-        conversation_type = data.get('data', {}).get('item', {}).get('source', {}).get('type')
-        if conversation_type == 'email':
-            logger.info(f"Conversation {conversation_id} is of type email; no action taken.")
-            return 'OK'
+        conversation_type = (
+            data.get("data", {}).get("item", {}).get("source", {}).get("type")
+        )
+        if conversation_type == "email":
+            logger.info(
+                f"Conversation {conversation_id} is of type email; no action taken."
+            )
+            return "OK"
 
         # Check if it is a user reply and do the admin-replied checks if so
         # For new user conversations, we will skip admin-replied check to avoid false positives from Intercom auto-replies
-        if topic == 'conversation.user.replied':
+        if topic == "conversation.user.replied":
            # Check if the conversation was replied previously by a human admin and skip processing if so
            if is_conversation_human_replied(conversation_id, r):
-                logger.info(f"Conversation {conversation_id} already marked as human admin-replied; no action taken.")
-                return 'OK'
+                logger.info(
+                    f"Conversation {conversation_id} already marked as human admin-replied; no action taken."
                )
+                return "OK"
 
         # Fetch the conversation and generate an LLM answer for the user
-        logger.info(f"Detected a user reply in conversation {conversation_id}; fetching an answer from LLM...")
+        logger.info(
+            f"Detected a user reply in conversation {conversation_id}; fetching an answer from LLM..."
+        )
         answer_intercom_conversation(app.rag_system, conversation_id, topic)
 
     else:
-        logger.info(f"Received webhook for unsupported topic: {topic}; no action taken.")
-        return 'OK'
+        logger.info(
+            f"Received webhook for unsupported topic: {topic}; no action taken."
        )
+        return "OK"
 
 
-if __name__ == '__main__':
-    app.run(host='0.0.0.0', port=5050)
+if __name__ == "__main__":
+    app.run(host="0.0.0.0", port=5050)
```
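The `/ask` route is gated by `validate_pow`, which hashes the 32-bit `X-Nonce` header together with the raw request body and accepts the request only if the first four digest bytes, read big-endian, fall at or below the difficulty constant `0x50000`. The real nonce search lives in client-side JavaScript in `index.html`; the snippet below is only an illustrative Python equivalent of that search, using a hypothetical `find_nonce` helper.

```python
import hashlib


def find_nonce(body: bytes, difficulty: int = 0x50000) -> int:
    """Brute-force a 32-bit nonce that would satisfy validate_pow for this body."""
    for nonce in range(2**32):
        nonce_bytes = nonce.to_bytes(4, byteorder="little")  # same layout as the server
        digest = hashlib.sha256(nonce_bytes + body).digest()
        if int.from_bytes(digest[:4], byteorder="big") <= difficulty:
            return nonce
    raise RuntimeError("no 32-bit nonce satisfies the difficulty")


# Example: compute the X-Nonce value for a JSON body before POSTing it to /ask.
body = b'{"query": "How do I deploy a service with Defang?"}'
print(find_nonce(body))
```

At this difficulty roughly one nonce in 13,000 clears the threshold, so the search stays cheap for an interactive client while remaining nonzero for bulk callers.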
