Skip to content

Commit 35a13ce

Browse files
author
Eric Liu
committed
add linter and formatter to CI
1 parent f0ed656 commit 35a13ce

File tree

13 files changed

+592
-281
lines changed

13 files changed

+592
-281
lines changed

.github/workflows/build.yaml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,27 @@ jobs:
1313
steps:
1414
- name: Install Docker Buildx
1515
uses: docker/setup-buildx-action@v2
16+
1617
- name: Checkout Repo
1718
uses: actions/checkout@v4
19+
20+
# Lint and format-check ./app with Ruff before building the Docker image
21+
- name: Set up Python
22+
uses: actions/setup-python@v4
23+
with:
24+
python-version: '3.11'
25+
26+
- name: Install Ruff
27+
# Quote the requirement so the shell does not treat ">" as an output redirection.
run: pip install "ruff>=0.12.5"
28+
29+
- name: Run Ruff linter
30+
working-directory: ./app
31+
run: ruff check .
32+
33+
- name: Run Ruff formatter check
34+
working-directory: ./app
35+
run: ruff format --check .
36+
1837
- name: Build Docker image
1938
working-directory: ./app
2039
run: |

.pre-commit-config.yaml

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
repos:
2+
- repo: https://github.com/astral-sh/ruff-pre-commit
3+
# Ruff version.
4+
# Keep in sync with the minimum Ruff version installed in CI (ruff>=0.12.5).
rev: v0.12.5
5+
hooks:
6+
# Run the linter.
7+
- id: ruff
8+
types_or: [python, pyi]
9+
# Run the formatter.
10+
- id: ruff-format

Makefile

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
.PHONY: tests
2+
tests:
3+
PYTHONPATH=src pytest
4+
5+
.PHONY: lint
6+
lint: # Run pre-commit on staged/changed files
7+
pre-commit run
8+
9+
.PHONY: check
10+
check: # Run all pre-commit hooks on all files (useful for CI or full check)
11+
pre-commit run --all-files
12+
13+
.PHONY: format
14+
format: # Manually run ruff formatter on all files
15+
ruff format .
16+
17+
.PHONY: pre-commit-install
18+
pre-commit-install: # Install ruff and pre-commit via pip, then install the git pre-commit hooks
19+
pip install ruff
20+
pip install pre-commit
21+
pre-commit install

app/app.py

Lines changed: 131 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,13 @@
1-
from flask import Flask, request, jsonify, render_template, Response, stream_with_context, session, send_from_directory
1+
from flask import (
2+
Flask,
3+
request,
4+
jsonify,
5+
render_template,
6+
Response,
7+
stream_with_context,
8+
session,
9+
send_from_directory,
10+
)
211
from flask_wtf.csrf import CSRFProtect
312
from rag_system import RAGSystem
413
import hashlib
@@ -7,110 +16,142 @@
716
import segment.analytics as analytics
817
import uuid
918

10-
from werkzeug.test import EnvironBuilder
11-
from werkzeug.wrappers import Request
12-
1319
import logging
1420
import redis
15-
from intercom import parse_html_to_text, set_conversation_human_replied, is_conversation_human_replied, answer_intercom_conversation, check_intercom_ip
21+
from intercom import (
22+
parse_html_to_text,
23+
set_conversation_human_replied,
24+
is_conversation_human_replied,
25+
answer_intercom_conversation,
26+
check_intercom_ip,
27+
)
1628
from utils import generate
1729

1830
# Configure logging
1931
logging.basicConfig(
20-
level=logging.DEBUG if os.getenv('DEBUG') else logging.INFO,
32+
level=logging.DEBUG if os.getenv("DEBUG") else logging.INFO,
2133
format="%(asctime)s %(levelname)s %(message)s",
22-
datefmt="%Y-%m-%d %H:%M:%S"
34+
datefmt="%Y-%m-%d %H:%M:%S",
2335
)
2436
logger = logging.getLogger(__name__)
2537

26-
analytics.write_key = os.getenv('SEGMENT_WRITE_KEY')
38+
analytics.write_key = os.getenv("SEGMENT_WRITE_KEY")
2739

28-
app = Flask(__name__, static_folder='templates/static')
29-
app.config['SECRET_KEY'] = os.getenv('SECRET_KEY')
30-
app.config['SESSION_COOKIE_HTTPONLY'] = True
31-
app.config['SESSION_COOKIE_SECURE'] = bool(os.getenv('SESSION_COOKIE_SECURE'))
40+
app = Flask(__name__, static_folder="templates/static")
41+
app.config["SECRET_KEY"] = os.getenv("SECRET_KEY")
42+
app.config["SESSION_COOKIE_HTTPONLY"] = True
43+
app.config["SESSION_COOKIE_SECURE"] = bool(os.getenv("SESSION_COOKIE_SECURE"))
3244

3345
app.rag_system = RAGSystem()
3446

3547
csrf = CSRFProtect(app)
3648

3749
# Initialize Redis connection
38-
r = redis.from_url(os.getenv('REDIS_URL'), decode_responses=True)
50+
r = redis.from_url(os.getenv("REDIS_URL"), decode_responses=True)
51+
3952

4053
def validate_pow(nonce, data, difficulty):
4154
# Calculate the sha256 of the concatenated string of 32-bit X-Nonce header and raw body.
4255
# This calculation has to match the code on the client side, in index.html.
43-
nonce_bytes = int(nonce).to_bytes(4, byteorder='little') # 32-bit = 4 bytes
56+
nonce_bytes = int(nonce).to_bytes(4, byteorder="little") # 32-bit = 4 bytes
4457
calculated_hash = hashlib.sha256(nonce_bytes + data).digest()
45-
first_uint32 = int.from_bytes(calculated_hash[:4], byteorder='big')
58+
first_uint32 = int.from_bytes(calculated_hash[:4], byteorder="big")
4659
return first_uint32 <= difficulty
4760

4861

49-
5062
def handle_ask_request(request, session):
5163
data = request.get_json()
52-
query = data.get('query')
64+
query = data.get("query")
5365

5466
if not query:
5567
return jsonify({"error": "No query provided"}), 400
5668

5769
# For analytics tracking, generates an anonymous id and uses it for the session
58-
if 'anonymous_id' not in session:
59-
session['anonymous_id'] = str(uuid.uuid4())
60-
anonymous_id = session['anonymous_id']
70+
if "anonymous_id" not in session:
71+
session["anonymous_id"] = str(uuid.uuid4())
72+
anonymous_id = session["anonymous_id"]
6173

6274
# Determine the source based on the user agent
63-
user_agent = request.headers.get('User-Agent', '')
64-
source = 'Ask Defang Discord Bot' if 'Discord Bot' in user_agent else 'Ask Defang Website'
75+
user_agent = request.headers.get("User-Agent", "")
76+
source = (
77+
"Ask Defang Discord Bot"
78+
if "Discord Bot" in user_agent
79+
else "Ask Defang Website"
80+
)
6581

6682
# Use the shared generate function directly
67-
return Response(stream_with_context(generate(app.rag_system, query, source, anonymous_id)), content_type='text/markdown')
83+
return Response(
84+
stream_with_context(generate(app.rag_system, query, source, anonymous_id)),
85+
content_type="text/markdown",
86+
)
87+
6888

69-
@app.route('/', methods=['GET', 'POST'])
89+
@app.route("/", methods=["GET", "POST"])
7090
def index():
71-
return render_template('index.html')
91+
return render_template("index.html")
7292

73-
@app.route('/ask', methods=['POST'])
93+
94+
@app.route("/ask", methods=["POST"])
7495
def ask():
75-
if not validate_pow(request.headers.get('X-Nonce'), request.get_data(), 0x50000):
96+
if not validate_pow(request.headers.get("X-Nonce"), request.get_data(), 0x50000):
7697
return jsonify({"error": "Invalid Proof of Work"}), 400
7798

7899
response = handle_ask_request(request, session)
79100
return response
80101

102+
81103
# /v1/ask allows bypassing of CSRF and PoW for clients with a valid Ask Token
82-
@app.route('/v1/ask', methods=['POST'])
104+
@app.route("/v1/ask", methods=["POST"])
83105
@csrf.exempt
84106
def v1_ask():
85-
auth_header = request.headers.get('Authorization')
86-
ask_token = auth_header.split("Bearer ")[1] if auth_header and auth_header.startswith("Bearer ") else None
87-
if ask_token and ask_token == os.getenv('ASK_TOKEN'):
107+
auth_header = request.headers.get("Authorization")
108+
ask_token = (
109+
auth_header.split("Bearer ")[1]
110+
if auth_header and auth_header.startswith("Bearer ")
111+
else None
112+
)
113+
if ask_token and ask_token == os.getenv("ASK_TOKEN"):
88114
response = handle_ask_request(request, session)
89115
return response
90116
else:
91117
return jsonify({"error": "Invalid or missing Ask Token"}), 401
92118

93-
@app.route('/trigger-rebuild', methods=['POST'])
119+
120+
@app.route("/trigger-rebuild", methods=["POST"])
94121
@csrf.exempt
95122
def trigger_rebuild():
96-
token = request.args.get('token')
97-
if token != os.getenv('REBUILD_TOKEN'):
123+
token = request.args.get("token")
124+
if token != os.getenv("REBUILD_TOKEN"):
98125
return jsonify({"error": "Unauthorized"}), 401
99126
try:
100127
print("Running get_knowledge_base.py script...")
101-
result = subprocess.run(["python3", "get_knowledge_base.py"], capture_output=True, text=True)
128+
result = subprocess.run(
129+
["python3", "get_knowledge_base.py"], capture_output=True, text=True
130+
)
102131
if result.returncode != 0:
103132
print(f"Error running get_knowledge_base.py script: {result.stderr}")
104-
return jsonify({"error": "Error running get_knowledge_base.py script", "details": result.stderr}), 500
133+
return jsonify(
134+
{
135+
"error": "Error running get_knowledge_base.py script",
136+
"details": result.stderr,
137+
}
138+
), 500
105139

106140
print("Finished running get_knowledge_base.py script.")
107141

108142
# get Dockerfiles and compose files from samples repo
109143
print("Running get_samples_examples.py script...")
110-
result = subprocess.run(["python3", "get_samples_examples.py"], capture_output=True, text=True)
144+
result = subprocess.run(
145+
["python3", "get_samples_examples.py"], capture_output=True, text=True
146+
)
111147
if result.returncode != 0:
112148
print(f"Error running get_samples_examples.py script: {result.stderr}")
113-
return jsonify({"error": "Error running get_samples_examples.py script", "details": result.stderr}), 500
149+
return jsonify(
150+
{
151+
"error": "Error running get_samples_examples.py script",
152+
"details": result.stderr,
153+
}
154+
), 500
114155

115156
print("Finished running get_samples_examples.py script.")
116157

@@ -119,7 +160,9 @@ def trigger_rebuild():
119160
app.rag_system.rebuild()
120161
except Exception as e:
121162
print(f"Error rebuilding embeddings: {str(e)}")
122-
return jsonify({"error": "Error rebuilding embeddings", "details": str(e)}), 500
163+
return jsonify(
164+
{"error": "Error rebuilding embeddings", "details": str(e)}
165+
), 500
123166

124167
print("Finished rebuilding embeddings.")
125168
return jsonify({"status": "Rebuild triggered successfully"}), 200
@@ -128,15 +171,15 @@ def trigger_rebuild():
128171
print(f"Error in /trigger-rebuild endpoint: {e}")
129172
return jsonify({"error": "Internal Server Error"}), 500
130173

174+
131175
@app.route("/data/<path:name>")
132176
@csrf.exempt
133177
def download_file(name):
134-
return send_from_directory(
135-
"data", name, as_attachment=True
136-
)
178+
return send_from_directory("data", name, as_attachment=True)
179+
137180

138181
# Handle incoming webhooks from Intercom
139-
@app.route('/intercom-webhook', methods=['POST'])
182+
@app.route("/intercom-webhook", methods=["POST"])
140183
@csrf.exempt
141184
def handle_webhook():
142185
if not check_intercom_ip(request):
@@ -145,53 +188,77 @@ def handle_webhook():
145188
data = request.json
146189

147190
logger.info(f"Received Intercom webhook: {data}")
148-
conversation_id = data.get('data', {}).get('item', {}).get('id')
191+
conversation_id = data.get("data", {}).get("item", {}).get("id")
149192

150193
# Check for the type of the webhook event
151-
topic = data.get('topic')
194+
topic = data.get("topic")
152195
logger.info(f"Webhook topic: {topic}")
153-
if topic == 'conversation.admin.replied':
196+
if topic == "conversation.admin.replied":
154197
# In this case, the webhook event is an admin reply
155198
# Check if the admin is a bot or human based on presence of a message marker (e.g., "🤖") in the last message
156-
last_message = data.get('data', {}).get('item', {}).get('conversation_parts', {}).get('conversation_parts', [])[-1].get('body', '')
199+
last_message = (
200+
data.get("data", {})
201+
.get("item", {})
202+
.get("conversation_parts", {})
203+
.get("conversation_parts", [])[-1]
204+
.get("body", "")
205+
)
157206
last_message_text = parse_html_to_text(last_message)
158207

159208
logger.info(f"Parsed last message text: {last_message_text}")
160209
if last_message_text and last_message_text.endswith("🤖"):
161210
# If the last message ends with the marker, it indicates a bot reply
162-
logger.info(f"Last message in conversation {conversation_id} ends with the marker 🤖")
163-
logger.info(f"Detected bot admin reply in conversation {conversation_id}; no action taken.")
211+
logger.info(
212+
f"Last message in conversation {conversation_id} ends with the marker 🤖"
213+
)
214+
logger.info(
215+
f"Detected bot admin reply in conversation {conversation_id}; no action taken."
216+
)
164217
else:
165218
# If the last message does not end with the marker, it indicates a human reply
166-
logger.info(f"Detected human admin reply in conversation {conversation_id}; marking as human admin-replied...")
219+
logger.info(
220+
f"Detected human admin reply in conversation {conversation_id}; marking as human admin-replied..."
221+
)
167222
# Mark the conversation as replied by a human admin to skip LLM responses in the future
168223
set_conversation_human_replied(conversation_id, r)
169-
logger.info(f"Successfully marked conversation {conversation_id} as human admin-replied.")
224+
logger.info(
225+
f"Successfully marked conversation {conversation_id} as human admin-replied."
226+
)
170227

171-
elif topic == 'conversation.user.replied' or topic == 'conversation.user.created':
228+
elif topic == "conversation.user.replied" or topic == "conversation.user.created":
172229
# In this case, the webhook event is a user reply or a new user conversation
173230
# Check if the conversation is of type email, and skip processing if so
174-
conversation_type = data.get('data', {}).get('item', {}).get('source', {}).get('type')
175-
if conversation_type == 'email':
176-
logger.info(f"Conversation {conversation_id} is of type email; no action taken.")
177-
return 'OK'
231+
conversation_type = (
232+
data.get("data", {}).get("item", {}).get("source", {}).get("type")
233+
)
234+
if conversation_type == "email":
235+
logger.info(
236+
f"Conversation {conversation_id} is of type email; no action taken."
237+
)
238+
return "OK"
178239

179240
# Check if it is a user reply and do the admin-replied checks if so
180241
# For new user conversations, we will skip admin-replied check to avoid false positives from Intercom auto-replies
181-
if topic == 'conversation.user.replied':
242+
if topic == "conversation.user.replied":
182243
# Check if the conversation was replied previously by a human admin and skip processing if so
183244
if is_conversation_human_replied(conversation_id, r):
184-
logger.info(f"Conversation {conversation_id} already marked as human admin-replied; no action taken.")
185-
return 'OK'
245+
logger.info(
246+
f"Conversation {conversation_id} already marked as human admin-replied; no action taken."
247+
)
248+
return "OK"
186249

187250
# Fetch the conversation and generate an LLM answer for the user
188-
logger.info(f"Detected a user reply in conversation {conversation_id}; fetching an answer from LLM...")
251+
logger.info(
252+
f"Detected a user reply in conversation {conversation_id}; fetching an answer from LLM..."
253+
)
189254
answer_intercom_conversation(app.rag_system, conversation_id, topic)
190255

191256
else:
192-
logger.info(f"Received webhook for unsupported topic: {topic}; no action taken.")
193-
return 'OK'
257+
logger.info(
258+
f"Received webhook for unsupported topic: {topic}; no action taken."
259+
)
260+
return "OK"
194261

195262

196-
if __name__ == '__main__':
197-
app.run(host='0.0.0.0', port=5050)
263+
if __name__ == "__main__":
264+
app.run(host="0.0.0.0", port=5050)

0 commit comments

Comments
 (0)