Skip to content

Commit c42f4cb

Browse files
authored
Merge pull request #58 from DefangLabs/linda-intercom
Add Intercom Integration
2 parents a2acc71 + 4859241 commit c42f4cb

File tree

10 files changed

+532
-33
lines changed

10 files changed

+532
-33
lines changed

.github/workflows/deploy.yaml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ jobs:
5252
- name: Deploy
5353
uses: DefangLabs/[email protected]
5454
with:
55-
config-env-vars: ASK_TOKEN OPENAI_API_KEY REBUILD_TOKEN SECRET_KEY SEGMENT_WRITE_KEY DISCORD_APP_ID DISCORD_TOKEN DISCORD_PUBLIC_KEY
55+
config-env-vars: ASK_TOKEN OPENAI_API_KEY REBUILD_TOKEN SECRET_KEY SEGMENT_WRITE_KEY DISCORD_APP_ID DISCORD_TOKEN DISCORD_PUBLIC_KEY INTERCOM_TOKEN INTERCOM_ADMIN_ID
5656
mode: staging
5757
provider: aws
5858

@@ -65,3 +65,5 @@ jobs:
6565
DISCORD_APP_ID: ${{ secrets.DISCORD_APP_ID }}
6666
DISCORD_TOKEN: ${{ secrets.DISCORD_TOKEN }}
6767
DISCORD_PUBLIC_KEY: ${{ secrets.DISCORD_PUBLIC_KEY }}
68+
INTERCOM_TOKEN: ${{ secrets.INTERCOM_TOKEN }}
69+
INTERCOM_ADMIN_ID: ${{ secrets.INTERCOM_ADMIN_ID }}

app/Dockerfile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@ RUN chmod +x .tmp/prebuild.sh
4242
# Expose port 5050 for the Flask application
4343
EXPOSE 5050
4444

45+
# Run test file
46+
RUN python test_intercom.py
47+
4548
# Define environment variable for Flask
4649
ENV FLASK_APP=app.py
4750

app/app.py

Lines changed: 70 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,18 @@
66
import os
77
import segment.analytics as analytics
88
import uuid
9-
import sys
10-
import traceback
9+
10+
from werkzeug.test import EnvironBuilder
11+
from werkzeug.wrappers import Request
12+
13+
import logging
14+
import redis
15+
from intercom import parse_html_to_text, set_conversation_human_replied, is_conversation_human_replied, answer_intercom_conversation, check_intercom_ip
16+
from utils import generate
17+
18+
# Configure logging
19+
logging.basicConfig(level=logging.INFO)
20+
logger = logging.getLogger(__name__)
1121

1222
analytics.write_key = os.getenv('SEGMENT_WRITE_KEY')
1323

@@ -18,6 +28,9 @@
1828

1929
csrf = CSRFProtect(app)
2030

31+
# Initialize Redis connection
32+
r = redis.from_url(os.getenv('REDIS_URL'), decode_responses=True)
33+
2134
def validate_pow(nonce, data, difficulty):
2235
# Calculate the sha256 of the concatenated string of 32-bit X-Nonce header and raw body.
2336
# This calculation has to match the code on the client side, in index.html.
@@ -26,6 +39,8 @@ def validate_pow(nonce, data, difficulty):
2639
first_uint32 = int.from_bytes(calculated_hash[:4], byteorder='big')
2740
return first_uint32 <= difficulty
2841

42+
43+
2944
def handle_ask_request(request, session):
3045
data = request.get_json()
3146
query = data.get('query')
@@ -37,33 +52,13 @@ def handle_ask_request(request, session):
3752
if 'anonymous_id' not in session:
3853
session['anonymous_id'] = str(uuid.uuid4())
3954
anonymous_id = session['anonymous_id']
55+
56+
# Determine the source based on the user agent
57+
user_agent = request.headers.get('User-Agent', '')
58+
source = 'Ask Defang Discord Bot' if 'Discord Bot' in user_agent else 'Ask Defang Website'
4059

41-
def generate():
42-
full_response = ""
43-
try:
44-
for token in rag_system.answer_query_stream(query):
45-
yield token
46-
full_response += token
47-
except Exception as e:
48-
print(f"Error in /ask endpoint: {e}", file=sys.stderr)
49-
traceback.print_exc()
50-
yield "Internal Server Error"
51-
52-
if not full_response:
53-
full_response = "No response generated"
54-
55-
if analytics.write_key:
56-
# Determine the source based on the user agent
57-
user_agent = request.headers.get('User-Agent', '')
58-
source = 'Ask Defang Discord Bot' if 'Discord Bot' in user_agent else 'Ask Defang Website'
59-
# Track the query and response
60-
analytics.track(
61-
anonymous_id=anonymous_id,
62-
event='Chatbot Question submitted',
63-
properties={'query': query, 'response': full_response, 'source': source}
64-
)
65-
66-
return Response(stream_with_context(generate()), content_type='text/markdown')
60+
# Use the shared generate function directly
61+
return Response(stream_with_context(generate(query, source, anonymous_id)), content_type='text/markdown')
6762

6863
@app.route('/', methods=['GET', 'POST'])
6964
def index():
@@ -144,5 +139,52 @@ def debug_context():
144139
context = rag_system.get_context(query)
145140
return jsonify({"context": context})
146141

142+
143+
# Handle incoming webhooks from Intercom
144+
@app.route('/intercom-webhook', methods=['POST'])
145+
@csrf.exempt
146+
def handle_webhook():
147+
if not check_intercom_ip(request):
148+
return jsonify({"error": "Unauthorized IP"}), 403
149+
150+
data = request.json
151+
152+
logger.info(f"Received Intercom webhook: {data}")
153+
conversation_id = data.get('data', {}).get('item', {}).get('id')
154+
155+
# Check for the type of the webhook event
156+
topic = data.get('topic')
157+
logger.info(f"Webhook topic: {topic}")
158+
if topic == 'conversation.admin.replied':
159+
160+
# Check if the admin is a bot or human based on presence of a message marker (e.g., "🤖") in the last message
161+
last_message = data.get('data', {}).get('item', {}).get('conversation_parts', {}).get('conversation_parts', [])[-1].get('body', '')
162+
last_message_text = parse_html_to_text(last_message)
163+
164+
logger.info(f"Parsed last message text: {last_message_text}")
165+
if last_message_text and last_message_text.endswith("🤖"):
166+
# If the last message ends with the marker, it indicates a bot reply
167+
logger.info(f"Last message in conversation {conversation_id} ends with the marker 🤖")
168+
logger.info(f"Detected bot admin reply in conversation {conversation_id}; no action taken.")
169+
else:
170+
# If the last message does not end with the marker, it indicates a human reply
171+
logger.info(f"Detected human admin reply in conversation {conversation_id}; marking as human admin-replied...")
172+
# Mark the conversation as replied by a human admin to skip LLM responses in the future
173+
set_conversation_human_replied(conversation_id, r)
174+
logger.info(f"Successfully marked conversation {conversation_id} as human admin-replied.")
175+
elif topic == 'conversation.user.replied':
176+
# In this case, the webhook event is a user reply, not an admin reply
177+
# Check if the conversation was replied previously by a human admin
178+
if is_conversation_human_replied(conversation_id, r):
179+
logger.info(f"Conversation {conversation_id} already marked as human admin-replied; no action taken.")
180+
return 'OK'
181+
# Fetch the conversation and generate an LLM answer for the user
182+
logger.info(f"Detected a user reply in conversation {conversation_id}; fetching an answer from LLM...")
183+
answer_intercom_conversation(conversation_id)
184+
else:
185+
logger.info(f"Received webhook for unsupported topic: {topic}; no action taken.")
186+
return 'OK'
187+
188+
147189
if __name__ == '__main__':
148190
app.run(host='0.0.0.0', port=5050)

app/intercom.py

Lines changed: 204 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,204 @@
1+
# Intercom API helper functions for handling conversations and replies
2+
import os
3+
import requests
4+
import hashlib
5+
from flask import jsonify
6+
from html.parser import HTMLParser
7+
8+
import logging
9+
logger = logging.getLogger(__name__)
10+
11+
from utils import generate
12+
13+
class BodyHTMLParser(HTMLParser):
14+
def __init__(self):
15+
super().__init__()
16+
self.text = []
17+
18+
def handle_data(self, data):
19+
self.text.append(data)
20+
21+
def get_text(self):
22+
return ''.join(self.text)
23+
24+
# Retrieve a conversation from Intercom API by its ID
25+
def fetch_intercom_conversation(conversation_id):
26+
# Sanitize conversation_id to allow only digits (Intercom conversation IDs are numeric)
27+
if not conversation_id.isdigit():
28+
logger.error(f"Invalid conversation_id: {conversation_id}")
29+
return jsonify({"error": f"Invalid conversation_id: {conversation_id}"}), 400
30+
31+
url = "https://api.intercom.io/conversations/" + conversation_id
32+
token = os.getenv('INTERCOM_TOKEN')
33+
if not token:
34+
return jsonify({"error": "Intercom token not set"}), 500
35+
36+
headers = {
37+
"Content-Type": "application/json",
38+
"Intercom-Version": "2.13",
39+
"Authorization": "Bearer " + token
40+
}
41+
42+
response = requests.get(url, headers=headers)
43+
if response.status_code != 200:
44+
logger.error(f"Failed to fetch conversation {conversation_id} from Intercom; status code: {response.status_code}, response: {response.text}")
45+
return jsonify({"error": "Failed to fetch conversation from Intercom"}), response.status_code
46+
47+
return response, response.status_code
48+
49+
# Determines the user query from the Intercom conversation response
50+
def get_user_query(response, conversation_id):
51+
# Extract conversation parts from an Intercom request response
52+
result = extract_conversation_parts(response)
53+
logger.info(f"Extracted {len(result)} parts from conversation {conversation_id}")
54+
55+
# Get and join the latest user messages from the conversation parts
56+
joined_text = extract_latest_user_messages(result)
57+
if not joined_text:
58+
return "No entries made by user found.", 204
59+
return joined_text, 200
60+
61+
# Extract conversation parts into a simplified JSON format
62+
def extract_conversation_parts(response):
63+
data = response.json()
64+
parts = data.get('conversation_parts', {}).get('conversation_parts', [])
65+
extracted_parts = []
66+
for part in parts:
67+
body = part.get('body', '')
68+
if not body:
69+
continue
70+
author = part.get('author', {})
71+
created_at = part.get('created_at')
72+
extracted_parts.append({'body': body, 'author': author, 'created_at': created_at})
73+
return extracted_parts
74+
75+
# Joins the latest user entries in the conversation starting from the last non-user (i.e. admin) entry
76+
def extract_latest_user_messages(conversation_parts):
77+
# Find the index of the last non-user entry
78+
last_non_user_idx = None
79+
for idx in range(len(conversation_parts) - 1, -1, -1):
80+
if conversation_parts[idx].get('author', {}).get('type') != 'user':
81+
last_non_user_idx = idx
82+
break
83+
84+
# Collect user entries after the last non-user entry
85+
if last_non_user_idx is not None:
86+
last_user_entries = [
87+
part for part in conversation_parts[last_non_user_idx + 1 :]
88+
if part.get('author', {}).get('type') == 'user'
89+
]
90+
else:
91+
# If there is no non-user entry, include all user entries
92+
last_user_entries = [
93+
part for part in conversation_parts if part.get('author', {}).get('type') == 'user'
94+
]
95+
96+
# If no user entries found, return None
97+
if not last_user_entries:
98+
return None
99+
100+
# Only keep the 'body' field from each user entry
101+
bodies = [part['body'] for part in last_user_entries if 'body' in part]
102+
103+
# Parse and concatenate all user message bodies as plain text
104+
parsed_bodies = []
105+
for html_body in bodies:
106+
parsed_bodies.append(parse_html_to_text(html_body))
107+
108+
# Join all parsed user messages into a single string
109+
joined_text = " ".join(parsed_bodies)
110+
return joined_text
111+
112+
# Helper function to parse HTML into plain text
113+
def parse_html_to_text(html_content):
114+
parser = BodyHTMLParser()
115+
parser.feed(html_content)
116+
return parser.get_text()
117+
118+
# Store conversation ID in persistent storage
119+
def set_conversation_human_replied(conversation_id, redis_client):
120+
try:
121+
# Use a Redis set to avoid duplicates
122+
redis_client.set(conversation_id, '1', ex=60*60*24) # Set TTL expiration to 1 day
123+
logger.info(f"Added conversation_id {conversation_id} to Redis set admin_replied_conversations")
124+
except Exception as e:
125+
logger.error(f"Error adding conversation_id to Redis: {e}")
126+
127+
# Check if a conversation is already marked as replied by a human admin
128+
def is_conversation_human_replied(conversation_id, redis_client):
129+
try:
130+
return redis_client.exists(conversation_id)
131+
except Exception as e:
132+
logger.error(f"Error checking conversation_id in Redis: {e}")
133+
return False
134+
135+
# Post a reply to a conversation through Intercom API
136+
def post_intercom_reply(conversation_id, response_text):
137+
url = f"https://api.intercom.io/conversations/{conversation_id}/reply"
138+
token = os.getenv('INTERCOM_TOKEN')
139+
if not token:
140+
return jsonify({"error": "Intercom token not set"}), 500
141+
142+
headers = {
143+
"Content-Type": "application/json",
144+
"Authorization": "Bearer " + token
145+
}
146+
147+
payload = {
148+
"message_type": "comment",
149+
"type": "admin",
150+
"admin_id": int(os.getenv('INTERCOM_ADMIN_ID')),
151+
"body": response_text
152+
}
153+
154+
response = requests.post(url, json=payload, headers=headers)
155+
logger.info(f"Posted reply to Intercom; response status code: {response.status_code}")
156+
157+
return response.json(), response.status_code
158+
159+
160+
# Returns a generated LLM answer to the Intercom conversation based on previous user message history
161+
def answer_intercom_conversation(conversation_id):
162+
logger.info(f"Received request to get conversation {conversation_id}")
163+
# Retrieves the history of the conversation thread in Intercom
164+
conversation, status_code = fetch_intercom_conversation(conversation_id)
165+
if status_code != 200:
166+
return jsonify(conversation), status_code
167+
168+
# Extracts the user query (which are latest user messages joined into a single string) from conversation history
169+
user_query, status_code = get_user_query(conversation, conversation_id)
170+
if status_code != 200:
171+
return jsonify(user_query), status_code
172+
173+
logger.info(f"Joined user messages: {user_query}")
174+
175+
# Use a deterministic, non-reversible hash for anonymous_id for Intercom conversations
176+
anon_hash = hashlib.sha256(f"intercom-{conversation_id}".encode()).hexdigest()
177+
178+
# Generate the exact response using the RAG system
179+
llm_response = "".join(generate(user_query, 'Intercom Conversation', anon_hash))
180+
llm_response = llm_response + " 🤖" # Add a marker to indicate the end of the response
181+
182+
logger.info(f"LLM response: {llm_response}")
183+
184+
return post_intercom_reply(conversation_id, llm_response)
185+
186+
def check_intercom_ip(request):
187+
# Restrict webhook access to a list of allowed IP addresses
188+
INTERCOM_ALLOWED_IPS = [
189+
"34.231.68.152",
190+
"34.197.76.213",
191+
"35.171.78.91",
192+
"35.169.138.21",
193+
"52.70.27.159",
194+
"52.44.63.161"
195+
]
196+
remote_ip = request.headers.get('X-Forwarded-For', request.remote_addr)
197+
# X-Forwarded-For may contain a comma-separated list; take the first IP
198+
remote_ip = remote_ip.split(',')[0].strip() if remote_ip else None
199+
200+
if remote_ip not in INTERCOM_ALLOWED_IPS:
201+
# logger.info(f"Rejected webhook from unauthorized IP: {remote_ip}")
202+
return False
203+
204+
return True

app/rag_system.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,14 +30,14 @@ def embed_knowledge_base(self):
3030
def normalize_query(self, query):
3131
return query.lower().strip()
3232

33-
def get_query_embedding(self, query, use_cpu=False):
33+
def get_query_embedding(self, query, use_cpu=True):
3434
normalized_query = self.normalize_query(query)
3535
query_embedding = self.model.encode([normalized_query], convert_to_tensor=True)
3636
if use_cpu:
3737
query_embedding = query_embedding.cpu()
3838
return query_embedding
3939

40-
def get_doc_embeddings(self, use_cpu=False):
40+
def get_doc_embeddings(self, use_cpu=True):
4141
if use_cpu:
4242
return self.doc_embeddings.cpu()
4343
return self.doc_embeddings
@@ -66,7 +66,7 @@ def compute_document_scores(self, query_embedding, doc_embeddings, high_match_th
6666

6767
return result
6868

69-
def retrieve(self, query, similarity_threshold=0.4, high_match_threshold=0.8, max_docs=5, use_cpu=False):
69+
def retrieve(self, query, similarity_threshold=0.4, high_match_threshold=0.8, max_docs=5, use_cpu=True):
7070
# Note: Set use_cpu=True to run on CPU, which is useful for testing or environments without a GPU.
7171
# Set use_cpu=False to leverage GPU for better performance in production.
7272

app/requirements.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,5 @@ huggingface_hub==0.15.1
1010
openai==0.28.0
1111
PyYAML==6.0.2
1212
GitPython==3.1.44
13+
redis==6.2.0
14+
fakeredis==2.30.1

0 commit comments

Comments
 (0)