Skip to content

Commit d2d2efe

Browse files
authored
Merge pull request #263 from nyu-mlab/packets
Upgrade API fields and Debug Packet Timestamps
2 parents 5982dc3 + 26b2ceb commit d2d2efe

File tree

7 files changed

+243
-143
lines changed

7 files changed

+243
-143
lines changed

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ description = "IoT Inspector Client for analyzing IoT device firmware"
99
readme = "README.md"
1010
requires-python = "==3.13.*"
1111
dependencies = [
12-
"libinspector==1.0.10",
12+
"libinspector==1.0.12",
1313
"streamlit>=1.50.0",
1414
]
1515
authors = [

src/libinspector/common.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import json
55
import functools
66
import pandas as pd
7-
from typing import Any
7+
import typing
88
import streamlit as st
99
import logging
1010
import re
@@ -157,15 +157,14 @@ def plot_traffic_volume(df: pd.DataFrame, now: int, chart_title: str):
157157
st.bar_chart(df.set_index('seconds_ago')['Bits'], width='content')
158158

159159

160-
def get_device_metadata(mac_address: str):
160+
def get_device_metadata(mac_address: str) -> dict:
161161
"""
162162
Retrieve the DHCP hostname and OUI vendor for a device from the database.
163163
164164
Args:
165165
mac_address (str): The MAC address of the device.
166-
167166
Returns:
168-
tuple: (dhcp_hostname, oui_vendor) as strings. Returns empty strings if not found.
167+
dict: A dictionary containing the device's metadata, or an empty dictionary if not found.
169168
"""
170169
db_conn, rwlock = libinspector.global_state.db_conn_and_lock
171170
sql = """
@@ -175,13 +174,11 @@ def get_device_metadata(mac_address: str):
175174
with rwlock:
176175
row = db_conn.execute(sql, (mac_address,)).fetchone()
177176
if row:
178-
metadata = json.loads(row['metadata_json'])
179-
dhcp_hostname = metadata.get('dhcp_hostname', "")
180-
oui_vendor = metadata.get('oui_vendor', "")
177+
meta_data = json.loads(row['metadata_json'])
178+
logger.info(f"Parsed Metadata: {json.dumps(meta_data, indent=4)}") # Check the parsed dictionary
179+
return meta_data
181180
else:
182-
dhcp_hostname = ""
183-
oui_vendor = ""
184-
return dhcp_hostname, oui_vendor
181+
return dict()
185182

186183

187184
def get_remote_hostnames(mac_address: str):
@@ -247,7 +244,7 @@ def initialize_config_dict():
247244
config_dict['app_start_time'] = time.time()
248245

249246

250-
def config_get(key, default=None) -> Any:
247+
def config_get(key, default=None) -> typing.Any:
251248
"""
252249
Get a configuration value.
253250
@@ -289,7 +286,7 @@ def config_get_prefix(key_prefix: str):
289286
}
290287

291288

292-
def config_set(key: str, value: Any):
289+
def config_set(key: str, value: typing.Any):
293290
"""
294291
Set a configuration value.
295292

src/libinspector/device_detail_page.py

Lines changed: 164 additions & 99 deletions
Large diffs are not rendered by default.

src/libinspector/device_list_page.py

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -41,23 +41,26 @@ def worker_thread():
4141

4242
# Getting inputs and calling API
4343
for device_dict in device_list:
44-
dhcp_hostname, oui_vendor = common.get_device_metadata(device_dict['mac_address'])
44+
meta_data = common.get_device_metadata(device_dict['mac_address'])
4545
remote_hostnames = common.get_remote_hostnames(device_dict['mac_address'])
4646
try:
47-
api_output = call_predict_api(dhcp_hostname, oui_vendor, remote_hostnames, device_dict['mac_address'])
47+
# Note I am passing the metadata as a string because functions with cache cannot take dicts
48+
# as a dict is mutable, and the cache would not work as expected.
49+
api_output = call_predict_api(json.dumps(meta_data), remote_hostnames, device_dict['mac_address'])
4850
common.config_set(f'device_details@{device_dict["mac_address"]}', api_output)
4951
if "Vendor" in api_output:
5052
# Update name based on API
5153
custom_name_key = f"device_custom_name_{device_dict['mac_address']}"
5254
custom_name = api_output["Vendor"]
5355
if api_output["Vendor"] != "":
5456
common.config_set(custom_name_key, custom_name)
55-
except RuntimeError:
57+
except Exception as e:
58+
logger.info("[Device ID API] Exception when calling API: %s", str(e))
5659
continue
5760

5861

5962
@functools.cache
60-
def call_predict_api(dhcp_hostname: str, oui_vendor: str, remote_hostnames: str,
63+
def call_predict_api(meta_data_string: str, remote_hostnames: str,
6164
mac_address: str, url="https://dev-id-1.tailcedbd.ts.net/predict") -> dict:
6265
"""
6366
Call the predicting API with the given fields.
@@ -69,8 +72,7 @@ def call_predict_api(dhcp_hostname: str, oui_vendor: str, remote_hostnames: str,
6972
2. dhcp_hostname: this is extracted from the 'devices' table, check meta-data and look for 'dhcp_hostname' key.
7073
3. remote_hostnames: IoT Inspector collects this information the DHCP hostname via either DNS or SNI
7174
Args:
72-
dhcp_hostname (str): The DHCP hostname of the device we want to use AI to get more info about
73-
oui_vendor (str): The OUI vendor of the device we want to use AI to get more info about
75+
meta_data_string (str): Device Metadata, User Agent info, OUI info, DHCP hostname, etc. in string format
7476
remote_hostnames (str): The remote hostnames the device has contacted
7577
mac_address (str): The MAC address of the device we want to use AI to get more info about
7678
url (str): The API endpoint.
@@ -79,6 +81,7 @@ def call_predict_api(dhcp_hostname: str, oui_vendor: str, remote_hostnames: str,
7981
"""
8082
api_key = os.environ.get("API_KEY", "momo")
8183
device_tracked_key = f'tracked@{mac_address}'
84+
meta_data = json.loads(meta_data_string)
8285

8386
headers = {
8487
"Content-Type": "application/json",
@@ -88,11 +91,12 @@ def call_predict_api(dhcp_hostname: str, oui_vendor: str, remote_hostnames: str,
8891
"prolific_id": common.config_get("prolific_id", ""),
8992
"mac_address": mac_address,
9093
"fields": {
91-
"oui_friendly": oui_vendor,
92-
"dhcp_hostname": dhcp_hostname,
94+
"oui_friendly": meta_data.get("oui_vendor", ""),
95+
"dhcp_hostname": meta_data.get("dhcp_hostname", ""),
9396
"remote_hostnames": remote_hostnames,
94-
"user_agent_info": "",
95-
"netdisco_info": "",
97+
"user_agent_info": meta_data.get("user_agent_info", ""),
98+
"mdns_info": meta_data.get("mdns_json", ""),
99+
"ssdp_info": meta_data.get("ssdp_json", ""),
96100
"user_labels": "",
97101
"talks_to_ads": common.config_get(device_tracked_key, False)
98102
}

src/libinspector/page_manager.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
import common
1212
import libinspector.core
1313
import threading
14+
import libinspector.global_state
1415

1516

1617
def get_page(title, material_icon, show_page_func):
@@ -81,6 +82,14 @@ def start_inspector_once():
8182
daemon=True,
8283
)
8384
api_thread.start()
85+
label_thread = threading.Thread(
86+
name="Device Label Thread",
87+
target=device_detail_page.label_thread,
88+
daemon=True
89+
)
90+
label_thread.start()
91+
with libinspector.global_state.global_state_lock:
92+
libinspector.global_state.custom_packet_callback_func = device_detail_page.save_labeled_activity_packets
8493

8594

8695
device_list_page_obj = get_page(

src/libinspector/server/packet_collector.py

Lines changed: 43 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -32,20 +32,20 @@
3232
# and prevent the Flask server from starting with a bad configuration.
3333
try:
3434
# Attempt a simple database operation (list collection names) to force connection/auth check
35-
print("Attempting connection and write test to MongoDB...")
35+
app.logger.info("Attempting connection and write test to MongoDB...")
3636
# 1. Attempt to insert a test document
3737
db.test_connection.insert_one({"status": "startup_check", "timestamp": int(time.time())})
3838
# 2. Attempt to delete the test document
3939
db.test_connection.delete_one({"status": "startup_check"})
4040
# 3. Final check to ensure we can list collections
4141
db.list_collection_names()
42-
print("Successfully connected and confirmed write access to MongoDB.")
42+
app.logger.info("Successfully connected and confirmed write access to MongoDB.")
4343
except Exception as e:
44-
print("=" * 50)
45-
print("FATAL ERROR: Could not connect to MongoDB or authentication failed.")
46-
print(f"Connection URI: {mongo_uri}")
47-
print(f"Exception: {e}")
48-
print("=" * 50)
44+
app.logger.info("=" * 50)
45+
app.logger.info("FATAL ERROR: Could not connect to MongoDB or authentication failed.")
46+
app.logger.info(f"Connection URI: {mongo_uri}")
47+
app.logger.info(f"Exception: {e}")
48+
app.logger.info("=" * 50)
4949
sys.exit(1)
5050
# If using gunicorn/uwsgi, this exit won't stop the workers, but will prevent the app from running correctly.
5151
# If running with 'python app.py', this will stop the application.
@@ -101,22 +101,40 @@ def label_packets():
101101
400: Error if required fields are missing or packet decoding fails.
102102
500: Error if database insertion fails.
103103
"""
104+
raw_packets = []
105+
capture_times = []
106+
capture_lengths = []
107+
104108
data = request.get_json()
105-
print("Received POST data:", json.dumps(data, indent=4))
109+
app.logger.info("Received POST data:", json.dumps(data, indent=4))
106110
required_keys = ["packets", "prolific_id", "mac_address", "device_name", "activity_label", "start_time", "end_time"]
107111
if not data or not all(key in data for key in required_keys):
112+
app.logger.warning("Missing required fields in POST data")
108113
return jsonify({"error": "Missing required fields"}), 400
114+
115+
if not is_prolific_id_valid(data["prolific_id"]):
116+
app.logger.warning("Invalid Prolific ID received")
117+
return jsonify({"error": "Prolific ID is invalid"}), 500
118+
109119
try:
110-
raw_packets = [base64.b64decode(pkt) for pkt in data["packets"]]
120+
for pkt_metadata in data["packets"]:
121+
# Validate essential keys are present
122+
if not isinstance(pkt_metadata, dict) or 'time' not in pkt_metadata or 'raw_data' not in pkt_metadata:
123+
app.logger.error("Packet object missing 'time' or 'raw_data' key.")
124+
return jsonify({"error": "Packet metadata structure is invalid"}), 400
125+
126+
raw_data_bytes = base64.b64decode(pkt_metadata["raw_data"])
127+
raw_packets.append(raw_data_bytes)
128+
capture_times.append(float(pkt_metadata["time"]))
129+
capture_lengths.append(len(raw_data_bytes))
111130
except Exception as e:
112-
print(f"Packet decoding occurred for collection '{data['prolific_id']}': {e}")
131+
app.logger.warning(f"Packet decoding occurred for collection '{data['prolific_id']}': {e}")
113132
return jsonify({"error": "Packet decoding failed"}), 400
114133

115-
if not is_prolific_id_valid(data["prolific_id"]):
116-
return jsonify({"error": "Prolific ID is invalid"}), 500
117134
folder_path: str = os.path.join(str(data["prolific_id"]), str(data["device_name"]), str(data["activity_label"]))
118135
fullpath = os.path.normpath(os.path.join(packet_root_dir, folder_path))
119136
if not fullpath.startswith(packet_root_dir):
137+
app.logger.warning("Invalid characters detected in path components")
120138
return jsonify({"error": "Seems like invalid characters used in prolific ID, device name or activity label"}), 500
121139

122140
prolific_user_packets_collected = db[data["prolific_id"]]
@@ -127,22 +145,24 @@ def label_packets():
127145
"activity_label": data["activity_label"],
128146
"start_time": int(data["start_time"]),
129147
"end_time": int(data["end_time"]),
130-
"raw_packets": raw_packets
148+
"raw_packets": raw_packets,
149+
"capture_times": capture_times,
150+
"capture_lengths": capture_lengths
131151
}
132152
try:
133153
prolific_user_packets_collected.insert_one(doc)
134154
except Exception as e:
135-
print(f"MongoDB Insert FAILED for collection '{data['prolific_id']}': {e}")
155+
app.logger.warning(f"MongoDB Insert FAILED for collection '{data['prolific_id']}': {e}")
136156
return jsonify({"error": "Database insert failed"}), 500
137157

138158
try:
139159
os.makedirs(fullpath, exist_ok=True)
140160
pcap_file_name: str = make_pcap_filename(int(data["start_time"]), int(data["end_time"]))
141161
pcap_name: str = os.path.join(fullpath, pcap_file_name)
142-
save_packets_to_pcap(raw_packets, pcap_name)
162+
save_packets_to_pcap(raw_packets, capture_times, pcap_name)
143163
except Exception as e:
144164
# If file saving fails, return a 500 but note that the DB save succeeded
145-
print(f"PCAP File Save FAILED for ID: {e}")
165+
app.logger.warning(f"PCAP File Save FAILED for ID: {e}")
146166
return jsonify({
147167
"status": "partial_success",
148168
"inserted": 1,
@@ -170,22 +190,27 @@ def make_pcap_filename(start_time: int, end_time: int) -> str:
170190
return filename
171191

172192

173-
def save_packets_to_pcap(raw_packets: list, filename="output.pcap"):
193+
def save_packets_to_pcap(raw_packets: list, capture_times: list, filename="output.pcap"):
174194
"""
175195
Saves a list of raw packet bytes to a pcap file.
176196
177197
Args:
178198
raw_packets (list): List of bytes objects representing raw packets.
199+
capture_times (list): The epoch time of each packet when it was captured by IoT Inspector.
179200
filename (str): Output pcap file name.
180201
"""
181202
scapy_packets = []
182-
for pkt_bytes in raw_packets:
203+
for i, pkt_bytes in enumerate(raw_packets):
183204
try:
184205
pkt = Ether(pkt_bytes)
185206
if pkt.__class__.__name__ == "Raw":
186207
pkt = IP(pkt_bytes)
187208
except Exception:
188209
pkt = IP(pkt_bytes)
210+
211+
# CRITICAL STEP: Assign the supplied original capture time
212+
# This is guaranteed to be correct for ALL packets.
213+
pkt.time = capture_times[i]
189214
scapy_packets.append(pkt)
190215
wrpcap(filename, scapy_packets)
191216

uv.lock

Lines changed: 3 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)