Skip to content

Commit 86bca52

Browse files
authored
Merge pull request #253 from nyu-mlab/prolific
Collect Prolific ID upon startup.
2 parents 0bec68b + b84419f commit 86bca52

File tree

5 files changed

+88
-12
lines changed

5 files changed

+88
-12
lines changed

src/libinspector/common.py

Lines changed: 80 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,12 @@
66
import functools
77
import requests
88
import libinspector.global_state
9+
from libinspector.privacy import is_ad_tracked
910
import pandas as pd
1011
from typing import Any
1112
import streamlit as st
1213
import logging
14+
import re
1315

1416
config_file_name = 'config.json'
1517
config_lock = threading.Lock()
@@ -35,16 +37,80 @@ def show_warning():
3537
True if the warning is still being shown (user has not accepted).
3638
False if the user has accepted the warning and can proceed.
3739
"""
40+
current_id = config_get("prolific_id", "")
3841

39-
if config_get("suppress_warning", False):
42+
# --- GATE 1: PROLIFIC ID CHECK (Must be valid to proceed to confirmation) ---
43+
if is_prolific_id_valid(current_id):
44+
# --- SHOW CONFIRMATION UI (Reached whenever the stored ID is valid; the warning below is shown only while it is unaccepted) ---
45+
st.subheader("1. Prolific ID Confirmation")
46+
st.info(f"Your currently stored ID is: `{current_id}`")
47+
48+
# Allows the user to change the ID, which forces them back through GATE 1
49+
if st.button("Change Prolific ID"):
50+
config_set("prolific_id", "") # Clear the stored ID
51+
st.rerun()
52+
53+
if not config_get("suppress_warning", False):
54+
st.markdown("---")
55+
st.subheader("2. Network Monitoring Warning")
56+
st.markdown(warning_text)
57+
58+
if st.button("OK, I understand and wish to proceed"):
59+
config_set("suppress_warning", True)
60+
st.rerun()
61+
62+
return not is_prolific_id_valid(config_get("prolific_id", ""))
63+
else:
64+
# ID is missing or invalid -> BLOCK and show input form
65+
st.subheader("Prolific ID Required")
66+
st.warning("Please enter your Prolific ID to proceed. This ID is essential for data labeling.")
67+
68+
with st.form("prolific_id_form"):
69+
input_id = st.text_input(
70+
"Enter your Prolific ID (1-50 Alphanumeric Characters):",
71+
value="",
72+
key="prolific_id_input"
73+
).strip()
74+
75+
submitted = st.form_submit_button("Submit ID")
76+
77+
if submitted:
78+
if is_prolific_id_valid(input_id):
79+
config_set("prolific_id", input_id)
80+
st.success("Prolific ID accepted. Please review the details below.")
81+
st.rerun() # Rerun so the now-valid stored ID routes to the confirmation step
82+
else:
83+
st.error("Invalid Prolific ID. Must be 1-50 alphanumeric characters.")
84+
85+
return True # BLOCK: ID check still needs resolution.
86+
87+
88+
def is_prolific_id_valid(prolific_id: str) -> bool:
89+
"""
90+
Performs sanity checks on the Prolific ID:
91+
1. Not empty.
92+
2. Length between 1 and 50 characters (inclusive).
93+
3. Contains only alphanumeric characters (A-Z, a-z, 0-9).
94+
Args:
95+
prolific_id (str): The Prolific ID to validate.
96+
97+
Returns:
98+
bool: True if the ID is non-empty, 1-50 characters long, and alphanumeric; False otherwise.
99+
"""
100+
if not prolific_id or not isinstance(prolific_id, str):
101+
return False
102+
103+
# 2. Length check
104+
if not 1 <= len(prolific_id) <= 50:
105+
return False
106+
107+
# 3. Alphanumeric check using regex (ensures no special characters)
108+
if not re.fullmatch(r'^[a-zA-Z0-9]+$', prolific_id):
40109
return False
41110

42-
st.markdown(warning_text)
43-
if st.button("OK, I understand and wish to proceed"):
44-
config_set("suppress_warning", True)
45-
st.rerun()
46111
return True
47112

113+
48114
def bar_graph_data_frame(mac_address: str, now: int):
49115
sixty_seconds_ago = now - 60
50116
db_conn, rwlock = libinspector.global_state.db_conn_and_lock
@@ -144,8 +210,10 @@ def get_remote_hostnames(mac_address: str):
144210
"""
145211
with rwlock:
146212
rows = db_conn.execute(sql, (mac_address, mac_address)).fetchall()
147-
hostnames = [row['hostname'] for row in rows if row['hostname']]
148-
remote_hostnames = '+'.join(hostnames) if hostnames else ""
213+
hostnames = [row['hostname'] for row in rows if row['hostname']]
214+
is_tracked = any(is_ad_tracked(hostname) for hostname in hostnames)
215+
config_set(f'tracked@{mac_address}', is_tracked)
216+
remote_hostnames = '+'.join(hostnames) if hostnames else ""
149217
return remote_hostnames
150218

151219

@@ -171,19 +239,23 @@ def call_predict_api(dhcp_hostname: str, oui_vendor: str, remote_hostnames: str,
171239
dict: The response text from the API.
172240
"""
173241
api_key = os.environ.get("API_KEY", "momo")
242+
device_tracked_key = f'tracked@{mac_address}'
243+
174244
headers = {
175245
"Content-Type": "application/json",
176246
"x-api-key": api_key
177247
}
178248
data = {
249+
"prolific_id": config_get("prolific_id", ""),
250+
"mac_address": mac_address,
179251
"fields": {
180252
"oui_friendly": oui_vendor,
181253
"dhcp_hostname": dhcp_hostname,
182254
"remote_hostnames": remote_hostnames,
183255
"user_agent_info": "",
184256
"netdisco_info": "",
185257
"user_labels": "",
186-
"talks_to_ads": False
258+
"talks_to_ads": config_get(device_tracked_key, False)
187259
}
188260
}
189261
non_empty_field_values = [

src/libinspector/device_detail_page.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -65,13 +65,13 @@ def label_activity_workflow(mac_address: str):
6565
st.warning(
6666
"As part of this research project, only the network activity of the device you select will be shared with NYU mLab, and only while you are labeling an activity. "
6767
"By entering your Prolific ID and continuing, you agree to share this data for research and compensation. "
68-
"**Please enter your correct Prolific ID to ensure you receive payment for your participation.**"
68+
"**Please confirm your Prolific ID is correct to ensure you receive payment for your participation.**"
6969
)
70-
prolific_id = st.text_input("Enter your Prolific ID to continue:", key="prolific_id_input")
70+
prolific_id = common.config_get("prolific_id", "")
7171
if st.button("Continue"):
7272
if prolific_id.strip():
7373
st.session_state['external_data_permission_granted'] = True
74-
st.session_state['prolific_id'] = prolific_id.strip()
74+
st.session_state['prolific_id'] = prolific_id
7575
st.rerun()
7676
else:
7777
st.warning("Prolific ID is required to proceed.")

src/libinspector/device_list_page.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ def show_device_card(device_dict: dict):
8888
caption = f'{device_dict["ip_address"]} | {device_dict["mac_address"]}'
8989
if "oui_vendor" in metadata_dict:
9090
caption += f' | {metadata_dict["oui_vendor"]}'
91-
9291
try:
9392
dhcp_hostname, oui_vendor = common.get_device_metadata(device_dict['mac_address'])
9493
remote_hostnames = common.get_remote_hostnames(device_dict['mac_address'])

src/libinspector/page_manager.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def initialize_page():
5050
menu_items={}
5151
)
5252

53+
# If true, block further execution
5354
if common.show_warning():
5455
st.stop()
5556

src/libinspector/server/packet_collector.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import time
55
import sys
66
import datetime
7+
import common
78
from flask import Flask, request, jsonify
89
from pymongo import MongoClient
910
from scapy.all import wrpcap, Ether, IP
@@ -86,12 +87,15 @@ def label_packets():
8687
print(f"Packet decoding occurred for collection '{data['prolific_id']}': {e}")
8788
return jsonify({"error": "Packet decoding failed"}), 400
8889

90+
if not common.is_prolific_id_valid(data["prolific_id"]):
91+
return jsonify({"error": "Prolific ID is invalid"}), 500
8992
folder_path: str = os.path.join(str(data["prolific_id"]), str(data["device_name"]), str(data["activity_label"]))
9093
fullpath = os.path.normpath(os.path.join(packet_root_dir, folder_path))
9194
if not fullpath.startswith(packet_root_dir):
9295
return jsonify({"error": "Seems like invalid characters used in prolific ID, device name or activity label"}), 500
9396

9497
prolific_user_packets_collected = db[data["prolific_id"]]
98+
9599
doc = {
96100
"mac_address": data["mac_address"],
97101
"device_name": data["device_name"],

0 commit comments

Comments
 (0)