Skip to content

Commit 2616f96

Browse files
committed
Address PR feedback
1 parent aebc1fd commit 2616f96

File tree

7 files changed

+141
-57
lines changed

7 files changed

+141
-57
lines changed

README.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,11 @@ See [discussion here](https://github.com/SuffolkLITLab/docassemble-AssemblyLine/
8181
Answer set JSON imports are intentionally restricted to reduce risk from malformed and malicious payloads.
8282
8383
Default behavior:
84-
- Plain JSON values are imported by default, and object reconstruction is allowed only for allowlisted DAObject classes.
85-
- Top-level variable names must match `^[A-Za-z][A-Za-z0-9_]*$`.
86-
- Internal/protected variable names are blocked.
87-
- If `answer set import allowed variables` is not set, imports use a denylist-only policy for backwards compatibility.
88-
- Object payloads can be imported when classes are allowlisted; by default, known `docassemble.base` and `docassemble.AssemblyLine` DAObject descendants are allowed.
84+
- Plain JSON values are imported by default, and object reconstruction is allowed only for allowlisted DAObject classes.
85+
- Top-level variable names must match `^[A-Za-z][A-Za-z0-9_]*$`.
86+
- Internal/protected variable names are blocked.
87+
- If `answer set import allowed variables` is not set, imports allow safe variable names by default, still block protected/internal names, and intersect with the target interview's known variables when AssemblyLine can detect them.
88+
- Object payloads can be imported when classes are allowlisted; by default, known `docassemble.base` and `docassemble.AssemblyLine` DAObject descendants are allowed.
8989

9090
Default import limits (`assembly line: answer set import limits`):
9191
- `max bytes`: `1048576` (1 MB)
@@ -96,7 +96,7 @@ Default import limits (`assembly line: answer set import limits`):
9696
- `max number abs`: `1000000000000000` (`10**15`)
9797

9898
Final allowlist/config policy:
99-
- Default allowlist: unset (`answer set import allowed variables` omitted), to avoid breaking existing interviews unexpectedly.
99+
- Default allowlist: unset (`answer set import allowed variables` omitted), which falls back to safe-name/protected-name checks plus target-interview variable detection when available.
100100
- Recommended production policy: set an explicit allowlist to only shared/reusable variables in your jurisdiction.
101101
- `answer set import allow objects` defaults to `true`; set it to `false` if you want strict plain-JSON-only imports.
102102
- `answer set import allowed object classes` can extend the default DAObject class allowlist with explicit additional class paths.

docassemble/AssemblyLine/al_courts.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -454,7 +454,8 @@ def convert_zip(z: Any) -> str:
454454
"address_zip": convert_zip
455455
}
456456
if hasattr(self, "converters") and self.converters:
457-
assert isinstance(self.converters, dict)
457+
if not isinstance(self.converters, dict):
458+
raise TypeError("converters must be a dict")
458459
merged_converters.update(self.converters)
459460
to_load = path_and_mimetype(load_path)[0]
460461
if self.filename.lower().endswith(".xlsx"):

docassemble/AssemblyLine/al_document.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@
3434
from docassemble.base.pdfa import pdf_to_pdfa
3535
from textwrap import wrap
3636
from math import floor
37-
import subprocess
37+
import subprocess # nosec B404
3838
import pikepdf
3939
from typing import Tuple
4040
import secrets
@@ -1104,7 +1104,7 @@ def as_pdf(
11041104
try:
11051105
main_doc.set_attributes(filename=filename)
11061106
main_doc.set_mimetype("application/pdf")
1107-
except:
1107+
except Exception: # nosec B110
11081108
pass
11091109

11101110
if self.need_addendum():
@@ -1976,7 +1976,7 @@ def get_cacheable_documents(
19761976
)
19771977
)
19781978
result["download_filename"] = filename_root + ext
1979-
except:
1979+
except Exception: # nosec B110
19801980
pass
19811981
results.append(result)
19821982

@@ -2855,7 +2855,7 @@ def ocrmypdf_task(
28552855

28562856
completed_ocr = None
28572857
try:
2858-
completed_ocr = subprocess.run(
2858+
completed_ocr = subprocess.run( # nosec B603
28592859
ocr_params, timeout=60 * 60, check=False, capture_output=True
28602860
)
28612861
to_pdf.commit()

docassemble/AssemblyLine/al_general.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -642,7 +642,7 @@ def normalized_address(self) -> Union[Address, "ALAddress"]:
642642
"""
643643
try:
644644
self.geocode()
645-
except:
645+
except Exception: # nosec B110
646646
pass
647647
if self.was_geocoded_successfully() and hasattr(self, "norm_long"):
648648
return self.norm_long
@@ -672,7 +672,7 @@ def state_name(self, country_code: Optional[str] = None) -> str:
672672
if hasattr(self, "country") and self.country and len(self.country) == 2:
673673
try:
674674
return state_name(self.state, country_code=self.country)
675-
except:
675+
except Exception: # nosec B110
676676
pass
677677
try:
678678
return state_name(
@@ -1037,7 +1037,7 @@ def phone_numbers(
10371037
elif len(nums):
10381038
return list(nums[0].keys())[0]
10391039

1040-
assert False # We should never get here, no default return is necessary
1040+
raise AssertionError("Unreachable: no default return is necessary")
10411041

10421042
def contact_methods(self) -> str:
10431043
"""Generates a formatted string of all provided contact methods.
@@ -2470,7 +2470,7 @@ def is_phone_or_email(text: str) -> bool:
24702470
validation_error("Enter a valid phone number or email address")
24712471
else:
24722472
validation_error("Enter a valid email address")
2473-
assert False, "unreachable"
2473+
raise AssertionError("unreachable")
24742474

24752475

24762476
def github_modified_date(

docassemble/AssemblyLine/data/questions/al_saved_sessions.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -247,9 +247,12 @@ content: |
247247
template: al_sessions_import_rejected_template
248248
subject: Variables skipped during import
249249
content: |
250-
% for item in al_sessions_last_import_report.get('rejected', []):
250+
% for item in al_sessions_last_import_report.get('rejected', [])[:50]:
251251
* `${ item.get('path', '?') }`: ${ item.get('reason', 'unknown reason') }
252252
% endfor
253+
% if len(al_sessions_last_import_report.get('rejected', [])) > 50:
254+
* ${ len(al_sessions_last_import_report.get('rejected', [])) - 50 } more variables were skipped and are not shown here.
255+
% endif
253256
---
254257
question: |
255258
Upload a JSON file
@@ -266,4 +269,4 @@ validation code: |
266269
code: |
267270
al_sessions_snapshot_results = load_interview_json(al_sessions_json_file.slurp())
268271
al_sessions_last_import_report = get_last_import_report()
269-
al_sessions_import_json = True
272+
al_sessions_import_json = True

docassemble/AssemblyLine/sessions.py

Lines changed: 59 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,12 @@
3333
)
3434
from docassemble.webapp.db_object import init_sqlalchemy
3535
from sqlalchemy.sql import text
36-
from docassemble.base.functions import server, safe_json, serializable_dict
36+
from docassemble.base.functions import (
37+
server,
38+
safe_json,
39+
serializable_dict,
40+
this_thread,
41+
)
3742
from .al_document import (
3843
ALDocument,
3944
ALDocumentBundle,
@@ -270,18 +275,22 @@
270275
"DASet": "docassemble.base.util.DASet",
271276
}
272277

273-
_last_import_report: Dict[str, Any] = {
274-
"accepted": [],
275-
"rejected": [],
276-
"warnings": [],
277-
"limits": {},
278-
"contains_objects": False,
279-
"remapped_classes": [],
280-
}
278+
LAST_IMPORT_REPORT_ATTR = "_assemblyline_last_import_report"
279+
280+
281+
def _default_import_report() -> Dict[str, Any]:
282+
return {
283+
"accepted": [],
284+
"rejected": [],
285+
"warnings": [],
286+
"limits": {},
287+
"contains_objects": False,
288+
"remapped_classes": [],
289+
}
281290

282291

283292
def get_last_import_report() -> Dict[str, Any]:
284-
"""Return a copy of the most recent JSON import report.
293+
"""Return a copy of the most recent JSON import report for this thread.
285294
286295
The report dictionary contains the following keys:
287296
- ``accepted``: list of variable names that were imported successfully.
@@ -294,12 +303,18 @@ def get_last_import_report() -> Dict[str, Any]:
294303
Returns:
295304
Dict[str, Any]: sanitized copy of the last import report.
296305
"""
297-
return safe_json(_last_import_report)
306+
misc = getattr(this_thread, "misc", None)
307+
if not isinstance(misc, dict):
308+
return safe_json(_default_import_report())
309+
return safe_json(misc.get(LAST_IMPORT_REPORT_ATTR, _default_import_report()))
298310

299311

300312
def _set_last_import_report(report: Dict[str, Any]) -> None:
301-
global _last_import_report
302-
_last_import_report = report
313+
misc = getattr(this_thread, "misc", None)
314+
if not isinstance(misc, dict):
315+
misc = {}
316+
setattr(this_thread, "misc", misc)
317+
misc[LAST_IMPORT_REPORT_ATTR] = safe_json(report)
303318

304319

305320
def _import_limits() -> Dict[str, int]:
@@ -382,10 +397,16 @@ def _safe_variable_name(name: str) -> bool:
382397

383398
def _allowed_import_variables() -> Optional[Set[str]]:
384399
"""Optional strict allowlist from config; when empty/undefined, default policy applies."""
385-
allowed = get_config("assembly line", {}).get("answer set import allowed variables")
386-
if not allowed:
400+
raw_allowed = get_config("assembly line", {}).get("answer set import allowed variables")
401+
if raw_allowed is None:
402+
return None
403+
if isinstance(raw_allowed, str):
404+
iterable = [raw_allowed]
405+
elif isinstance(raw_allowed, (list, tuple, set)):
406+
iterable = raw_allowed
407+
else:
387408
return None
388-
cleaned = {str(x).strip() for x in allowed if str(x).strip()}
409+
cleaned = {str(x).strip() for x in iterable if str(x).strip()}
389410
return cleaned if cleaned else None
390411

391412

@@ -424,7 +445,13 @@ def _allowed_object_classes() -> Set[str]:
424445
configured = get_config("assembly line", {}).get(
425446
"answer set import allowed object classes", []
426447
)
427-
for class_name in configured:
448+
if isinstance(configured, str):
449+
iterable = [configured]
450+
elif isinstance(configured, (list, tuple, set)):
451+
iterable = configured
452+
else:
453+
iterable = []
454+
for class_name in iterable:
428455
name = str(class_name).strip()
429456
if name:
430457
allowed.add(name)
@@ -1108,6 +1135,8 @@ def find_matching_sessions(
11081135
metadata_search_conditions = "TRUE"
11091136

11101137
# Add metadata filters
1138+
_ALLOWED_OPERATORS = {"=", "!=", "<", "<=", ">", ">=", "LIKE", "ILIKE"}
1139+
_ALLOWED_CAST_TYPES = {"int", "float"}
11111140
if metadata_filters:
11121141
metadata_filter_conditions = []
11131142
for column, val_tuple in metadata_filters.items():
@@ -1116,6 +1145,14 @@ def find_matching_sessions(
11161145
cast_type = None
11171146
else:
11181147
value, operator, cast_type = val_tuple
1148+
1149+
if operator.upper() not in _ALLOWED_OPERATORS:
1150+
raise ValueError(f"Disallowed SQL operator: {operator}")
1151+
if cast_type and cast_type.lower() not in _ALLOWED_CAST_TYPES:
1152+
raise ValueError(f"Disallowed SQL cast type: {cast_type}")
1153+
if not column.isidentifier():
1154+
raise ValueError(f"Invalid metadata column name: {column}")
1155+
11191156
if cast_type:
11201157
column_expr = f"CAST(jsonstorage.data->>{repr(column)} AS {cast_type})"
11211158
else:
@@ -1157,7 +1194,7 @@ def find_matching_sessions(
11571194
userdict.key as key,
11581195
{', '.join(f"jsonstorage.data->>{repr(column)} as {column}" for column in metadata_column_names)},
11591196
jsonstorage.data as data
1160-
FROM userdict
1197+
FROM userdict
11611198
NATURAL JOIN (
11621199
SELECT key, MAX(modtime) AS modtime, COUNT(key) AS num_keys
11631200
FROM userdict
@@ -1174,7 +1211,7 @@ def find_matching_sessions(
11741211
) AS unique_sessions
11751212
ORDER BY modtime DESC
11761213
LIMIT :limit OFFSET :offset;
1177-
""")
1214+
""") # nosec B608
11781215

11791216
if offset < 0:
11801217
offset = 0
@@ -2322,7 +2359,9 @@ def update_session_metadata(
23222359

23232360
# 2) Derive two signed 32‑bit ints from MD5(session_id|filename|tags)
23242361
key_string = f"{session_id}|{filename}|{metadata_key_name}"
2325-
digest = hashlib.md5(key_string.encode("utf-8")).digest()
2362+
digest = hashlib.md5(
2363+
key_string.encode("utf-8"), usedforsecurity=False
2364+
).digest() # nosec B324
23262365
high_u32, low_u32 = struct.unpack(">II", digest[:8])
23272366

23282367
def to_signed_32(x: int) -> int:

0 commit comments

Comments
 (0)