Skip to content

Commit e72180a

Browse files
committed
Merge branch 'fix-readonly' of github.com:yibeichan/reproschema-py into fix-readonly
2 parents 3775e77 + 78ca16a commit e72180a

File tree

2 files changed

+52
-28
lines changed

2 files changed

+52
-28
lines changed

reproschema/redcap2reproschema.py

Lines changed: 44 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -82,7 +82,9 @@ def clean_header(header):
8282
cleaned_header = {}
8383
for k, v in header.items():
8484
# Strip BOM, whitespace, and enclosing quotation marks if present
85-
cleaned_key = k.lstrip("\ufeff").strip().strip('"') if isinstance(k, str) else k
85+
cleaned_key = (
86+
k.lstrip("\ufeff").strip().strip('"') if isinstance(k, str) else k
87+
)
8688
cleaned_header[cleaned_key] = v
8789
return cleaned_header
8890

@@ -144,11 +146,19 @@ def process_field_properties(data):
144146
condition = True
145147

146148
# Check Field Annotation for special flags - safely handle non-string values
147-
annotation = str(data.get("Field Annotation", "")).upper() if data.get("Field Annotation") is not None else ""
148-
if condition and isinstance(annotation, str) and (
149-
"@READONLY" in annotation
150-
or "@HIDDEN" in annotation
151-
or "@CALCTEXT" in annotation
149+
annotation = (
150+
str(data.get("Field Annotation", "")).upper()
151+
if data.get("Field Annotation") is not None
152+
else ""
153+
)
154+
if (
155+
condition
156+
and isinstance(annotation, str)
157+
and (
158+
"@READONLY" in annotation
159+
or "@HIDDEN" in annotation
160+
or "@CALCTEXT" in annotation
161+
)
152162
):
153163
condition = False
154164

@@ -157,13 +167,18 @@ def process_field_properties(data):
157167
"isAbout": f"items/{data['Variable / Field Name']}",
158168
"isVis": condition,
159169
}
160-
170+
161171
# Handle Required Field check, accounting for NaN values and empty strings
162172
required_field = data.get("Required Field?")
163-
if pd.notna(required_field) and str(required_field).strip(): # Check if value is not NaN and not empty
173+
if (
174+
pd.notna(required_field) and str(required_field).strip()
175+
): # Check if value is not NaN and not empty
164176
if str(required_field).lower() == "y":
165177
prop_obj["valueRequired"] = True
166-
elif str(required_field).lower() not in ["", "n"]: # Only raise error for unexpected values
178+
elif str(required_field).lower() not in [
179+
"",
180+
"n",
181+
]: # Only raise error for unexpected values
167182
raise ValueError(
168183
f"value {required_field} not supported yet for redcap:Required Field?"
169184
)
@@ -264,7 +279,7 @@ def process_choices(choices_str, field_name):
264279

265280
def parse_html(input_string, default_language="en"):
266281
result = {}
267-
282+
268283
# Handle non-string input
269284
if not isinstance(input_string, str):
270285
if pd.isna(input_string): # Handle NaN values
@@ -286,7 +301,9 @@ def parse_html(input_string, default_language="en"):
286301
if not result: # If no text was extracted
287302
result[default_language] = soup.get_text(strip=True)
288303
else:
289-
result[default_language] = soup.get_text(strip=True) # Use the entire text as default language text
304+
result[default_language] = soup.get_text(
305+
strip=True
306+
) # Use the entire text as default language text
290307
return result
291308

292309

@@ -524,24 +541,26 @@ def parse_language_iso_codes(input_string):
524541
]
525542

526543

527-
def process_csv(
528-
csv_file, abs_folder_path, schema_context_url, protocol_name
529-
):
544+
def process_csv(csv_file, abs_folder_path, schema_context_url, protocol_name):
530545
datas = {}
531546
order = {}
532547
compute = {}
533548
languages = []
534549

535550
# Read CSV with explicit BOM handling, and maintain original order
536-
df = pd.read_csv(csv_file, encoding="utf-8-sig") # utf-8-sig handles BOM automatically
537-
551+
df = pd.read_csv(
552+
csv_file, encoding="utf-8-sig"
553+
) # utf-8-sig handles BOM automatically
554+
538555
# Clean column names (headers)
539-
df.columns = df.columns.map(lambda x: x.strip().strip('"').lstrip("\ufeff"))
556+
df.columns = df.columns.map(
557+
lambda x: x.strip().strip('"').lstrip("\ufeff")
558+
)
540559

541560
# Clean string values in the dataframe
542-
object_columns = df.select_dtypes(include=['object']).columns
561+
object_columns = df.select_dtypes(include=["object"]).columns
543562
for col in object_columns:
544-
df[col] = df[col].astype(str).replace('nan', '')
563+
df[col] = df[col].astype(str).replace("nan", "")
545564

546565
# Initialize structures for each unique form
547566
unique_forms = df["Form Name"].unique()
@@ -563,10 +582,10 @@ def process_csv(
563582
field_name = row["Variable / Field Name"]
564583
field_type = row.get("Field Type", "")
565584
field_annotation = row.get("Field Annotation")
566-
585+
567586
# Add row data to datas dictionary
568587
datas[form_name].append(row.to_dict())
569-
588+
570589
if field_type in COMPUTE_LIST:
571590
condition = normalize_condition(
572591
row["Choices, Calculations, OR Slider Labels"],
@@ -578,7 +597,10 @@ def process_csv(
578597
"jsExpression": condition,
579598
}
580599
)
581-
elif isinstance(field_annotation, str) and "@CALCTEXT" in field_annotation.upper():
600+
elif (
601+
isinstance(field_annotation, str)
602+
and "@CALCTEXT" in field_annotation.upper()
603+
):
582604
calc_text = field_annotation
583605
match = re.search(r"@CALCTEXT\((.*)\)", calc_text)
584606
if match:

reproschema/tests/test_redcap2reproschema.py

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -25,11 +25,11 @@ def test_redcap2reproschema(tmpdir):
2525

2626
shutil.copy(CSV_TEST_FILE, str(temp_csv_file))
2727
shutil.copy(YAML_TEST_FILE, str(temp_yaml_file))
28-
28+
2929
# Add debug output to see the content of the CSV file
30-
with open(str(temp_csv_file), 'r') as f:
30+
with open(str(temp_csv_file), "r") as f:
3131
print("CSV content:", f.read())
32-
32+
3333
with tmpdir.as_cwd():
3434
# Read YAML to find the expected output directory name
3535
with open(str(temp_yaml_file), "r") as file:
@@ -44,8 +44,10 @@ def test_redcap2reproschema(tmpdir):
4444
str(temp_yaml_file),
4545
],
4646
)
47-
47+
4848
print("Command output:", result.output) # Add debug output
49-
49+
5050
assert result.exit_code == 0, f"Command failed with: {result.output}"
51-
assert os.path.isdir(protocol_name), f"Expected output directory '{protocol_name}' does not exist"
51+
assert os.path.isdir(
52+
protocol_name
53+
), f"Expected output directory '{protocol_name}' does not exist"

0 commit comments

Comments
 (0)