Skip to content

Commit 6bd6094

Browse files
committed
fix strip() issue for choices
1 parent aecab71 commit 6bd6094

File tree

2 files changed

+126
-6
lines changed

2 files changed

+126
-6
lines changed

reproschema/redcap2reproschema.py

Lines changed: 15 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -201,7 +201,8 @@ def process_choices(choices_str, field_name):
201201
choices = []
202202
choices_value_type = []
203203
for ii, choice in enumerate(choices_str.split("|")):
204-
parts = choice.split(", ")
204+
choice = choice.strip() # Strip leading/trailing whitespace for each choice
205+
parts = [p.strip() for p in choice.split(",")]
205206

206207
# Handle the case where the choice is something like "1,"
207208
if len(parts) == 1:
@@ -213,14 +214,22 @@ def process_choices(choices_str, field_name):
213214
)
214215
parts = [ii, parts[0]]
215216

216-
# Try to convert the first part to an integer, if it fails, keep it as a string
217-
try:
218-
value = int(parts[0])
217+
# Determine if value should be treated as an integer or string
218+
if parts[0] == '0':
219+
# Special case for "0", treat it as an integer
220+
value = 0
219221
choices_value_type.append("xsd:integer")
220-
except ValueError:
222+
elif parts[0].isdigit() and parts[0][0] == '0':
223+
# If it has leading zeros, treat it as a string
221224
value = parts[0]
222225
choices_value_type.append("xsd:string")
223-
226+
else:
227+
try:
228+
value = int(parts[0])
229+
choices_value_type.append("xsd:integer")
230+
except ValueError:
231+
value = parts[0]
232+
choices_value_type.append("xsd:string")
224233
choice_obj = {
225234
"name": {"en": " ".join(parts[1:]).strip()},
226235
"value": value,
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
import os
2+
import shutil
3+
4+
import pytest
5+
import yaml
6+
from click.testing import CliRunner
7+
8+
from ..cli import main
9+
from ..redcap2reproschema import process_choices
10+
11+
def test_process_choices_numeric_codes():
    """Expect plain integer codes with labels to yield integer-valued choices."""
    expected_choices = [
        {"name": {"en": label}, "value": code}
        for code, label in ((1, "Male"), (2, "Female"), (3, "Other"))
    ]

    parsed, types = process_choices("1, Male | 2, Female | 3, Other", "gender")

    assert parsed == expected_choices
    assert types == ["xsd:integer"]
21+
22+
def test_process_choices_boolean():
    """Expect a Yes/No pair coded 1/0 to come back as integer-valued choices."""
    yes_choice = {"name": {"en": "Yes"}, "value": 1}
    no_choice = {"name": {"en": "No"}, "value": 0}

    parsed, types = process_choices("1, Yes | 0, No", "boolean_field")

    # Order matters: the choices must appear in the order they were written.
    assert parsed == [yes_choice, no_choice]
    assert types == ["xsd:integer"]
31+
32+
def test_process_choices_special_characters():
    """Expect labels containing double or single quotes to be kept verbatim."""
    labels = ["Option A", "\"Option B\"", "Option C with 'quotes'"]
    expected_choices = [
        {"name": {"en": text}, "value": position}
        for position, text in enumerate(labels, start=1)
    ]

    parsed, types = process_choices(
        "1, Option A | 2, \"Option B\" | 3, Option C with 'quotes'",
        "special_chars",
    )

    assert parsed == expected_choices
    assert types == ["xsd:integer"]
42+
43+
def test_process_choices_with_missing_values():
    """Expect a sentinel code such as 99 ("Not applicable") to parse like any other integer code."""
    codes = (1, 2, 99)
    labels = ("Yes", "No", "Not applicable")
    expected_choices = [
        {"name": {"en": label}, "value": code}
        for code, label in zip(codes, labels)
    ]

    parsed, types = process_choices(
        "1, Yes | 2, No | 99, Not applicable", "missing_values"
    )

    assert parsed == expected_choices
    assert types == ["xsd:integer"]
53+
54+
def test_process_choices_with_unicode():
    """Expect accented letters and symbols in labels to pass through unchanged."""
    labels = ("Café", "Niño", "Résumé", "☺")
    expected_choices = [
        {"name": {"en": text}, "value": number}
        for number, text in enumerate(labels, start=1)
    ]

    parsed, types = process_choices(
        "1, Café | 2, Niño | 3, Résumé | 4, ☺", "unicode_field"
    )

    assert parsed == expected_choices
    assert types == ["xsd:integer"]
65+
66+
def test_process_choices_alpha_codes():
    """Expect alphabetic codes to stay strings and be typed as xsd:string."""
    code_to_label = (("A", "Apple"), ("B", "Banana"), ("C", "Cherry"))
    expected_choices = [
        {"name": {"en": label}, "value": code} for code, label in code_to_label
    ]

    parsed, types = process_choices(
        "A, Apple | B, Banana | C, Cherry", "alpha_codes"
    )

    assert parsed == expected_choices
    assert sorted(types) == ["xsd:string"]
76+
77+
def test_process_choices_incomplete_values():
    """Expect a choice with a code but no description to get an empty label."""
    described_first = {"name": {"en": "Yes"}, "value": 1}
    undescribed = {"name": {"en": ""}, "value": 2}
    described_last = {"name": {"en": "No"}, "value": 3}

    parsed, types = process_choices("1, Yes | 2, | 3, No", "incomplete_values")

    assert parsed == [described_first, undescribed, described_last]
    assert types == ["xsd:integer"]
87+
88+
def test_process_choices_numeric_strings():
    """Expect zero-padded numeric codes to be kept as strings, not cast to int."""
    pairs = (
        ("001", "Option 001"),
        ("002", "Option 002"),
        ("003", "Option 003"),
    )
    expected_choices = [
        {"name": {"en": label}, "value": code} for code, label in pairs
    ]

    parsed, types = process_choices(
        "001, Option 001 | 002, Option 002 | 003, Option 003",
        "numeric_strings",
    )

    assert parsed == expected_choices
    assert sorted(types) == ["xsd:string"]
98+
99+
def test_process_choices_spaces_in_values():
    """Expect codes that contain internal spaces to be preserved as string values."""
    first = {"name": {"en": "Choice AB"}, "value": "A B"}
    second = {"name": {"en": "Choice CD"}, "value": "C D"}

    parsed, types = process_choices(
        "A B, Choice AB | C D, Choice CD", "spaces_in_values"
    )

    assert parsed == [first, second]
    assert sorted(types) == ["xsd:string"]
108+
109+
# Run only this module's tests when the file is executed directly.
# Any extra command-line arguments are forwarded to pytest, and pytest's
# exit status is propagated so a failing run is visible to the calling shell.
if __name__ == "__main__":
    import sys

    raise SystemExit(pytest.main([__file__, *sys.argv[1:]]))

0 commit comments

Comments
 (0)