Skip to content

Commit 7571b31

Browse files
committed
Fixed #542 - stripped empty newline characters
Signed-off-by: Chin Yeung Li <[email protected]>
1 parent 82c0bd0 commit 7571b31

File tree

4 files changed

+63
-16
lines changed

4 files changed

+63
-16
lines changed

src/attributecode/gen.py

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
from attributecode.util import to_posix
3434
from attributecode.util import UNC_PREFIX_POSIX
3535
from attributecode.util import load_scancode_json, load_csv, load_json, load_excel
36+
from attributecode.util import strip_inventory_value
3637

3738

3839
def check_duplicated_columns(location):
@@ -128,6 +129,7 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
128129
"""
129130
errors = []
130131
abouts = []
132+
is_spreadsheet = False
131133

132134
if base_dir:
133135
base_dir = util.to_posix(base_dir)
@@ -140,8 +142,10 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
140142
errors.extend(dup_cols_err)
141143
return errors, abouts
142144
inventory = load_csv(location)
145+
is_spreadsheet = True
143146
elif location.endswith('.xlsx'):
144147
dup_cols_err, inventory = load_excel(location, worksheet)
148+
is_spreadsheet = True
145149
if dup_cols_err:
146150
errors.extend(dup_cols_err)
147151
return errors, abouts
@@ -151,7 +155,14 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
151155
try:
152156
arp_list = []
153157
errors = []
154-
for component in inventory:
158+
159+
if is_spreadsheet:
160+
# Only .csv and .xlsx inputs may have newline issues
161+
stripped_inv = strip_inventory_value(inventory)
162+
else:
163+
stripped_inv = inventory
164+
165+
for component in stripped_inv:
155166
if not from_attrib:
156167
arp = component['about_resource']
157168
dup_err = check_duplicated_about_resource(arp, arp_list)
@@ -164,11 +175,11 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
164175
invalid_about_filename = check_about_resource_filename(arp)
165176
if invalid_about_filename and not invalid_about_filename in errors:
166177
errors.append(invalid_about_filename)
167-
"""
178+
168179
newline_in_file_err = check_newline_in_file_field(component)
169180
if newline_in_file_err:
170181
errors.extend(newline_in_file_err)
171-
"""
182+
172183
if errors:
173184
return errors, abouts
174185
except Exception as e:
@@ -178,7 +189,7 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
178189
return errors, abouts
179190

180191
custom_fields_list = []
181-
for fields in inventory:
192+
for fields in stripped_inv:
182193
# check does the input contains the required fields
183194
required_fields = model.About.required_fields
184195

src/attributecode/util.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -803,6 +803,20 @@ def write_licenses(lic_dict, location):
803803
return errors
804804

805805

806+
def strip_inventory_value(inventory):
    """
    Return a new list of dictionaries built from `inventory`, a list of
    dictionaries, where leading and trailing whitespace (including newline
    characters) is stripped from every value.

    Whitespace or newlines inside a value are preserved. Values that have no
    `strip` method (such as None or numbers) are kept unchanged instead of
    raising an AttributeError. The `inventory` argument and its dictionaries
    are not modified.
    """
    stripped_inventory = []
    for component in inventory:
        comp_dict = {}
        for key, value in component.items():
            # Duck-type on `strip` rather than checking for `str` so that
            # both text types are handled on Python 2 and Python 3, while
            # non-text values pass through untouched.
            if hasattr(value, 'strip'):
                value = value.strip()
            comp_dict[key] = value
        stripped_inventory.append(comp_dict)
    return stripped_inventory
818+
819+
806820
"""
807821
Return True if a string s name is safe to use as an attribute name.
808822
"""

tests/test_attrib.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,8 @@ def test_scancode_input_dup_lic_match(self):
245245
test_file = get_test_loc(
246246
'test_attrib/scancode_input/sc-dup-lic-match.json')
247247
errors, abouts = gen.load_inventory(test_file, scancode=True)
248+
print("############################")
249+
print(errors)
248250
# Check if there is error's level > INFO
249251
result = [(level, e) for level, e in errors if level > INFO]
250252
assert result == []

tests/test_util.py

Lines changed: 32 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -162,7 +162,7 @@ def test_check_file_names_with_dupes_return_errors(self):
162162
Error(
163163
CRITICAL,
164164
"Duplicate files: 'some/PAth' and 'some/path' have the same case-insensitive file name")
165-
]
165+
]
166166
assert expected == result
167167

168168
def test_check_file_names_without_dupes_return_no_error(self):
@@ -196,9 +196,11 @@ def test_check_file_names_with_invalid_chars_return_errors(self):
196196
]
197197
import sys
198198
if sys.version_info[0] < 3: # python2
199-
expected = [Error(CRITICAL, b"Invalid characters '\xe9\xe8' in file name at: 'Accessibilit\xe9/ p\xe9rim\xe8tre'")]
199+
expected = [Error(
200+
CRITICAL, b"Invalid characters '\xe9\xe8' in file name at: 'Accessibilit\xe9/ p\xe9rim\xe8tre'")]
200201
else:
201-
expected = [Error(CRITICAL, "Invalid characters ':' in file name at: 'locations/in:valid'")]
202+
expected = [
203+
Error(CRITICAL, "Invalid characters ':' in file name at: 'locations/in:valid'")]
202204
result = util.check_file_names(paths)
203205

204206
assert expected[0].message == result[0].message
@@ -272,7 +274,8 @@ def test_get_about_locations(self):
272274
assert expected == result
273275

274276
def test_get_locations_can_yield_a_single_file(self):
275-
test_file = get_test_loc('test_util/about_locations/file with_spaces.ABOUT')
277+
test_file = get_test_loc(
278+
'test_util/about_locations/file with_spaces.ABOUT')
276279
result = list(util.get_locations(test_file))
277280
assert 1 == len(result)
278281

@@ -351,13 +354,15 @@ def test_format_about_dict_output(self):
351354

352355
def test_load_csv_microsoft_utf_8(self):
353356
test_file = get_test_loc('test_util/csv/test_ms_utf8.csv')
354-
expected = [dict([(u'about_resource', u'/myFile'), (u'name', u'myName')])]
357+
expected = [
358+
dict([(u'about_resource', u'/myFile'), (u'name', u'myName')])]
355359
result = util.load_csv(test_file)
356360
assert expected == result
357361

358362
def test_load_csv_utf_8(self):
359363
test_file = get_test_loc('test_util/csv/test_utf8.csv')
360-
expected = [dict([(u'about_resource', u'/myFile'), (u'name', u'\u540d')])]
364+
expected = [
365+
dict([(u'about_resource', u'/myFile'), (u'name', u'\u540d')])]
361366
result = util.load_csv(test_file)
362367
assert expected == result
363368

@@ -409,7 +414,7 @@ def test_load_non_list_json(self):
409414
'about_resource': '.',
410415
'name': 'AboutCode',
411416
'version': '0.11.0'
412-
}]
417+
}]
413418
result = util.load_json(test_file)
414419
assert expected == result
415420

@@ -420,7 +425,7 @@ def test_load_non_list_json2(self):
420425
'about_resource': '.',
421426
'name': 'AboutCode',
422427
'version': '0.11.0'
423-
}]
428+
}]
424429
result = util.load_json(test_file)
425430
assert expected == result
426431

@@ -450,7 +455,7 @@ def test_load_json_from_scancode(self):
450455
'dirs_count': 0,
451456
'size_count': 0,
452457
'scan_errors': []
453-
}]
458+
}]
454459
result = util.load_scancode_json(test_file)
455460
assert expected == result
456461

@@ -497,7 +502,7 @@ def test_load_yaml_about_file_raise_exception_on__duplicate(self):
497502
try:
498503
saneyaml.load(test, allow_duplicate_keys=False)
499504
self.fail('Exception not raised')
500-
except saneyaml.UnsupportedYamlFeatureError as e :
505+
except saneyaml.UnsupportedYamlFeatureError as e:
501506
assert 'Duplicate key in YAML source: notes' == str(e)
502507

503508
def test_load_yaml_about_file_raise_exception_on_invalid_yaml_ignore_non_key_line(self):
@@ -532,7 +537,7 @@ def test_load_yaml_about_file_with_multiline(self):
532537
try:
533538
saneyaml.load(test, allow_duplicate_keys=False)
534539
self.fail('Exception not raised')
535-
except saneyaml.UnsupportedYamlFeatureError as e :
540+
except saneyaml.UnsupportedYamlFeatureError as e:
536541
# note: the exception is raised only for the first dupe
537542
assert 'Duplicate key in YAML source: owner' == str(e)
538543

@@ -558,7 +563,8 @@ def test_ungroup_licenses(self):
558563
u'https://enterprise.dejacode.com/urn/?urn=urn:dje:license:mit',
559564
u'https://enterprise.dejacode.com/urn/?urn=urn:dje:license:bsd-new']
560565
expected_spdx = [u'MIT', u'BSD-3-Clause']
561-
lic_key, lic_name, lic_file, lic_url, spdx_lic_key, lic_score, _matched_text = util.ungroup_licenses(about)
566+
lic_key, lic_name, lic_file, lic_url, spdx_lic_key, lic_score, _matched_text = util.ungroup_licenses(
567+
about)
562568
assert expected_lic_key == lic_key
563569
assert expected_lic_name == lic_name
564570
assert expected_lic_file == lic_file
@@ -648,3 +654,17 @@ def test_copy_file_with_dir(self):
648654
assert len(licenses) == len(files_list)
649655
for license in licenses:
650656
assert license in files_list
657+
658+
def test_strip_inventory_value(self):
    # Each input row exercises one case: trailing empty newlines, trailing
    # spaces, and a newline embedded inside the value (which must be kept).
    inventory = [
        {
            'about_resource': 'empty_newlines.rpm\n\n',
            'name': 'empty_newlines.rpm',
        },
        {
            'about_resource': 'spaces_after.rpm ',
            'name': 'spaces_after.rpm ',
        },
        {
            'about_resource': 'value_after_newline\n123.rpm ',
            'name': 'value_after',
        },
    ]
    expected = [
        {
            'about_resource': 'empty_newlines.rpm',
            'name': 'empty_newlines.rpm',
        },
        {
            'about_resource': 'spaces_after.rpm',
            'name': 'spaces_after.rpm',
        },
        {
            'about_resource': 'value_after_newline\n123.rpm',
            'name': 'value_after',
        },
    ]
    result = util.strip_inventory_value(inventory)
    assert result == expected

0 commit comments

Comments
 (0)