Skip to content

Commit 71eb1df

Browse files
authored
Merge pull request #374 from nexB/368-remove-filter
Remove filter
2 parents dde73cb + 23f48b1 commit 71eb1df

File tree

9 files changed

+10
-260
lines changed

9 files changed

+10
-260
lines changed

src/attributecode/attrib.py

Lines changed: 2 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,6 @@
3131
from attributecode.licenses import COMMON_LICENSES
3232
from attributecode.model import parse_license_expression
3333
from attributecode.util import add_unc
34-
from attributecode.util import get_about_file_path
3534

3635

3736
# FIXME: the template dir should be outside the code tree
@@ -167,93 +166,26 @@ def generate_from_file(abouts, template_loc=DEFAULT_TEMPLATE_FILE, variables=Non
167166

168167

169168
def generate_and_save(abouts, output_location, template_loc=None, variables=None,
170-
mapping_file=None, inventory_location=None):
169+
mapping_file=None):
171170
"""
172171
Generate an attribution text from an `abouts` list of About objects, a
173172
`template_loc` template file location and a `variables` optional
174173
mapping of extra variables. Save the generated attribution text in the
175174
`output_location` file.
176175
Return a list of Error objects if any.
177176
178-
FIXME: these three argument are too complex:
179-
180177
Optionally use the `mapping_file` mapping config if provided.
181-
Optionally filter `abouts` object based on the inventory JSON or CSV at `inventory_location`.
182178
"""
183179
updated_abouts = []
184-
lstrip_afp = []
185-
afp_list = []
186-
not_match_path = []
187180
errors = []
188181

189-
if not inventory_location:
190-
updated_abouts = abouts
191-
192-
# FIXME: this is too complex
193-
# Do the following if a filter list (inventory_location) is provided
194-
else:
195-
if not os.path.exists(inventory_location):
196-
# FIXME: this message does not make sense
197-
msg = (u'"INVENTORY_LOCATION" does not exist. Generation halted.')
198-
errors.append(Error(ERROR, msg))
199-
return errors
200-
201-
if inventory_location.endswith('.csv') or inventory_location.endswith('.json'):
202-
# FIXME: we should use the same inventory loading that we use everywhere
203-
204-
try:
205-
# Return a list which contains only the about file path
206-
about_list = get_about_file_path(inventory_location, mapping_file=mapping_file)
207-
# FIXME: why catching all exceptions?
208-
except Exception:
209-
# 'about_file_path' key/column doesn't exist
210-
211-
msg = u"The required key: 'about_file_path' does not exist. Generation halted."
212-
errors.append(Error(ERROR, msg))
213-
return errors
214-
else:
215-
# FIXME: this message does not make sense
216-
msg = u'Only .csv and .json are supported for the "INVENTORY_LOCATION". Generation halted.'
217-
errors.append(Error(ERROR, msg))
218-
return errors
219-
220-
for afp in about_list:
221-
lstrip_afp.append(afp.lstrip('/'))
222-
223-
# return a list of paths that point all to .ABOUT files
224-
about_files_list = as_about_paths(lstrip_afp)
225-
226-
# Collect all the about_file_path
227-
for about in abouts:
228-
afp_list.append(about.about_file_path)
229-
230-
# Get the not matching list if any
231-
for fp in about_files_list:
232-
if not fp in afp_list:
233-
not_match_path.append(fp)
234-
235-
if not_match_path:
236-
if len(not_match_path) == len(about_files_list):
237-
msg = "None of the paths in the provided 'inventory_location' match with the 'LOCATION'."
238-
errors.append(Error(ERROR, msg))
239-
return errors
240-
else:
241-
for path in not_match_path:
242-
msg = 'Path: ' + path + ' cannot be found.'
243-
errors.append(Error(ERROR, msg))
244-
245-
for about in abouts:
246-
for fp in about_files_list:
247-
if about.about_file_path == fp:
248-
updated_abouts.append(about)
249-
250182
# Parse license_expression and save to the license list
251183
for about in updated_abouts:
252184
if not about.license_expression.value:
253185
continue
254186
special_char_in_expression, lic_list = parse_license_expression(about.license_expression.value)
255187
if special_char_in_expression:
256-
msg = (u"The following character(s) cannot be in the licesne_expression: " +
188+
msg = (u"The following character(s) cannot be in the license_expression: " +
257189
str(special_char_in_expression))
258190
errors.append(Error(ERROR, msg))
259191
else:
@@ -274,24 +206,3 @@ def generate_and_save(abouts, output_location, template_loc=None, variables=None
274206
of.write(rendered)
275207

276208
return errors
277-
278-
279-
# FIXME: this function purpose needs to be explained.
280-
def as_about_paths(paths):
281-
"""
282-
Return a list of paths to .ABOUT files from a list of `paths`
283-
strings.
284-
"""
285-
from posixpath import basename
286-
from posixpath import dirname
287-
288-
about_paths = []
289-
for path in paths:
290-
if path.endswith('.ABOUT'):
291-
about_paths.append(path)
292-
else:
293-
# FIXME: this is not the way to check that a path is a directory, too weak
294-
if path.endswith('/'):
295-
path += basename(dirname(path))
296-
about_paths.append(path + '.ABOUT')
297-
return about_paths

src/attributecode/cmd.py

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,6 @@
4343
from attributecode.model import collect_inventory
4444
from attributecode.model import write_output
4545
from attributecode.util import extract_zip
46-
from attributecode.util import inventory_filter
4746

4847

4948
__copyright__ = """
@@ -165,13 +164,6 @@ def validate_extensions(ctx, param, value, extensions=tuple(('.csv', '.json',)))
165164
metavar='OUTPUT',
166165
type=click.Path(exists=False, dir_okay=False, writable=True, resolve_path=True))
167166

168-
# fIXME: this is too complex and should be removed
169-
@click.option('--filter',
170-
multiple=True,
171-
metavar='<key>=<value>',
172-
callback=validate_key_values,
173-
help='Filter the inventory to ABOUT matching these key=value e.g. "license_expression=gpl-2.0')
174-
175167
@click.option('-f', '--format',
176168
is_flag=False,
177169
default='csv',
@@ -203,7 +195,7 @@ def validate_extensions(ctx, param, value, extensions=tuple(('.csv', '.json',)))
203195
@click.help_option('-h', '--help')
204196

205197
def inventory(location, output, mapping, mapping_file,
206-
format, filter, quiet, verbose): # NOQA
198+
format, quiet, verbose): # NOQA
207199
"""
208200
Collect the inventory of .ABOUT file data as CSV or JSON.
209201
@@ -224,10 +216,6 @@ def inventory(location, output, mapping, mapping_file,
224216

225217
errors, abouts = collect_inventory(location, mapping_file=mapping_file)
226218

227-
# FIXME: this is too complex
228-
if filter:
229-
abouts = inventory_filter(abouts, filter)
230-
231219
# Do not write the output if one of the ABOUT files has duplicated keys
232220
# TODO: why do this check here?? Also if this is the place, we should list what the errors are.
233221
dup_error_msg = u'Duplicated keys'
@@ -447,7 +435,6 @@ def attrib(location, output, template, vartext,
447435
template_loc=template,
448436
variables=vartext,
449437
mapping_file=mapping_file,
450-
inventory_location=inventory,
451438
)
452439
errors.extend(attrib_errors)
453440

src/attributecode/transform.py

Lines changed: 7 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,6 @@ def transform_data(rows, transformer):
9292
if errors:
9393
return column_names, data, errors
9494

95-
if transformer.row_filters:
96-
data = list(transformer.filter_rows(data))
97-
9895
return column_names, data, errors
9996

10097

@@ -143,18 +140,6 @@ def transform_data(rows, transformer):
143140
column_filters:
144141
- name
145142
- version
146-
147-
* row_filters:
148-
An optional list of mappings of <column name>: <value> that a source CSV row
149-
should match to be added to the transformed target CSV. If any column value of a
150-
row matches any such filter it is kept. Otherwise it is skipped. Filters are
151-
applied last after all renamings, checks and tranforms and can therefore onlu
152-
use remaining column names.
153-
154-
For instance with this configuration the target CSV will only contain rows that
155-
have a "path" equal to "/root/user/lib":
156-
row_filters:
157-
path : /root/user/lib
158143
'''
159144

160145

@@ -165,15 +150,20 @@ class Transformer(object):
165150
column_renamings = attr.attrib(default=attr.Factory(dict))
166151
required_columns = attr.attrib(default=attr.Factory(list))
167152
column_filters = attr.attrib(default=attr.Factory(list))
168-
row_filters = attr.attrib(default=attr.Factory(list))
169153

170-
# TODO: populate these!
171154
# a list of all the standard columns from AboutCode toolkit
172155
standard_columns = attr.attrib(default=attr.Factory(list), init=False)
173156
# a list of the subset of standard columns that are essential and MUST be
174157
# present for AboutCode toolkit to work
175158
essential_columns = attr.attrib(default=attr.Factory(list), init=False)
176159

160+
# called by attr after the __init__()
161+
def __attrs_post_init__(self, *args, **kwargs):
162+
from attributecode.model import About
163+
about = About()
164+
self.essential_columns = list(about.required_fields)
165+
self.standard_columns = [f.name for f in about.all_fields()]
166+
177167
@classmethod
178168
def default(cls):
179169
"""
@@ -183,7 +173,6 @@ def default(cls):
183173
column_renamings={},
184174
required_columns=[],
185175
column_filters=[],
186-
row_filters=[],
187176
)
188177

189178
@classmethod
@@ -198,7 +187,6 @@ def from_file(cls, location):
198187
column_renamings=data.get('column_renamings', {}),
199188
required_columns=data.get('required_columns', []),
200189
column_filters=data.get('column_filters', []),
201-
row_filters=data.get('row_filters', []),
202190
)
203191

204192
def check_required_columns(self, data):
@@ -257,20 +245,6 @@ def filter_columns(self, data):
257245
items = ((k, v) for k, v in entry.items() if k in column_filters)
258246
yield OrderedDict(items)
259247

260-
def filter_rows(self, data):
261-
"""
262-
Yield a filtered list of mappings from a `data` list of mappings keeping
263-
only items that match any one of the `row_filters` of this Transformer.
264-
Return the data unchanged if no `row_filters` is avilable in this
265-
Transformer.
266-
"""
267-
filters = self.row_filters
268-
for entry in data:
269-
for filt in filters:
270-
for filtered_column_name, filtered_column_value in filt.items():
271-
if entry.get(filtered_column_name) == filtered_column_value:
272-
yield entry
273-
274248

275249
def check_duplicate_columns(column_names):
276250
"""

src/attributecode/util.py

Lines changed: 0 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -348,21 +348,6 @@ def format_output(about_data, mapping_file=None):
348348
order_dict[other_key] = about_data[other_key]
349349
return order_dict
350350

351-
# FIXME: why is this used for
352-
def get_about_file_path(location, mapping_file=None):
353-
"""
354-
Read file at location, return a list of about_file_path.
355-
"""
356-
afp_list = []
357-
if location.endswith('.csv'):
358-
about_data = load_csv(location, mapping_file=mapping_file)
359-
else:
360-
about_data = load_json(location)
361-
362-
for about in about_data:
363-
afp_list.append(about['about_file_path'])
364-
return afp_list
365-
366351

367352
def load_csv(location, mapping_file=None):
368353
"""
@@ -567,34 +552,6 @@ def copy_license_notice_files(fields, base_dir, reference_dir, afp):
567552
print('Cannot copy file at %(from_lic_path)r.' % locals())
568553

569554

570-
# FIXME: this is NOT a util but something to move with inventories or a method
571-
# from About objects
572-
def inventory_filter(abouts, filters):
573-
"""
574-
Return a list of filtered About objects from an `abouts` list of About
575-
object using the `filters` mapping of:
576-
{field_name: [acceptable_values, ....]}
577-
578-
... such that only the About object that have a field_name with a value that
579-
matches one of the acceptable values is returned. Other About object are
580-
filtered out.
581-
"""
582-
matching_abouts = []
583-
for about in abouts:
584-
for field_name, acceptable_values in filters.items():
585-
# Check if the about object has the filtered attribute and if the
586-
# attributed value is the same as the defined in the filter
587-
actual_value = getattr(about, field_name, None)
588-
if actual_value in acceptable_values and not about in matching_abouts:
589-
matching_abouts.append(about)
590-
# FIXME: if it matches once it matches always which is probably not right
591-
break
592-
593-
return matching_abouts
594-
595-
596-
597-
598555
# FIXME: we should use a license object instead
599556
def ungroup_licenses(licenses):
600557
"""

tests/test_util.py

Lines changed: 0 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -397,13 +397,6 @@ def test_load_csv_with_mapping(self):
397397
result = util.load_csv(test_file, mapping_file=DEFAULT_MAPPING)
398398
assert expected == result
399399

400-
def test_get_about_file_path_from_csv_using_mapping(self):
401-
test_file = get_test_loc('test_util/csv/about.csv')
402-
expected = ['about.ABOUT']
403-
result = util.get_about_file_path(
404-
test_file, mapping_file=DEFAULT_MAPPING)
405-
assert expected == result
406-
407400
def test_load_csv_does_convert_column_names_to_lowercase(self):
408401
test_file = get_test_loc('test_util/csv/about_key_with_upper_case.csv')
409402
expected = [OrderedDict(
@@ -474,13 +467,6 @@ def test_load_non_list_json(self):
474467
result = util.load_json(test_file)
475468
assert expected == result
476469

477-
# FIXME: mappings are a CSV-only feature!!!!!
478-
def test_get_about_file_path_from_json_using_mapping(self):
479-
test_file = get_test_loc('test_util/json/expected.json')
480-
expected = ['/load/this.ABOUT']
481-
result = util.get_about_file_path(test_file, mapping_file=DEFAULT_MAPPING)
482-
assert expected == result
483-
484470
def test_load_non_list_json2(self):
485471
test_file = get_test_loc('test_util/json/not_a_list.json')
486472
expected = [OrderedDict([
@@ -632,17 +618,6 @@ def test_load_yaml_about_file_with_multiline(self):
632618
# notes: exception is raised only for the first dupe
633619
assert 'Duplicate key in YAML source: owner' == str(e)
634620

635-
def test_inventory_filter(self):
636-
test_loc = get_test_loc('test_util/inventory_filter')
637-
_errors, abouts = model.collect_inventory(test_loc)
638-
639-
filter_dict = {'name': ['simple']}
640-
# The test loc has 2 .about files, only the simple.about is taken after
641-
# the filtering
642-
updated_abouts = util.inventory_filter(abouts, filter_dict)
643-
for about in updated_abouts:
644-
assert about.name.value == 'simple'
645-
646621
def test_ungroup_licenses(self):
647622
about = [
648623
OrderedDict([

tests/testdata/test_cmd/help/about_inventory_help.txt

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,6 @@ Usage: about inventory [OPTIONS] LOCATION OUTPUT
77
OUTPUT: Path to the JSON or CSV inventory file to create.
88

99
Options:
10-
--filter <key>=<value> Filter the inventory to ABOUT matching these
11-
key=value e.g. "license_expression=gpl-2.0
1210
-f, --format [json|csv] Set OUTPUT inventory file format. [default: csv]
1311
--mapping Use the default built-in "mapping.config" file with
1412
mapping between input keys and .ABOUT field

tests/testdata/test_cmd/help/about_transform_config_help.txt

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,15 +44,3 @@ and "version" columns and no other column:
4444
- name
4545
- version
4646

47-
* row_filters:
48-
An optional list of mappings of <column name>: <value> that a source CSV row
49-
should match to be added to the transformed target CSV. If any column value of a
50-
row matches any such filter it is kept. Otherwise it is skipped. Filters are
51-
applied last after all renamings, checks and tranforms and can therefore onlu
52-
use remaining column names.
53-
54-
For instance with this configuration the target CSV will only contain rows that
55-
have a "path" equal to "/root/user/lib":
56-
row_filters:
57-
path : /root/user/lib
58-

0 commit comments

Comments
 (0)