@@ -102,7 +102,6 @@ def test_numbered_duplicates_empty_list():
102102 result = _numbered_duplicates ([])
103103 assert result == []
104104
105-
106105 def test_json_to_dict_empty_input ():
107106 result = _json_to_dict ({})
108107 assert result == {}
@@ -114,7 +113,6 @@ def sample_dict():
114113 return {
115114 "url" : "https://example.com/" ,
116115 "errors" : [],
117- "jsonld_errors" : [],
118116 "title" : "Example Domain" ,
119117 "h1" : "Example Domain" ,
120118 "h2" : "Sub‑heading" ,
@@ -138,11 +136,11 @@ def test_filterdict_returns_original_when_no_filters(sample_dict):
138136@pytest .mark .parametrize (
139137 "patterns, expected_keys" ,
140138 [
141- (["^title$" ], {"title" , "url" , "errors" , "jsonld_errors" }),
142- (["^h\\ d$" ], {"h1" , "h2" , "url" , "errors" , "jsonld_errors" }),
139+ (["^title$" ], {"title" , "url" , "errors" }),
140+ (["^h\\ d$" ], {"h1" , "h2" , "url" , "errors" }),
143141 (
144142 ["(title|meta_description)" ],
145- {"title" , "meta_description" , "url" , "errors" , "jsonld_errors" },
143+ {"title" , "meta_description" , "url" , "errors" },
146144 ),
147145 ],
148146)
@@ -162,7 +160,7 @@ def test_filterdict_keep_only(sample_dict, patterns, expected_keys):
162160def test_filterdict_discard_only (sample_dict , patterns , forbidden_keys ):
163161 result = _filter_crawl_dict (sample_dict , discard_columns = patterns )
164162 assert not (set (result ) & set (forbidden_keys ))
165- for k in {"url" , "errors" , "jsonld_errors" }:
163+ for k in {"url" , "errors" }:
166164 assert k in result
167165
168166
@@ -172,14 +170,14 @@ def test_filterdict_discard_overrides_keep(sample_dict):
172170 result = _filter_crawl_dict (sample_dict , keep , discard )
173171 assert "h1" in result
174172 assert "h2" not in result
175- for k in {"url" , "errors" , "jsonld_errors" }:
173+ for k in {"url" , "errors" }:
176174 assert k in result
177175
178176
@pytest.mark.parametrize("pattern", [r"url", r"errors"])
def test_filterdict_always_include_never_dropped(sample_dict, pattern):
    """Discard patterns that match ``url`` or ``errors`` must not remove them."""
    filtered = _filter_crawl_dict(sample_dict, discard_columns=[pattern])
    # Both mandatory columns have to survive whichever pattern was discarded.
    assert all(column in filtered for column in ("url", "errors"))
184182
185183
@@ -190,3 +188,34 @@ def test_filterdict_empty_input_dict_returns_empty():
def test_filterdict_invalid_regex_raises(sample_dict):
    """A malformed keep pattern must surface as ``re.error``."""
    # A lone "[" opens a character set that is never closed -> invalid regex.
    malformed_patterns = ["["]
    with pytest.raises(re.error):
        _filter_crawl_dict(sample_dict, keep_columns=malformed_patterns)
191+
192+
def test_filterdict_jsonld_errors_can_be_filtered():
    """Check that ``jsonld_errors`` can be discarded or kept via patterns.

    Discarding it removes the key while ``url`` and ``errors`` remain;
    keeping it yields exactly ``url``, ``errors`` and ``jsonld_errors``.
    """
    column = "jsonld_errors"
    crawl_row = {
        "url": "https://example.com/",
        "errors": [],
        "title": "Example Domain",
        "jsonld_errors": ["Some JSON-LD error"],
        "custom_key": "value",
    }

    # Discard path: the column disappears, the mandatory ones do not.
    discarded = _filter_crawl_dict(crawl_row, discard_columns=[column])
    assert column not in discarded
    for mandatory in ("url", "errors"):  # always included
        assert mandatory in discarded

    # Keep path: only the requested column plus the mandatory ones remain.
    kept = _filter_crawl_dict(crawl_row, keep_columns=[column])
    assert set(kept.keys()) == {"url", "errors", "jsonld_errors"}
210+
211+
def test_filterdict_jsonld_errors_not_always_present():
    """Filtering a row without ``jsonld_errors`` must not introduce that key."""
    crawl_row = dict(
        url="https://example.com/",
        errors=[],
        title="Example Domain",
    )
    expected_keys = {"url", "errors", "title"}

    filtered = _filter_crawl_dict(crawl_row, keep_columns=["title"])

    assert set(filtered.keys()) == expected_keys
0 commit comments