Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/action-test-before-PR.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,4 +31,4 @@ jobs:
run: poetry run somef configure -a

- name: Run pytest
run: poetry run pytest -v src/somef/test
run: poetry run pytest -v -s src/somef/test
5 changes: 5 additions & 0 deletions src/somef/header_analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,11 @@ def extract_header_content(text: str) -> Tuple[pd.DataFrame, str | None]:

content, none_header_content = mardown_parser.extract_content_per_header(text, headers)
parents = mardown_parser.extract_headers_parents(text)

min_len = min(len(header_list), len(content))
header_list = header_list[:min_len]
content = content[:min_len]

df = pd.DataFrame({
'Header': header_list,
'Content': content,
Expand Down
1 change: 1 addition & 0 deletions src/somef/somef_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,7 @@ def cli_get_data(threshold, ignore_classifiers, repo_url=None, doc_src=None, loc
repository_metadata)
logging.info("--> create excerpts")
excerpts = create_excerpts.create_excerpts(string_list)
logging.info("--> extract text excerpts headers")
excerpts_headers = mardown_parser.extract_text_excerpts_header(readme_unfiltered_text)
header_parents = mardown_parser.extract_headers_parents(readme_unfiltered_text)
score_dict = supervised_classification.run_classifiers(excerpts, file_paths)
Expand Down
50 changes: 25 additions & 25 deletions src/somef/test/test_JSON_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -407,36 +407,36 @@ def test_issue_830(self):
# except Exception as e:
# print(f"Failed to delete {cls.json_file}: {e}")

# def test_issue_862(self):
# """Checks if this repository does not gets stuck when labeling headers"""
# somef_cli.run_cli(threshold=0.8,
# ignore_classifiers=False,
# repo_url=None,
# local_repo=test_data_repositories + "componentInstaller",
# doc_src=None,
# in_file=None,
# output=test_data_path + "test_issue_862.json",
# graph_out=None,
# graph_format="turtle",
# codemeta_out=None,
# pretty=True,
# missing=False,
# readme_only=False)
def test_issue_862(self):
    """Check that this repository does not get stuck when labeling headers.

    Runs the SOMEF CLI against the local ``componentInstaller`` test
    repository, then verifies that the exported JSON contains a non-empty
    ``description`` list whose first result carries a non-empty ``value``.
    The generated JSON file is removed even when the run or an assertion
    fails, so a failing test does not leave artifacts behind.
    """
    output_path = test_data_path + "test_issue_862.json"
    try:
        somef_cli.run_cli(threshold=0.8,
                          ignore_classifiers=False,
                          repo_url=None,
                          local_repo=test_data_repositories + "componentInstaller",
                          doc_src=None,
                          in_file=None,
                          output=output_path,
                          graph_out=None,
                          graph_format="turtle",
                          codemeta_out=None,
                          pretty=True,
                          missing=False,
                          readme_only=False)

        # The context manager closes the handle even if parsing raises.
        with open(output_path, "r") as text_file:
            json_content = json.load(text_file)

        assert "description" in json_content, "Missing 'description' property"

        assert len(json_content["description"]) > 0, "Description list is empty"

        first_desc = json_content["description"][0]["result"]
        assert "value" in first_desc, "Missing 'value' in description result"
        assert first_desc["value"], "Description 'value' is empty"
    finally:
        # Guard the cleanup so a run that never produced the file reports
        # its own error instead of a FileNotFoundError from os.remove.
        if os.path.exists(output_path):
            os.remove(output_path)

def test_issue_859(self):
"""Checks whether a repository without content works fine. Must have just some results from the API."""
Expand Down
8 changes: 6 additions & 2 deletions src/somef/utils/markdown_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,10 @@ def remove_comments(html_text):
-------
Markdown with no HTML comments
"""
comment_pattern = r'<!--(.*?)-->'
html_without_comments = re.sub(comment_pattern, '', html_text, flags=re.DOTALL)
# comment_pattern = r'<!--(.*?)-->'
# comment_pattern = r'<!--[\s\S]*?--\s*>'
comment_pattern = r'<!--[\s\S]*?--.*?>'

html_without_comments = re.sub(comment_pattern, '', html_text)
print(html_without_comments)
return html_without_comments
Loading