diff --git a/.github/workflows/action-test-before-PR.yml b/.github/workflows/action-test-before-PR.yml new file mode 100644 index 00000000..d2ca2786 --- /dev/null +++ b/.github/workflows/action-test-before-PR.yml @@ -0,0 +1,34 @@ +name: Run SOMEF tests before PR + +on: + pull_request: + branches: + - dev # TODO(review): confirm whether this should target dev or master + workflow_dispatch: +jobs: + test: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: "3.11" + + - name: Install Poetry + run: curl -sSL https://install.python-poetry.org | python3 - + + - name: Install dependencies + run: poetry install + + - name: Download NLTK data + run: poetry run python -m nltk.downloader wordnet omw-1.4 punkt punkt_tab stopwords + + - name: Configure SOMEF + run: poetry run somef configure -a + + - name: Run pytest + run: poetry run pytest -v src/somef/test diff --git a/docs/pom.md b/docs/pom.md index c9f31631..b03dcda7 100644 --- a/docs/pom.md +++ b/docs/pom.md @@ -110,3 +110,5 @@ package_distribution': [{'result': {'value': 'http://127.0.0.1/websvn/my-project ``` [{'value': 'Java: 1.8', 'name': 'Java', 'version': '1.8'}] ``` + + diff --git a/src/somef/extract_software_type.py b/src/somef/extract_software_type.py index 46577534..43dc695d 100644 --- a/src/somef/extract_software_type.py +++ b/src/somef/extract_software_type.py @@ -1,12 +1,14 @@ import os from pathlib import Path import nbformat +from nbformat.reader import NotJSONError from chardet import detect import re from .extract_workflows import is_file_workflow from .process_results import Result from .utils import constants from .extract_ontologies import is_file_ontology + import pdb @@ -301,14 +303,20 @@ def is_notebook_code(file_path): has_code = False num_code_cells = 0 num_total_cells = 0 - nb = nbformat.read(file_path, as_version=4) - for cell in nb['cells']: - if cell['cell_type'] == 'code': - num_total_cells += 1 - if 
cell['source'].strip(): - num_code_cells += 1 - has_code = True - return has_code + try: + nb = nbformat.read(file_path, as_version=4) + + for cell in nb['cells']: + if cell['cell_type'] == 'code': + num_total_cells += 1 + if cell['source'].strip(): + num_code_cells += 1 + has_code = True + return has_code + except NotJSONError: + return False + except Exception: + return False def has_code_in_rmd(file_path): diff --git a/src/somef/somef_cli.py b/src/somef/somef_cli.py index 80615220..1ea81329 100644 --- a/src/somef/somef_cli.py +++ b/src/somef/somef_cli.py @@ -71,6 +71,7 @@ def cli_get_data(threshold, ignore_classifiers, repo_url=None, doc_src=None, loc logging.info(f"{servidor} is GitLab.") bGitLab = True + print(f"DEBUG: {servidor} is_gitlab = {bGitLab}") if bGitLab: repo_type = constants.RepositoryType.GITLAB repository_metadata, owner, repo_name, def_branch = process_repository.load_online_repository_metadata( @@ -80,6 +81,13 @@ def cli_get_data(threshold, ignore_classifiers, repo_url=None, doc_src=None, loc repo_type, authorization ) + print("\n=== DEBUG GITLAB SELF-HOSTED ===") + print(f"repo_url: {repo_url}") + print(f"owner: {owner}") + print(f"repo_name: {repo_name}") + print(f"def_branch: {def_branch}") + print(f"repo_type: {repo_type}") + print("=================================\n") # download files and obtain path to download folder if readme_only: