CogStack
diff --git a/‎medcat-v1/.dockerignore‎
Lines changed: 5 additions & 0 deletions b/‎medcat-v1/.dockerignore‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎medcat-v1/.flake8‎
Lines changed: 32 additions & 0 deletions b/‎medcat-v1/.flake8‎
Lines changed: 32 additions & 0 deletions
diff --git a/‎medcat-v1/.github/workflows/codeql.yml‎
Lines changed: 95 additions & 0 deletions b/‎medcat-v1/.github/workflows/codeql.yml‎
Lines changed: 95 additions & 0 deletions
diff --git a/‎medcat-v1/.github/workflows/main.yml‎
Lines changed: 123 additions & 0 deletions b/‎medcat-v1/.github/workflows/main.yml‎
Lines changed: 123 additions & 0 deletions
diff --git a/‎medcat-v1/.github/workflows/production.yml‎
Lines changed: 59 additions & 0 deletions b/‎medcat-v1/.github/workflows/production.yml‎
Lines changed: 59 additions & 0 deletions
diff --git a/‎medcat-v1/.gitignore‎
Lines changed: 58 additions & 0 deletions b/‎medcat-v1/.gitignore‎
Lines changed: 58 additions & 0 deletions
diff --git a/‎medcat-v1/.readthedocs.yaml‎
Lines changed: 19 additions & 0 deletions b/‎medcat-v1/.readthedocs.yaml‎
Lines changed: 19 additions & 0 deletions
@@ -0,0 +1,5 @@
+*.DS_Store
+.git
+.idea
+.dat
+venv
@@ -0,0 +1,32 @@
+[flake8]
+extend-ignore =
+    E124,
+       ; closing bracket does not match visual indentation
+    E127,
+       ; continuation line over-indented for visual indent
+    E128,
+       ; continuation line under-indented for visual indent
+    E221,
+       ; multiple spaces before operator
+    E225,
+       ; missing whitespace around operator
+    E231,
+       ; missing whitespace after ',' and ':'
+    E252,
+       ; missing whitespace around parameter equal
+    E261,
+       ; at least two spaces before inline comment
+    E265,
+       ; block comment should start with '# '
+    E272,
+       ; multiple spaces before keyword
+    E303,
+       ; too many blank lines
+    E501,
+       ; line too long
+    W291,
+       ; trailing whitespace
+    W605,
+       ; invalid escape sequence
+
+per-file-ignores = __init__.py:F401
@@ -0,0 +1,95 @@
+# For most projects, this workflow file will not need changing; you simply need
+# to commit it to your repository.
+#
+# You may wish to alter this file to override the set of languages analyzed,
+# or to provide custom queries or build logic.
+#
+# ******** NOTE ********
+# We have attempted to detect the languages in your repository. Please check
+# the `language` matrix defined below to confirm you have the correct set of
+# supported CodeQL languages.
+#
+name: "CodeQL"
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+  schedule:
+    - cron: '36 14 * * 0'
+
+jobs:
+  analyze:
+    name: Analyze (${{ matrix.language }})
+    # Runner size impacts CodeQL analysis time. To learn more, please see:
+    #   - https://gh.io/recommended-hardware-resources-for-running-codeql
+    #   - https://gh.io/supported-runners-and-hardware-resources
+    #   - https://gh.io/using-larger-runners (GitHub.com only)
+    # Consider using larger runners or machines with greater resources for possible analysis time improvements.
+    runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }}
+    timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }}
+    permissions:
+      # required for all workflows
+      security-events: write
+
+      # required to fetch internal or private CodeQL packs
+      packages: read
+
+      # only required for workflows in private repositories
+      actions: read
+      contents: read
+
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+        - language: javascript-typescript
+          build-mode: none
+        - language: python
+          build-mode: none
+        # CodeQL supports the following keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift'
+        # Use `c-cpp` to analyze code written in C, C++ or both
+        # Use 'java-kotlin' to analyze code written in Java, Kotlin or both
+        # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both
+        # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis,
+        # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning.
+        # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how
+        # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages
+    steps:
+    - name: Checkout repository
+      uses: actions/checkout@v4
+
+    # Initializes the CodeQL tools for scanning.
+    - name: Initialize CodeQL
+      uses: github/codeql-action/init@v3
+      with:
+        languages: ${{ matrix.language }}
+        build-mode: ${{ matrix.build-mode }}
+        # If you wish to specify custom queries, you can do so here or in a config file.
+        # By default, queries listed here will override any specified in a config file.
+        # Prefix the list here with "+" to use these queries and those in the config file.
+
+        # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
+        # queries: security-extended,security-and-quality
+
+    # If the analyze step fails for one of the languages you are analyzing with
+    # "We were unable to automatically build your code", modify the matrix above
+    # to set the build mode to "manual" for that language. Then modify this step
+    # to build your code.
+    # ℹ️ Command-line programs to run using the OS shell.
+    # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun
+    - if: matrix.build-mode == 'manual'
+      shell: bash
+      run: |
+        echo 'If you are using a "manual" build mode for one or more of the' \
+          'languages you are analyzing, replace this with the commands to build' \
+          'your code, for example:'
+        echo '  make bootstrap'
+        echo '  make release'
+        exit 1
+
+    - name: Perform CodeQL Analysis
+      uses: github/codeql-action/analyze@v3
+      with:
+        category: "/language:${{matrix.language}}"
@@ -0,0 +1,123 @@
+name: build
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+
+jobs:
+  build:
+
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: [ '3.9', '3.10', '3.11', '3.12' ]
+      max-parallel: 4
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements-dev.txt
+      - name: Check types
+        run: |
+          python -m mypy --follow-imports=normal medcat
+      - name: Lint
+        run: |
+          flake8 medcat
+      - name: Pydantic 1 check
+        # NOTE: the following will look for use of pydantic1-specific .dict() method and .__fields__ attribute
+        #       if there are some (that are not annotated for pydantic1 backwards compatibility) a non-zero exit
+        #       code is returned, which will halt the workflow and print out the offending parts
+        run: |
+          grep "\.__fields__" medcat -rI | grep -v "# 4pydantic1 - backwards compatibility" | tee /dev/stderr | test $(wc -l) -eq 0
+          grep "\.dict(" medcat -rI | grep -v "# 4pydantic1 - backwards compatibility" | tee /dev/stderr | test $(wc -l) -eq 0
+      - name: Test
+        run: |
+          all_files=$(git ls-files | grep '^tests/.*\.py$' | grep -v '/__init__\.py$' | sed 's/\.py$//' | sed 's/\//./g')
+          num_files=$(echo "$all_files" | wc -l)
+          midpoint=$((num_files / 2))
+          first_half_nl=$(echo "$all_files" | head -n $midpoint)
+          second_half_nl=$(echo "$all_files" | tail -n +$(($midpoint + 1)))
+          timeout 25m python -m unittest ${first_half_nl[@]}
+          timeout 25m python -m unittest ${second_half_nl[@]}
+      - name: Regression
+        run: source tests/resources/regression/run_regression.sh
+      - name: Model backwards compatibility
+        run: source tests/resources/model_compatibility/check_backwards_compatibility.sh
+      - name: Get the latest release version
+        id: get_latest_release
+        uses: actions/github-script@v6
+        with:
+          script: |
+            const latestRelease = await github.rest.repos.getLatestRelease({
+              owner: context.repo.owner,
+              repo: context.repo.repo
+            });
+            core.setOutput('latest_version', latestRelease.data.tag_name);
+      - name: Make sure there's no deprecated methods that should be removed.
+        # only run this for main -> production PR. I.e just before doing a release.
+        if: github.event.pull_request.base.ref == 'main' && github.event.pull_request.head.ref == 'production'
+        env:
+          VERSION: ${{ steps.get_latest_release.outputs.latest_version }}
+        run: |
+          python tests/check_deprecations.py "$VERSION" --next-version --remove-prefix
+
+  publish-to-test-pypi:
+
+    if: |
+      github.repository == 'CogStack/MedCAT' &&
+      github.ref == 'refs/heads/main' &&
+      github.event_name == 'push' &&
+      startsWith(github.ref, 'refs/tags') != true
+    runs-on: ubuntu-24.04
+    timeout-minutes: 45
+    concurrency: publish-to-test-pypi
+    needs: [build]
+
+    steps:
+      - name: Checkout main
+        uses: actions/checkout@v4
+        with:
+          ref: 'main'
+          fetch-depth: 0
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.9
+
+      - name: Install pypa/build
+        run: >-
+          python -m
+          pip install
+          build
+          --user
+
+      - name: Configure the version
+        run: >-
+          sed --in-place
+          "s/node-and-date/no-local-version/g"
+          setup.py
+
+      - name: Build a binary wheel and a source tarball
+        run: >-
+          python -m
+          build
+          --sdist
+          --wheel
+          --outdir dist/
+          .
+
+      - name: Publish dev distribution to Test PyPI
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.TEST_PYPI_API_TOKEN }}
+          repository_url: https://test.pypi.org/legacy/
+        continue-on-error: true
@@ -0,0 +1,59 @@
+name: production
+
+on:
+  push:
+    branches: [ production, "v[0-9]+.[0-9]+.post" ]
+  release:
+    types: [ published , edited ]
+
+jobs:
+  build-n-publish-to-pypi:
+    runs-on: ubuntu-24.04
+    concurrency: build-n-publish-to-pypi
+    if: github.repository == 'CogStack/MedCAT'
+
+    steps:
+      - name: Checkout production
+        uses: actions/checkout@v4
+        with:
+          ref: ${{ github.event.release.target_commitish }}
+          fetch-depth: 0
+
+      - name: Set up Python 3.9
+        uses: actions/setup-python@v4
+        with:
+          python-version: 3.9
+
+      - name: Run UATs
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r requirements-dev.txt
+          all_files=$(git ls-files | grep '^tests/.*\.py$' | grep -v '/__init__\.py$' | sed 's/\.py$//' | sed 's/\//./g')
+          num_files=$(echo "$all_files" | wc -l)
+          midpoint=$((num_files / 2))
+          first_half_nl=$(echo "$all_files" | head -n $midpoint)
+          second_half_nl=$(echo "$all_files" | tail -n +$(($midpoint + 1)))
+          timeout 25m python -m unittest ${first_half_nl[@]}
+          timeout 25m python -m unittest ${second_half_nl[@]}
+
+      - name: Install pypa/build
+        run: >-
+          python -m
+          pip install
+          build
+          --user
+
+      - name: Build a binary wheel and a source tarball
+        run: >-
+          python -m
+          build
+          --sdist
+          --wheel
+          --outdir dist/
+          .
+
+      - name: Publish production distribution to PyPI
+        if: startsWith(github.ref, 'refs/tags') && ! github.event.release.prerelease
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}
@@ -0,0 +1,58 @@
+#Directories to be ignored fully
+/books/
+/articles/
+/other/
+/output/
+/graphics/
+/models/*.dat
+/notebooks/wandb/
+/notebooks/logs/
+/notebooks/results/
+dist/
+tmp/
+*_tmp/
+medcat.egg-info/
+build/
+.idea
+venv
+db.sqlite3
+.ipynb_checkpoints
+
+# vscode 
+.vscode
+
+#tmp and similar files
+.nfs*
+*.log
+*.pyc
+*.out
+*.swp
+*.swn
+tmp_*
+t_*
+tmp_*
+*_tmp
+*.swo
+*.lyx.emergency
+*.lyx#
+*~
+*hidden*
+nohup.out
+tmp.py
+.DS_Store
+*.lock
+*.egg*
+
+# models files
+*.dat
+!examples/*.dat
+./checkpoints/
+
+# Test output
+tests/model_creator/output/*
+
+# docs outputs
+docs/auto/
+docs/_build
+
+models/
@@ -0,0 +1,19 @@
+# .readthedocs.yaml
+# Read the Docs configuration file
+# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
+
+version: 2
+
+build:
+  os: ubuntu-20.04
+  tools:
+    python: "3.10"
+
+sphinx:
+  configuration: docs/conf.py
+
+python:
+  install:
+    - requirements: docs/requirements.txt
+    - method: pip
+      path: .
-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +*.DS_Store
 +.git
 +.idea
 +.dat
 +venv