Merged
Commits
55 commits
b6fc1ee
Merge pull request #268 from sciknoworg/dev
HamedBabaei Aug 22, 2025
c93d0c4
:bookmark: v1.4.0
HamedBabaei Aug 22, 2025
11ad22e
:sparkles: update automated taging
HamedBabaei Aug 22, 2025
a5e0ead
:bug: fix metadata commit
HamedBabaei Aug 22, 2025
8777c16
:bug: fix metadata release
HamedBabaei Aug 22, 2025
a819812
:bookmark: v1.4.1
HamedBabaei Aug 22, 2025
b6bcb69
:bug: repo push
HamedBabaei Aug 22, 2025
50d2d2a
:bug: repo push
HamedBabaei Aug 22, 2025
d1a418a
:bug: add repo push
HamedBabaei Aug 22, 2025
f7943a4
:bug: add repo push
HamedBabaei Aug 22, 2025
2855213
:pencil2: add remote set-url
HamedBabaei Aug 22, 2025
9b29724
:pencil2: add token
HamedBabaei Aug 22, 2025
70f4767
:pencil2: PR automation for metadata
HamedBabaei Aug 22, 2025
2863dea
:pencil2: skip auto-update branch
HamedBabaei Aug 22, 2025
4ac970c
:pencil2: add auto-update merge
HamedBabaei Aug 22, 2025
6f9ee67
:bug: fix peter-evans pull and merge request
HamedBabaei Aug 22, 2025
d69b833
:pencil2: merge only workflow PR
HamedBabaei Aug 22, 2025
14cd3c3
:pencil2:
HamedBabaei Aug 22, 2025
087224e
:pencil2:
HamedBabaei Aug 23, 2025
a4350b3
:pencil2:
HamedBabaei Aug 23, 2025
314e3f4
:pencil2: fix PR and add delete to auto-update branch
HamedBabaei Aug 24, 2025
8a167f9
:pencil2: minor fix
HamedBabaei Aug 24, 2025
f91ad2d
:bookmark: Update metadata after release (#269)
HamedBabaei Aug 24, 2025
301250b
:bookmark: Update metadata after release (#270)
HamedBabaei Aug 24, 2025
1aed3ab
:bookmark: Update metadata after release (#271)
HamedBabaei Aug 24, 2025
62716f6
:pencil2: fix auto delete
HamedBabaei Aug 24, 2025
c758419
Merge remote-tracking branch 'origin/main'
HamedBabaei Aug 24, 2025
0514a82
:pencil2: update maintenance plan
HamedBabaei Sep 1, 2025
86e2766
:sparkles: update requirements
HamedBabaei Sep 1, 2025
4e543ab
:bookmark: v1.4.2
HamedBabaei Sep 1, 2025
6658155
Merge remote-tracking branch 'origin/main'
HamedBabaei Sep 1, 2025
f0a4326
:pencil2: update requirements
HamedBabaei Sep 1, 2025
0445926
:bookmark: Update metadata after release (#272)
HamedBabaei Sep 1, 2025
e0e44d5
:sparkles: update library dependencies/ GPU&CPU installation
HamedBabaei Sep 7, 2025
832ae50
:sparkles: update library dependencies
HamedBabaei Sep 7, 2025
2dcd294
:sparkles: update library dependencies
HamedBabaei Sep 7, 2025
90e7de8
:bug: add explicit priority to torch
HamedBabaei Sep 7, 2025
2dc0cb6
:bug: add default priority to torch
HamedBabaei Sep 7, 2025
d4dd11b
:bug: fix typo
HamedBabaei Sep 7, 2025
a723d4f
:bug: revert back changes
HamedBabaei Sep 7, 2025
a6b708b
:bug: bug fix in learner
HamedBabaei Sep 7, 2025
919ad60
:bookmark: Update metadata after release (#273)
HamedBabaei Sep 7, 2025
6d0f49c
:memo: fix typo
HamedBabaei Sep 7, 2025
d0c37e6
:bookmark: v1.4.3
HamedBabaei Sep 7, 2025
e904a1e
:pencil2: add torch versioning to setups
HamedBabaei Sep 7, 2025
63de7e5
Merge remote-tracking branch 'origin/main'
HamedBabaei Sep 7, 2025
dc1c8d1
:bookmark: Update metadata after release (#274)
HamedBabaei Sep 9, 2025
4b6044a
:pencil2: add trust_remote_code=True for retrievers
HamedBabaei Sep 9, 2025
055b2e5
:bookmark: v1.4.4
HamedBabaei Sep 9, 2025
7f4f86e
:sparkles: add batch retriever
HamedBabaei Sep 16, 2025
7f17143
:memo: add short note on batch retriever
HamedBabaei Sep 16, 2025
ab98645
:pencil2: minor fix
HamedBabaei Sep 16, 2025
8b4919a
Merge remote-tracking branch 'origin/main'
HamedBabaei Sep 16, 2025
5a391d5
:bookmark: v1.4.5
HamedBabaei Sep 16, 2025
3ca7b46
:bookmark: Update metadata after release (#275)
HamedBabaei Sep 16, 2025
57 changes: 42 additions & 15 deletions .github/workflows/python-publish.yml
@@ -1,4 +1,7 @@
name: Publish Python Package
permissions:
contents: write
pull-requests: write

on:
push:
@@ -13,6 +16,8 @@ jobs:
steps:
- name: Checkout code
uses: actions/checkout@v4
with:
persist-credentials: false # important to use PAT for pushing

- name: Set up Python
uses: actions/setup-python@v4
@@ -24,31 +29,53 @@ jobs:
curl -sSL https://install.python-poetry.org | python3 -
echo "export PATH=\"$HOME/.local/bin:$PATH\"" >> $GITHUB_ENV

- name: Install poetry-dynamic-versioning plugin
run: poetry self add "poetry-dynamic-versioning[plugin]"

- name: Install dependencies
run: poetry install --no-interaction --no-ansi

- name: Build the package
run: poetry build

- name: Configure Poetry for PyPI
# Generate metadata after publishing
- name: Generate Dublin Core metadata
run: |
poetry config pypi-token.pypi ${{ secrets.TWINE_API_TOKEN }}
mkdir -p metadata
poetry run python -c "from ontolearner import OntoLearnerMetadataExporter; OntoLearnerMetadataExporter().export('metadata/ontolearner-metadata.rdf')"

- name: Publish to PyPI
- name: Create and update Pull Request
id: cpr
uses: peter-evans/create-pull-request@v7
with:
token: ${{ secrets.REPO_PUSH_TOKEN }}
branch: auto-update
base: main
commit-message: ":bookmark: Update metadata after release"
title: "🤖 Automated metadata update"
body: "This PR updates the Dublin Core metadata after release."
add-paths: |
metadata/ontolearner-metadata.rdf

# Automatically merge the PR if possible
- name: Auto-merge PR
if: steps.cpr.outputs.pull-request-operation == 'created'
uses: peter-evans/enable-pull-request-automerge@v3
with:
token: ${{ secrets.REPO_PUSH_TOKEN }}
pull-request-number: ${{ steps.cpr.outputs.pull-request-number }}
merge-method: squash

- name: Delete auto-update branch
if: steps.cpr.outputs.pull-request-operation == 'created'
run: |
poetry publish --no-interaction --no-ansi
git remote set-url origin https://x-access-token:${{ secrets.REPO_PUSH_TOKEN }}@github.com/${{ github.repository }}
git push origin --delete auto-update

# 🔹 NEW STEP: Generate metadata after publishing
- name: Generate Dublin Core metadata
- name: Configure Poetry for PyPI
run: |
mkdir -p metadata
poetry run python -c "from ontolearner import OntoLearnerMetadataExporter; OntoLearnerMetadataExporter().export('metadata/ontolearner-metadata.rdf')"
poetry config pypi-token.pypi ${{ secrets.TWINE_API_TOKEN }}

# 🔹 Commit metadata back to repo
- name: Commit and push metadata
- name: Publish to PyPI
run: |
git config --global user.name "github-actions[bot]"
git config --global user.email "github-actions[bot]@users.noreply.github.com"
git add metadata/
git commit -m ":bookmark: Update metadata after release"
git push origin HEAD:main
poetry publish --no-interaction --no-ansi
13 changes: 11 additions & 2 deletions .github/workflows/test-package.yml
@@ -2,12 +2,17 @@ name: Test OntoLearner Package

on:
push:
branches: [main]
branches:
- main
- '!auto-update'
pull_request:
branches: [main]
branches:
- main

jobs:
build-and-test:
if: github.head_ref != 'auto-update'

runs-on: ubuntu-latest

strategy:
@@ -28,6 +33,10 @@ jobs:
curl -sSL https://install.python-poetry.org | python3 -
echo "$HOME/.local/bin" >> $GITHUB_PATH

- name: Install poetry-dynamic-versioning plugin
run: |
poetry self add "poetry-dynamic-versioning[plugin]"

- name: Configure Poetry and install dependencies
run: |
poetry config virtualenvs.create false
24 changes: 24 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,29 @@
## Changelog

### v1.4.5 (September 16, 2025)
- add batch retriever feature to `AutoRetrieverLearner`


### v1.4.4 (September 9, 2025)
- add `trust_remote_code=True` for retrievers like Nomic-AI

### v1.4.3 (September 7, 2025)
- Update dependencies
- fix bug in learner
- cosmetic fix to the docs

### v1.4.2 (September 1, 2025)
- fix dependency issue for torch and transformers.
- update maintenance plan

### v1.4.1 (August 22, 2025)
- added ontolearner-metadata CI/CD based build.

### v1.4.0 (August 22, 2025)
- added dublin core metadata exporter
- added ontolearner metadata documentation
- added `VERSION` file for versioning

### v1.3.1 (August 13, 2025)
- `Processor` module is operational. Fixed with ease of use principles.
- The huggingface readme files template are updated.
2 changes: 1 addition & 1 deletion CITATION.cff
@@ -31,5 +31,5 @@ keywords:
- Large Language Models
- Text-to-ontology
license: MIT
version: 1.3.1
version: 1.4.5
date-released: '2025'
21 changes: 11 additions & 10 deletions MAINTENANCE.md
@@ -25,16 +25,17 @@ A core team will be responsible for the ongoing maintenance of OntoLearner, incl

A roadmap for new features and improvements is presented below, ensuring the library evolves in response to user needs and feedback. This list will be updated regularly as we survey work across the ontology alignment field to keep the library's methods diverse.

| Category | Description | Status |
|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------:|
|Ontologizer| Adding more ontologies to the OntoLearner | InProgress|
| Reasoning | Integration of reasoning-oriented prompt evaluation tasks to test LLM capabilities in generating consistent and logically valid ontological structures (e.g., subclass chains, disjointness, transitivity). | TODO |
| Agentic | Support for agent-based extensions using platforms like [CrewAI](https://github.com/crewAIInc/crewAI) to enable autonomous, multi-step ontology engineering workflows coordinated through modular agents. | TODO |
|Documentation| Adding more documentation and tutorials | InProgress|
|Testing| Adding unittest to support different stages of modularization | InProgress|
|Learner| Incorporating more learner models. Including those from LLMs4OL challenge | InProgress|
|Reasoning| Adding reasoning techniques | To-Do|
|...| ... |...|
| Category | Description | Status |
|-----------------|-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|--------------|
| Ontologizer | Adding more ontologies to the OntoLearner | In Progress |
| Reasoning | Integration of reasoning-oriented prompt evaluation tasks to test LLM capabilities in generating consistent and logically valid ontological structures (e.g., subclass chains, disjointness, transitivity). | TODO |
| Ontology Search | Enabling search across relations, individuals, and axioms for enhanced exploration and debugging of ontologies. | TODO |
| Agentic | Support for agent-based extensions using platforms like [CrewAI](https://github.com/crewAIInc/crewAI) to enable autonomous, multi-step ontology engineering workflows coordinated through modular agents. | TODO |
| Documentation | Adding more documentation and tutorials | In Progress |
| Testing | Adding unittest to support different stages of modularization | In Progress |
| Learner | Incorporating more learner models, including those from the LLMs4OL 2024 challenge (to be put into action) and 2025 challenge (to be integrated). | In Progress |
| UI / Visualization | Developing user interfaces for interactive exploration and visualization of ontologies. | TODO |
| ...| ....|...|

> **If you would like your ontology learning model or feature included in OntoLearner, don't hesitate to contact us via [GitHub Issues](https://github.com/sciknoworg/ontolearner/issues) or via email to [[email protected]](mailto:[email protected])**.

4 changes: 2 additions & 2 deletions docs/source/learners/rag.rst
@@ -32,7 +32,7 @@ We start by importing necessary components from the ontolearner package, loading
# Load the AgrO ontology (an agricultural domain ontology)
ontology = AgrO()
ontology.load()
ontological_data = ontology.extract(),
ontological_data = ontology.extract()

# Extract structured data from the ontology and split into train/test sets
train_data, test_data = train_test_split(
@@ -111,7 +111,7 @@ You initialize the ``LearnerPipeline`` by directly providing the ``retriever_id`
# Load the AgrO ontology, which contains concepts related to wines, their properties, and categories
ontology = AgrO()
ontology.load() # Load entities, types, and structured term annotations from the ontology
ontological_data = ontology.extract(),
ontological_data = ontology.extract()
# Extract term-typing instances and split into train and test sets
train_data, test_data = train_test_split(
ontological_data,
8 changes: 8 additions & 0 deletions docs/source/learners/retrieval.rst
@@ -70,6 +70,14 @@ You will see the evaluation results.
* T5 models (e.g., "google/flan-t5-base")
* Nomic-AI models

When working with large contexts, the retriever model may run into memory issues. To address this, OntoLearner’s ``AutoRetrieverLearner`` provides a ``batch_size`` argument. When set, the retriever computes similarities in smaller batches instead of computing the full cosine similarity against all stored knowledge embeddings at once, which reduces peak memory usage. To use it:

.. code-block:: python

ret_learner = AutoRetrieverLearner(top_k=5, batch_size=1024)
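To see what batching buys, the full similarity computation can be replaced by per-batch passes over the stored embeddings. The sketch below is illustrative only — ``batched_cosine_topk`` and its arguments are not part of the OntoLearner API, just a minimal NumPy rendering of the idea:

```python
import numpy as np

def batched_cosine_topk(query, knowledge, batch_size=1024, top_k=5):
    """Return indices of the top_k rows of `knowledge` most cosine-similar
    to `query`, computing similarities one batch at a time to cap memory."""
    q = query / np.linalg.norm(query)  # normalize the query once
    scores = []
    for start in range(0, len(knowledge), batch_size):
        chunk = knowledge[start:start + batch_size]
        # cosine similarity per row: dot product over row norms
        scores.append(chunk @ q / np.linalg.norm(chunk, axis=1))
    scores = np.concatenate(scores)
    return np.argsort(scores)[::-1][:top_k]

rng = np.random.default_rng(0)
kb = rng.normal(size=(10_000, 64))   # stand-in for stored knowledge embeddings
query = rng.normal(size=64)
print(batched_cosine_topk(query, kb))
```

The result is identical for any ``batch_size``; only the peak memory of the score computation changes, which is why a single knob suffices for large knowledge bases.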



Pipeline Usage
-----------------------

22 changes: 11 additions & 11 deletions docs/source/ontologizer/metadata.rst
@@ -3,7 +3,7 @@ Metadata

.. note::

OntoLearner Metadata will be created automatically at Github under `metadata/ <https://github.com/sciknoworg/OntoLearner/tree/main/metadata>`_ directory, and it is available for download after ``ontolearner > 1.3.1`` also at `Releases <https://github.com/sciknoworg/OntoLearner/releases>`_ per release.
OntoLearner Metadata will be created automatically at Github under `metadata/ <https://github.com/sciknoworg/OntoLearner/tree/main/metadata>`_ directory, and it is available for download after ``ontolearner > 1.4.0`` also at `Releases <https://github.com/sciknoworg/OntoLearner/releases>`_ per release.

.. hint::

@@ -31,7 +31,7 @@ The ``OntoLearnerMetadataExporter`` is a utility class for generating **Dublin C
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">

<!-- Top-level collection -->
<ontologizer:Collection rdf:about="https://ontolearner.readthedocs.io/benchmarking/">
<ontologizer:Collection rdf:about="https://ontolearner.readthedocs.io/benchmarking/benchmark.html">
<dc:title>OntoLearner Benchmark Ontologies</dc:title>
<dc:description>This Dublin Core metadata collection describes ontologies benchmarked in OntoLearner. It includes information such as title, creator, format, license, and version.</dc:description>
<dc:creator>OntoLearner Team</dc:creator>
@@ -74,7 +74,7 @@ The following table summarizes the key **Dublin Core metadata properties** captu
- NCI Thesaurus (NCIt)
- Ontology full name
* - ``dcterms:description``
- See above example RDF structure
- NCI Thesaurus (NCIt) is a reference terminology that includes broad coverage of the cancer domain...
- Detailed ontology description
* - ``dcterms:creator``
- NCI
@@ -89,7 +89,7 @@ The following table summarizes the key **Dublin Core metadata properties** captu
- Creative Commons 4.0
- License information
* - ``dcterms:source``
- URL
- `https://terminology.tib.eu/ts/ontologies/NCIT <https://terminology.tib.eu/ts/ontologies/NCIT>`_
- Download or reference URL
* - ``dcterms:subject``
- Medicine
@@ -102,13 +102,13 @@ The following represents the benchmark collection info. The `dcterms:hasVersion`

.. code-block:: xml

<ontologizer:Collection rdf:about="https://ontolearner.readthedocs.io/benchmarking/">
<dc:title>OntoLearner Benchmark Ontologies</dc:title>
<dc:description>This Dublin Core metadata collection describes ontologies benchmarked in OntoLearner. It includes information such as title, creator, format, license, and version.</dc:description>
<dc:creator>OntoLearner Team</dc:creator>
<dcterms:license>MIT License</dcterms:license>
<dcterms:hasVersion>1.4.0</dcterms:hasVersion>
</ontologizer:Collection>
<ontologizer:Collection rdf:about="https://ontolearner.readthedocs.io/benchmarking/benchmark.html">
<dc:title>OntoLearner Benchmark Ontologies</dc:title>
<dc:description>This Dublin Core metadata collection describes ontologies benchmarked in OntoLearner. It includes information such as title, creator, format, license, and version.</dc:description>
<dc:creator>OntoLearner Team</dc:creator>
<dcterms:license>MIT License</dcterms:license>
<dcterms:hasVersion>1.4.0</dcterms:hasVersion>
</ontologizer:Collection>
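For illustration, RDF/XML of this shape can be produced with a short standard-library sketch. This is not the actual ``OntoLearnerMetadataExporter`` implementation, and the ``ontologizer`` namespace URI below is an assumption for the example:

```python
import xml.etree.ElementTree as ET

# Namespace URIs: dc/dcterms/rdf are the standard ones; the ontologizer
# URI is a placeholder assumed for this sketch.
NS = {
    "rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "dc": "http://purl.org/dc/elements/1.1/",
    "dcterms": "http://purl.org/dc/terms/",
    "ontologizer": "https://ontolearner.readthedocs.io/ontologizer#",
}
for prefix, uri in NS.items():
    ET.register_namespace(prefix, uri)

def build_collection(about, title, creator, license_, version):
    """Build an rdf:RDF tree holding one ontologizer:Collection element."""
    root = ET.Element(f"{{{NS['rdf']}}}RDF")
    coll = ET.SubElement(root, f"{{{NS['ontologizer']}}}Collection",
                         {f"{{{NS['rdf']}}}about": about})
    ET.SubElement(coll, f"{{{NS['dc']}}}title").text = title
    ET.SubElement(coll, f"{{{NS['dc']}}}creator").text = creator
    ET.SubElement(coll, f"{{{NS['dcterms']}}}license").text = license_
    ET.SubElement(coll, f"{{{NS['dcterms']}}}hasVersion").text = version
    return root

root = build_collection(
    "https://ontolearner.readthedocs.io/benchmarking/benchmark.html",
    "OntoLearner Benchmark Ontologies",
    "OntoLearner Team",
    "MIT License",
    "1.4.0",
)
print(ET.tostring(root, encoding="unicode"))
```

In practice a dedicated RDF library handles serialization details (datatypes, pretty-printing) more robustly; the point here is only the element/property structure of the collection record.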

Exporter
--------------------
1 change: 1 addition & 0 deletions examples/retriever_learner.py
@@ -9,6 +9,7 @@
train_data, test_data = train_test_split(ontology.extract(), test_size=0.2, random_state=42)

# Initialize a retriever-style learner for relation extraction tasks
# batch_size lets AutoRetrieverLearner process larger knowledge bases in chunks
ret_learner = AutoRetrieverLearner(top_k=5)

# Load a pre-trained retriever model using its identifier