@@ -78,25 +78,25 @@ And inside ``configuration_callback()`` implement the response to the configuration request
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ SustainML ML Model Provider Node Implementation."""
-
+
 from sustainml_py.nodes.MLModelNode import MLModelNode
-
+
 # Manage signaling
 import os
 import signal
 import threading
 import time
 import json
-
+
 from rdftool.ModelONNXCodebase import model
 from rdftool.rdfCode import load_graph, get_models_for_problem, get_models_for_problem_and_tag
-
-from rag.rag_backend import answer_question
-
+
+from rag.rag_backend import answer_question, get_allowed_models_for_problem
+
 # Whether to go on spinning or interrupt
 running = False
-
-
+
+
 # Load the list of unsupported models
 def load_unsupported_models(file_path):
     try:
@@ -105,19 +105,19 @@ And inside ``configuration_callback()`` implement the response to the configuration request
     except Exception as e:
         print(f"[WARN] Could not load unsupported list: {e}")
         return []
-
-
+
+
 unsupported_models = load_unsupported_models(os.path.dirname(__file__) + "/unsupported_models.txt")
-
-
+
+
 # Signal handler
 def signal_handler(sig, frame):
     print("\nExiting")
     MLModelNode.terminate()
     global running
     running = False
-
-
+
+
 # User Callback implementation
 # Inputs: ml_model_metadata, app_requirements, hw_constraints, ml_model_baseline, hw_baseline, carbonfootprint_baseline
 # Outputs: node_status, ml_model
@@ -129,11 +129,11 @@ And inside ``configuration_callback()`` implement the response to the configuration request
                   carbonfootprint_baseline,
                   node_status,
                   ml_model):
-
+
     # Callback implementation here
-
+
     print(f"Received Task: {ml_model_metadata.task_id().problem_id()}, {ml_model_metadata.task_id().iteration_id()}")
-
+
     try:
         chosen_model = None
         # Model restriction after various outputs
@@ -147,31 +147,32 @@ And inside ``configuration_callback()`` implement the response to the configuration request
         except json.JSONDecodeError:
             print("[WARN] In model_provider node extra_data JSON is not valid.")
             extra_data_dict = {}
-
+
         if "type" in extra_data_dict:
             type = extra_data_dict["type"]
-
+
         if "model_restrains" in extra_data_dict:
             restrained_models = extra_data_dict["model_restrains"]
-
+
         if "model_selected" in extra_data_dict:
             chosen_model = extra_data_dict["model_selected"]
             print("Model already selected: ", chosen_model)
-
+
         problem_short_description = extra_data_dict["problem_short_description"]
-
+
         metadata = ml_model_metadata.ml_model_metadata()[0]
-
+
         if chosen_model is None:
             print(f"Problem short description: {problem_short_description}")
-
-            # Choose model with the RAG based on the goal selected and the knowledge of the graph.
+
+            # Build the whitelist and force the RAG to pick ONLY from it
+            allowed = get_allowed_models_for_problem(metadata)  # Metadata is the goal name
             chosen_model = answer_question(
-                f"Task {metadata} with problem description: {problem_short_description}?"
-            )
-
+                f"Task {metadata} with problem description: {problem_short_description}?",
+                allowed_models=allowed
+            )
         print(f"ML Model chosen: {chosen_model}")
-
+
         # Generate model code and keywords
         onnx_path = model(chosen_model)  # TODO - Further development needed
         ml_model.model(chosen_model)
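
The hunk above is the heart of this change: instead of letting the RAG name any model, the node now builds a whitelist from the knowledge graph and passes it to `answer_question`. The real implementations live in `rag.rag_backend`, so what follows is only a minimal sketch of the pattern under stated assumptions: the graph-lookup stub, the `llm` parameter, and the clamp-to-first-entry fallback are illustrative, not the project's code.

```python
# Sketch only: constrain a free-text RAG answer to a graph-derived whitelist.
def get_allowed_models_for_problem(goal, graph_lookup):
    # graph_lookup stands in for the rdftool knowledge-graph query
    return sorted({str(row[0]) for row in graph_lookup(goal)})

def answer_question(question, allowed_models=None, llm=lambda q: ""):
    candidate = llm(question)
    if allowed_models and candidate not in allowed_models:
        # Assumed fallback: clamp an off-list answer back onto the whitelist
        candidate = allowed_models[0]
    return candidate

if __name__ == "__main__":
    lookup = lambda goal: [("ResNet50",), ("MobileNetV2",)]  # fake graph rows
    allowed = get_allowed_models_for_problem("image_classification", lookup)
    # The stub LLM answers off-list, so the sketch clamps to "MobileNetV2"
    print(answer_question("Task image_classification?", allowed_models=allowed,
                          llm=lambda q: "SomeOffListModel"))
```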
@@ -180,7 +181,7 @@ And inside ``configuration_callback()`` implement the response to the configuration request
         extra_data = {"unsupported_models": unsupported_models}
         encoded_data = json.dumps(extra_data).encode("utf-8")
         ml_model.extra_data(encoded_data)
-
+
     except Exception as e:
         print(f"Failed to determine ML model for task {ml_model_metadata.task_id()}: {e}.")
         ml_model.model("Error")
@@ -189,18 +190,18 @@ And inside ``configuration_callback()`` implement the response to the configuration request
         error_info = {"error": error_message}
         encoded_error = json.dumps(error_info).encode("utf-8")
         ml_model.extra_data(encoded_error)
-
-
+
+
 # User Configuration Callback implementation
 # Inputs: req
 # Outputs: res
 def configuration_callback(req, res):
-
+
     # Callback for configuration implementation here
     if 'model_from_goal' in req.configuration():
         res.node_id(req.node_id())
         res.transaction_id(req.transaction_id())
-
+
         try:
             text = req.configuration()[len("model_from_goal, "):]
             parts = text.split(',')
@@ -211,24 +212,24 @@ And inside ``configuration_callback()`` implement the response to the configuration request
             else:
                 goal = text.strip()
                 models = get_models_for_problem(goal)
-
+
             sorted_models = ', '.join(sorted([str(m[0]) for m in models]))
-
+
             if not sorted_models:
                 res.success(False)
                 res.err_code(1)  # 0: No error || 1: Error
             else:
                 res.success(True)
                 res.err_code(0)  # 0: No error || 1: Error
-
+
             print(f"Models for {goal}: {sorted_models}")  # debug
             res.configuration(json.dumps(dict(models=sorted_models)))
-
+
         except Exception as e:
             print(f"Error getting model from goal from request: {e}")
             res.success(False)
             res.err_code(1)
-
+
     else:
         res.node_id(req.node_id())
         res.transaction_id(req.transaction_id())
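
Note the reply shape in this branch: `res.configuration(json.dumps(dict(models=sorted_models)))` sends one JSON object whose `models` value is a single comma-separated string, not a JSON array. A hedged client-side sketch of decoding such a reply (the model names are invented):

```python
import json

raw = '{"models": "MobileNetV2, ResNet50"}'  # shape produced by the callback above
models = [m.strip() for m in json.loads(raw)["models"].split(",") if m.strip()]
print(models)  # ['MobileNetV2', 'ResNet50']
```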
@@ -237,8 +238,8 @@ And inside ``configuration_callback()`` implement the response to the configuration request
         res.success(False)
         res.err_code(1)  # 0: No error || 1: Error
         print(error_msg)
-
-
+
+
 # Main workflow routine
 def run():
     start_time = time.time()
@@ -255,19 +256,19 @@ And inside ``configuration_callback()`` implement the response to the configuration request
     global running
     running = True
     node.spin()
-
-
+
+
 # Call main in program execution
 if __name__ == '__main__':
     signal.signal(signal.SIGINT, signal_handler)
-
+
     """ Python does not process signals async if
     the main thread is blocked (spin()) so, run
     user workflow in another thread """
     runner = threading.Thread(target=run)
     runner.start()
-
+
     while running:
         time.sleep(1)
-
+
     runner.join()
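
For reference, `task_callback` reads its input `extra_data` as UTF-8 encoded JSON, probing for the keys `type`, `model_restrains`, `model_selected`, and `problem_short_description`, and publishes `{"unsupported_models": [...]}` (or `{"error": ...}`) the same way. A small round-trip sketch of that wire format; every value below is invented for illustration:

```python
import json

# Request-side payload (key names from task_callback; values are made up)
incoming = json.dumps({
    "type": "classification",
    "model_restrains": ["VGG16"],
    "problem_short_description": "Classify rooftop solar panels in aerial images",
}).encode("utf-8")

# What the callback effectively does on receipt
extra_data_dict = json.loads(incoming.decode("utf-8"))
print(extra_data_dict.get("model_selected"))  # None: no model preselected
print(extra_data_dict["problem_short_description"])

# Reply-side payload, mirroring ml_model.extra_data(encoded_data)
outgoing = json.dumps({"unsupported_models": ["SomeHugeModel"]}).encode("utf-8")
```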