
Commit 94919bb

Merge pull request #12 from natcap/feature/compute-note-playbook

Compute node setup

2 parents d1c2db9 + c75cc62

20 files changed: +2242 −360 lines

.github/workflows/test.yml

Lines changed: 93 additions & 4 deletions
@@ -45,16 +45,103 @@ jobs:
         with:
           fetch-depth: 0 # Fetch complete history for accurate versioning

-      - uses: koesterlab/setup-slurm-action@v1
+      #############################################################
+      # This section copied from koesterlab/setup-slurm-action@v1
+      # their action does not allow modification of slurm conf vars, and we
+      # need to configure `AccountingStoreFlags: job_comment` so that job
+      # comments are stored in the database.
+      #
+      - name: Download slurm ansible roles
+        shell: bash -e {0}
+        run: |
+          ansible-galaxy role install https://github.com/galaxyproject/ansible-slurm/archive/1.0.1.tar.gz
+
+      - name: Apt prerequisites
+        shell: bash -e {0}
+        run: |
+          sudo apt-get update
+          sudo apt-get install retry
+
+      - name: Define slurm playbook
+        uses: 1arp/create-a-file-action@0.2
+        with:
+          file: slurm-playbook.yml
+          content: |
+            - name: Slurm all in One
+              hosts: localhost
+              roles:
+                - role: 1.0.1
+              become: true
+              vars:
+                slurm_upgrade: true
+                slurm_roles: ['controller', 'exec', 'dbd']
+                slurm_config_dir: /etc/slurm
+                slurm_config:
+                  ClusterName: cluster
+                  SlurmctldLogFile: /var/log/slurm/slurmctld.log
+                  SlurmctldPidFile: /run/slurmctld.pid
+                  SlurmdLogFile: /var/log/slurm/slurmd.log
+                  SlurmdPidFile: /run/slurmd.pid
+                  SlurmdSpoolDir: /tmp/slurmd # the default /var/lib/slurm/slurmd does not work because of noexec mounting in github actions
+                  StateSaveLocation: /var/lib/slurm/slurmctld
+                  AccountingStorageType: accounting_storage/slurmdbd
+                  AccountingStoreFlags: job_comment
+                  SelectType: select/cons_tres
+                slurmdbd_config:
+                  StorageType: accounting_storage/mysql
+                  PidFile: /run/slurmdbd.pid
+                  LogFile: /var/log/slurm/slurmdbd.log
+                  StoragePass: root
+                  StorageUser: root
+                  StorageHost: 127.0.0.1 # see https://stackoverflow.com/questions/58222386/github-actions-using-mysql-service-throws-access-denied-for-user-rootlocalh
+                  StoragePort: 8888
+                  DbdHost: localhost
+                slurm_create_user: yes
+                slurm_nodes:
+                  - name: localhost
+                    State: UNKNOWN
+                    Sockets: 1
+                    CoresPerSocket: 2
+                    RealMemory: 2000
+                slurm_user:
+                  comment: "Slurm Workload Manager"
+                  gid: 1002
+                  group: slurm
+                  home: "/var/lib/slurm"
+                  name: slurm
+                  shell: "/bin/bash"
+                  uid: 1002
+
+      - name: Set XDG_RUNTIME_DIR
+        shell: bash -e {0}
+        run: |
+          mkdir -p /tmp/1002-runtime # work around podman issue (https://github.com/containers/podman/issues/13338)
+          echo XDG_RUNTIME_DIR=/tmp/1002-runtime >> $GITHUB_ENV
+
+      - name: Setup slurm
+        shell: bash -e {0}
+        run: |
+          ansible-playbook slurm-playbook.yml || (journalctl -xe && exit 1)
+
+      - name: Add slurm account
+        shell: bash -e {0}
+        run: |
+          sudo retry --times=24 --delay=5 --until=success -- sacctmgr -i create account "Name=runner"
+          sudo retry --times=24 --delay=5 --until=success -- sacctmgr -i create user "Name=runner" "Account=runner"
+      ############################################################

       - name: Setup conda environment
         uses: mamba-org/setup-micromamba@v2
         with:
           environment-name: env
+          # pin numpy: https://github.com/natcap/invest/issues/2288
           create-args: >-
             python=3.13
             natcap.invest
             pytest
+            numpy<2.4.0
           condarc: |
             channels:
               - conda-forge
@@ -74,7 +161,9 @@ jobs:

       - name: Run tests
         run: |
-          export PYGEOAPI_CONFIG=pygeoapi-config.yml
-          export PYGEOAPI_OPENAPI=openapi.yml
+          which invest
+          invest --version
+          export PYGEOAPI_CONFIG=invest_processes/pygeoapi-config.yml
+          export PYGEOAPI_OPENAPI=invest_processes/openapi.yml
           pygeoapi openapi generate $PYGEOAPI_CONFIG --output-file $PYGEOAPI_OPENAPI
-          pytest --log-cli-level=DEBUG tests/
+          pytest -s --log-cli-level=DEBUG tests/
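The `Add slurm account` step wraps `sacctmgr` in `retry` because `slurmdbd` may not yet be accepting connections when the ansible playbook finishes. The same retry-until-success pattern can be sketched as a small Python helper (the `run_with_retry` function is hypothetical and not part of this repo; it just mirrors `retry --times=24 --delay=5 --until=success`):

```python
import time

def run_with_retry(fn, times=24, delay=5):
    """Call fn() until it succeeds or attempts are exhausted,
    mirroring `retry --times=24 --delay=5 --until=success -- <cmd>`."""
    for attempt in range(times):
        try:
            return fn()
        except Exception:
            if attempt == times - 1:
                raise  # all attempts exhausted; propagate the failure
            time.sleep(delay)
```

In the workflow, `fn` would be a subprocess call to `sacctmgr`; any command that fails while the accounting daemon starts up is simply retried.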

README.md

Lines changed: 0 additions & 19 deletions
@@ -1,21 +1,2 @@
 # invest-compute
 APIs and backend for running invest in the cloud
-
-## pygeoapi server
-
-To launch the server:
-```
-export PYGEOAPI_CONFIG=pygeoapi-config.yml
-export PYGEOAPI_OPENAPI=openapi.yml
-pygeoapi openapi generate $PYGEOAPI_CONFIG --output-file $PYGEOAPI_OPENAPI
-pygeoapi serve
-```
-
-Access the OpenAPI Swagger page in your browser at http://localhost:5000/openapi
-
-### asynchronous requests
-InVEST model execution should run asynchronously because it can take a long time. To use asynchronous mode, include the `'Prefer: respond-async'` header in the request, as required by `pygeoapi` and the OGC Processes specification ([source](https://docs.pygeoapi.io/en/latest/data-publishing/ogcapi-processes.html#asynchronous-support)).
-
-It seems that the async execution request is supposed to return a JSON object with information about the job, including its ID, which you can then use to query the job status and results. However, the request actually returns null, and the only job info is available in the `location` response header. I asked about this here: https://github.com/geopython/pygeoapi/issues/2105
-
-For now, given a `location` header value like `http://localhost:5000/jobs/XXXXXX`, you can check the job status at that URL and retrieve results at `http://localhost:5000/jobs/XXXXXX/results`.

invest_processes/README.md

Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+## pygeoapi server
+
+To launch the server:
+```
+export PYGEOAPI_CONFIG=pygeoapi-config.yml
+export PYGEOAPI_OPENAPI=openapi.yml
+pygeoapi openapi generate $PYGEOAPI_CONFIG --output-file $PYGEOAPI_OPENAPI
+pygeoapi serve
+```
+
+Access the OpenAPI Swagger page in your browser at http://localhost:5000/openapi
+
+### asynchronous requests
+InVEST model execution should run asynchronously because it can take a long time. To use asynchronous mode, include the `'Prefer: respond-async'` header in the request, as required by `pygeoapi` and the OGC Processes specification ([source](https://docs.pygeoapi.io/en/latest/data-publishing/ogcapi-processes.html#asynchronous-support)).
+
+It seems that the async execution request is supposed to return a JSON object with information about the job, including its ID, which you can then use to query the job status and results. However, the request actually returns null, and the only job info is available in the `location` response header. I asked about this here: https://github.com/geopython/pygeoapi/issues/2105
+
+For now, given a `location` header value like `http://localhost:5000/jobs/XXXXXX`, you can check the job status at that URL and retrieve results at `http://localhost:5000/jobs/XXXXXX/results`.
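The `location`-header convention described above can be captured in a tiny Python helper. This is illustrative only (the `job_urls` function and `ASYNC_HEADERS` name are not part of this repo); it encodes the status/results URL pattern shown above:

```python
# Header pygeoapi requires for asynchronous execution requests
ASYNC_HEADERS = {'Prefer': 'respond-async'}

def job_urls(location_header):
    """Given the `location` response header returned by an async
    execute request, derive the (status_url, results_url) pair."""
    status_url = location_header.rstrip('/')
    return status_url, status_url + '/results'

# With the placeholder job URL from the text:
status, results = job_urls('http://localhost:5000/jobs/XXXXXX')
```

A client would POST the execute request with `ASYNC_HEADERS`, read the `location` header from the response, then poll `status` until the job completes and fetch `results`.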
File renamed without changes.
Lines changed: 13 additions & 95 deletions
@@ -1,11 +1,8 @@
-import importlib
 import logging
-import os
-import tempfile
+from pathlib import Path
 import textwrap
-import time

-from natcap.invest import datastack, models, spec, utils
+from invest_processes.utils import download_and_extract_datastack
 from pygeoapi.process.base import BaseProcessor, ProcessorExecuteError

 LOGGER = logging.getLogger(__name__)
@@ -33,9 +30,9 @@
         }
     },
     'outputs': {
-        'workspace_dir': {
-            'title': 'Workspace directory',
-            'description': 'Path to the workspace directory containing all model results',
+        'workspace_url': {
+            'title': 'Workspace URL',
+            'description': 'URL to the workspace containing all model results',
             'schema': {
                 'type': 'string',
                 'contentMediaType': 'application/json'
@@ -49,6 +46,7 @@
     }
 }

+
 class ExecuteProcessor(BaseProcessor):
     """InVEST execute process"""

@@ -65,29 +63,23 @@ def __init__(self, processor_def):

         super().__init__(processor_def, PROCESS_METADATA)

-    def create_slurm_script(self, datastack_path, workspace_dir):
+    def create_slurm_script(self, datastack_url, workspace_dir):
         """Create a script to run with sbatch.

         Args:
-            datastack_path: path to the user provided invest datastack to execute
+            datastack_url: URL to the invest datastack (.tgz) to execute
             workspace_dir: path to the directory that the slurm job will run in

         Returns:
             string contents of the script
         """
-        try:
-            model_id = datastack.extract_parameter_set(datastack_path).model_id
-        except Exception as error:
-            raise ProcessorExecuteError(
-                1, "Error when parsing JSON datastack:\n " + str(error))
-
-        # Create a workspace directory
-        workspace_dir = os.path.join(workspace_dir, f'{model_id}_workspace')
-
+        json_path, model_id = download_and_extract_datastack(
+            datastack_url, Path(workspace_dir) / 'datastack')
+        workspace_dir = Path(workspace_dir) / f'{model_id}_workspace'
         return textwrap.dedent(f"""\
             #!/bin/sh
             #SBATCH --time=10
-            invest run --datastack {datastack_path} --workspace {workspace_dir} {model_id}
+            invest run --datastack {json_path} --workspace {workspace_dir} {model_id}
             """)

     def process_output(self, workspace_dir):
@@ -99,81 +91,7 @@ def process_output(self, workspace_dir):
         Returns:
             empty dict
         """
-        return {}
-
-    def execute(self, data, outputs=None):
-        """Execute the process.
-
-        Args:
-            data: dictionary of data inputs
-            outputs:
-
-        Returns:
-            Tuple of (mimetype, outputs)
-        """
-        # Extract model ID and parameters from the datastack file
-        datastack_path = data.get('datastack_path')
-
-        try:
-            parameter_set = datastack.extract_parameter_set(datastack_path)
-        except Exception as error:
-            raise ProcessorExecuteError(
-                1, "Error when parsing JSON datastack:\n " + str(error))
-
-        # Import the model
-        try:
-            model_module = models.pyname_to_module[
-                models.model_id_to_pyname[parameter_set.model_id]]
-        except KeyError as ex:
-            raise ValueError(f'model ID {parameter_set.model_id} not found')
-
-        # Create a workspace directory
-        workspace_root = os.path.abspath('workspaces')
-        workspace_dir = os.path.join(workspace_root, f'{parameter_set.model_id}_{time.time()}')
-        parameter_set.args['workspace_dir'] = workspace_dir
-
-        for arg_key, val in parameter_set.args.items():
-            try:
-                input_spec = model_module.MODEL_SPEC.get_input(arg_key)
-            except KeyError:
-                continue
-            # Uncomment this for next invest release
-            # if type(input_spec) in {spec.RasterInput, spec.SingleBandRasterInput,
-            #                         spec.VectorInput}:
-            #     parameter_set.args[arg_key] = utils._GDALPath.from_uri(
-            #         val).to_normalized_path()
-
-        with utils.prepare_workspace(workspace_dir,
-                                     model_id=parameter_set.model_id,
-                                     logging_level=logging.DEBUG):
-            LOGGER.log(
-                datastack.ARGS_LOG_LEVEL,
-                'Starting model with parameters: \n' +
-                datastack.format_args_dict(
-                    parameter_set.args,
-                    parameter_set.model_id))
-
-            try:
-                model_module.execute(parameter_set.args)
-            except Exception as ex:
-                LOGGER.error(
-                    f'An error occurred during execution: {ex}', exc_info=ex)
-                raise ProcessorExecuteError(
-                    'An error occurred during execution. See the log file in '
-                    'the workspace for details. \n Workspace: ' + workspace_dir)
-
-        LOGGER.info('Generating metadata for results')
-        try:
-            # If there's an exception from creating metadata
-            # I don't think we want to indicate a model failure
-            spec.generate_metadata_for_outputs(
-                model_module, parameter_set.args)
-        except Exception as ex:
-            LOGGER.warning(
-                'Something went wrong while generating metadata', exc_info=ex)
-
-        outputs = {'workspace_dir': workspace_dir}
-        return 'application/json', outputs
+        pass

     def __repr__(self):
         return f'<InVESTExecuteProcessor> {self.name}'
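The rewritten `create_slurm_script` boils down to string templating, so its output can be exercised without slurm installed. A standalone sketch (the freestanding function and the example paths and model ID are hypothetical; the template mirrors the f-string in the diff):

```python
import textwrap

def create_slurm_script(json_path, workspace_dir, model_id):
    """Build the sbatch script body, mirroring
    ExecuteProcessor.create_slurm_script in the diff above."""
    return textwrap.dedent(f"""\
        #!/bin/sh
        #SBATCH --time=10
        invest run --datastack {json_path} --workspace {workspace_dir} {model_id}
        """)

script = create_slurm_script(
    '/tmp/work/datastack/datastack.json',  # hypothetical extracted JSON path
    '/tmp/work/carbon_workspace',          # hypothetical workspace directory
    'carbon')                              # hypothetical model ID
```

`textwrap.dedent` strips the common leading indentation, so the generated script starts at column zero as sbatch expects.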
