clowder-framework · Vismayak · Jan 17, 2024 · Mar 15, 2024 · Mar 15, 2024 · Mar 16, 2024
diff --git a/.gitignore b/.gitignore
@@ -127,3 +127,6 @@ dmypy.json
 
 # Pyre type checker
 .pyre/
+
+#Prithvi geospatial package
+prithvi_geospatial_extractor/geospatial_fm.egg-info/
diff --git a/PDG_MAPLE/requirements.txt b/PDG_MAPLE/requirements.txt
@@ -1 +1 @@
-pyclowder==2.4.0
+pyclowder==3.0.7
diff --git a/event_driven_ml_inference/requirements.txt b/event_driven_ml_inference/requirements.txt
@@ -1,4 +1,4 @@
-pyclowder==2.6.0
+pyclowder==3.0.7
 numpy
 ray[default]==1.13.0
 keras

diff --git a/parallel-batch-ml-inference-huggingface/requirements.txt b/parallel-batch-ml-inference-huggingface/requirements.txt
@@ -1,4 +1,4 @@
-pyclowder==2.6.0
+pyclowder==3.0.7
 ray[default]==1.13.0
 numpy
 scipy

diff --git a/parallel-batch-ml-inference-pytorch/requirements.txt b/parallel-batch-ml-inference-pytorch/requirements.txt
@@ -1,4 +1,4 @@
-pyclowder==2.6.0
+pyclowder==3.0.7
 ray[default]==1.13.0
 torchvision
 pillow
diff --git a/parallel_batch_ml_inference/parallel_ml_inference_extractor.py b/parallel_batch_ml_inference/parallel_ml_inference_extractor.py
@@ -8,6 +8,8 @@
 import numpy as np
 import ray
 from ray.util.queue import Queue
+import os
+from PIL import UnidentifiedImageError
 import pyclowder.files
 from pyclowder.extractors import Extractor
 
@@ -26,7 +28,11 @@ def process_file(self, filepaths):
         from tensorflow.keras.preprocessing import image
 
         # pre-process image
-        original = image.load_img(filepaths, target_size=(224, 224))
+        try:
+            original = image.load_img(filepaths, target_size=(224, 224))
+        except UnidentifiedImageError:
+            print("Unidentified Image Error")
+            return "Unidentified Image Error. Possible corrupted image, please replace image."
         numpy_image = image.img_to_array(original)
         image_batch = np.expand_dims(numpy_image, axis=0)
         processed_image = preprocess_input(image_batch, mode='caffe')
@@ -57,14 +63,19 @@ def __init__(self):
     def process_message(self, connector, host, secret_key, resource, parameters):
         """Dataset extractor. We get all filenames at once."""
         logger = logging.getLogger(__name__)
-        
+
         # Get list of all files in dataset
         filelist = pyclowder.datasets.get_file_list(connector, host, secret_key, parameters['datasetId'])
         localfiles = []
+        clowder_version = int(os.getenv('CLOWDER_VERSION', '1'))
 
         # # Loop through dataset and download all file "locally"
         for file_dict in filelist:
-            extension = "." + file_dict['contentType'].split("/")[1]
+            # Use the correct key depending on the Clowder version
+            if clowder_version == 2:
+                extension = "." + file_dict['content_type']['content_type'].split("/")[1]
+            else:
+                extension = "." + file_dict['contentType'].split("/")[1]
             localfiles.append(pyclowder.files.download(connector, host, secret_key, file_dict['id'], ext=extension))
 
         # These process messages will appear in the Clowder UI under Extractions.

diff --git a/parallel_batch_ml_inference/requirements.txt b/parallel_batch_ml_inference/requirements.txt
@@ -1,4 +1,4 @@
-pyclowder==2.6.0
+pyclowder==3.0.7
 numpy
 ray[default]==1.13.0
 keras

diff --git a/prithvi_geospatial_extractor/.dockerignore b/prithvi_geospatial_extractor/.dockerignore
@@ -0,0 +1,3 @@
+geospatial_fm.egg-info/
+build/
+dist/
diff --git a/prithvi_geospatial_extractor/Dockerfile b/prithvi_geospatial_extractor/Dockerfile
@@ -0,0 +1,44 @@
+#Source - https://huggingface.co/spaces/ibm-nasa-geospatial/Prithvi-100M-sen1floods11-demo/blob/main/Dockerfile
+
+FROM python:3.8
+
+RUN apt-get update && apt-get install --no-install-recommends -y \
+  build-essential \
+  wget \
+  && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+RUN apt-get update && apt-get install ffmpeg libsm6 libxext6  -y
+
+RUN useradd -m -u 1000 user
+
+# Switch to the "user" user
+USER user
+# Set home to the user's home directory (PYTHONPATH is spelled out: $HOME would not yet see the HOME set in this same ENV)
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH \
+    PYTHONPATH=/home/user/app \
+	PYTHONUNBUFFERED=1 \
+	GRADIO_ALLOW_FLAGGING=never \
+	GRADIO_NUM_PORTS=1 \
+	GRADIO_SERVER_NAME=0.0.0.0 \
+	GRADIO_THEME=huggingface \
+	SYSTEM=spaces
+
+# RUN conda install python=3.8
+
+RUN pip install setuptools-rust
+RUN pip install torch==1.11.0+cu115 torchvision==0.12.0+cu115 --extra-index-url https://download.pytorch.org/whl/cu115
+RUN pip install gradio scikit-image pillow openmim
+RUN pip install --upgrade setuptools
+
+WORKDIR /home/user
+
+WORKDIR /extractor
+
+COPY . .
+
+RUN pip install -e .
+
+RUN mim install mmcv-full==1.6.2 -f https://download.openmmlab.com/mmcv/dist/11.5/1.11.0/index.html
+
+CMD ["python3", "-u", "prithvi_finetuned_extractor.py", "--heartbeat", "15"]
diff --git a/prithvi_geospatial_extractor/README.md b/prithvi_geospatial_extractor/README.md
@@ -0,0 +1,58 @@
+# This extractor is best run via CodeFlare, see the top-level README for more. 
+
+# Manual Docker (no CodeFlare)
+
+This extractor is ready to be run as a docker container, the only dependency is a running Clowder instance. Simply build and run.
+
+1. Start Clowder. For help starting Clowder, see our [getting started guide](https://github.com/clowder-framework/clowder/blob/develop/doc/src/sphinx/userguide/installing_clowder.rst).
+
+2. First build the extractor Docker container:
+
+```
+# from this directory, run:
+
+docker build -t prithvi-finetuned-extractor .
+```
+
+3. Finally run the extractor:
+
+```
+docker run -t -i --rm --net clowder_clowder -e "RABBITMQ_URI=amqp://guest:guest@rabbitmq:5672/%2f" --name "prithvi-finetuned-extractor" prithvi-finetuned-extractor
+```
+
+Then open the Clowder web app and run the Prithvi fine-tuned inference extractor on a .tif file! Done.
+
+### Python and Docker details
+
+You may use any version of Python 3. Simply edit the first line of the `Dockerfile`, by default it uses `FROM python:3.8`.
+
+Docker flags:
+
+- `--net` links the extractor to the Clowder Docker network (run `docker network ls` to identify your own.)
+- `-e RABBITMQ_URI=` sets the environment variable that controls which RabbitMQ server and exchange the extractor binds itself to. Setting `RABBITMQ_EXCHANGE` may also help.
+  - You can also use `--link` to link the extractor to a RabbitMQ container.
+- `--name` assigns the container a name visible in Docker Desktop.
+
+## Troubleshooting
+
+**If you run into _any_ trouble**, please reach out on our Clowder Slack in the [#pyclowder channel](https://clowder-software.slack.com/archives/CNC2UVBCP).
+
+Alternate methods of running extractors are below.
+
+# Commandline Execution
+
+To execute the extractor from the command line, you will need to have the required packages installed. It is highly recommended to use a Python virtual environment for this. You will need to create a virtual environment first, then activate it, and finally install all required packages.
+
+```
+virtualenv /home/clowder/virtualenv/clowder2
+. /home/clowder/virtualenv/clowder2/bin/activate
+cd prithvi_geospatial_extractor/
+pip install -e .
+```
+
+To start the extractor you will need to load the virtual environment and start the extractor.
+
+```
+. /home/clowder/virtualenv/clowder2/bin/activate
+python /home/clowder/extractors/prithvi_geospatial_extractor/prithvi_finetuned_extractor.py
+```
diff --git a/prithvi_geospatial_extractor/extractor_info.json b/prithvi_geospatial_extractor/extractor_info.json
@@ -0,0 +1,48 @@
+{
+  "@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld",
+  "name": "pritvi.finetuned.inference.file",
+  "version": "1.0",
+  "description": "Choose from fine-tuned Prithvi models to run inference on tif files",
+  "author": "Vismayak Mohanarajan",
+  "contributors": [
+  ],
+  "contexts": [],
+  "repository": [
+    {
+      "repType": "git",
+      "repUrl": "https://opensource.ncsa.illinois.edu/stash/scm/cats/pyclowder.git"
+    }
+  ],
+  "process": {
+    "file": [
+      "manual.submission"
+    ]
+  },
+  "max_retry": 1,
+  "external_services": [],
+  "dependencies": [],
+  "bibtex": [],
+  "parameters": {
+    "schema": {
+      "APPLICATION_TYPE": {
+        "type": "string",
+        "title": "Choose the fine-tuned model by application",
+         "enum": [
+           "flood_mapping",
+           "burn_scars",
+           "cover_crop"
+        ],
+        "default": "flood_mapping"
+      },
+      "SAVE_IMAGE": {
+        "type": "string",
+        "title": "Save an image of the inferences as a mask overlaying the input image",
+        "enum": [
+          "True",
+          "False"
+        ],
+        "default": "True"
+      }
+    }
+  }
+}
diff --git a/prithvi_geospatial_extractor/geospatial_fm/__init__.py b/prithvi_geospatial_extractor/geospatial_fm/__init__.py
@@ -0,0 +1,27 @@
+from .geospatial_fm import ConvTransformerTokensToEmbeddingNeck, TemporalViTEncoder, GeospatialNeck
+from .geospatial_pipelines import (
+    TorchRandomCrop,
+    LoadGeospatialAnnotations,
+    LoadGeospatialImageFromFile,
+    Reshape,
+    CastTensor,
+    CollectTestList,
+    TorchPermute
+)
+from .datasets import GeospatialDataset
+from .temporal_encoder_decoder import TemporalEncoderDecoder
+
+__all__ = [
+    "GeospatialDataset",
+    "TemporalViTEncoder",
+    "ConvTransformerTokensToEmbeddingNeck",
+    "LoadGeospatialAnnotations",
+    "LoadGeospatialImageFromFile",
+    "TorchRandomCrop",
+    "TemporalEncoderDecoder",
+    "Reshape",
+    "CastTensor",
+    "CollectTestList",
+    "GeospatialNeck",
+    "TorchPermute"
+]
diff --git a/prithvi_geospatial_extractor/geospatial_fm/datasets.py b/prithvi_geospatial_extractor/geospatial_fm/datasets.py
@@ -0,0 +1,25 @@
+from mmseg.datasets.builder import DATASETS
+from mmseg.datasets.custom import CustomDataset
+from .geospatial_pipelines import LoadGeospatialAnnotations
+
+
+@DATASETS.register_module()
+class GeospatialDataset(CustomDataset):
+    """Dataset registered in mmseg's DATASETS registry, extending CustomDataset.
+    Takes CLASSES/PALETTE as arguments and uses LoadGeospatialAnnotations as gt_seg_map_loader."""
+
+    def __init__(self, CLASSES=(0, 1), PALETTE=None, **kwargs):
+        # Class labels and palette are set per instance from the constructor arguments.
+        self.CLASSES = CLASSES
+
+        self.PALETTE = PALETTE
+        # Pop the options handled here so they do not also reach the parent through **kwargs.
+        gt_seg_map_loader_cfg = kwargs.pop('gt_seg_map_loader_cfg') if 'gt_seg_map_loader_cfg' in kwargs else dict()
+        reduce_zero_label = kwargs.pop('reduce_zero_label') if 'reduce_zero_label' in kwargs else False
+
+        super(GeospatialDataset, self).__init__(
+            reduce_zero_label=reduce_zero_label,
+            # ignore_index=2,
+            **kwargs)
+        # Assigned after the parent init; NOTE(review): presumably overrides a loader set there - confirm.
+        self.gt_seg_map_loader = LoadGeospatialAnnotations(reduce_zero_label=reduce_zero_label, **gt_seg_map_loader_cfg)