Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -127,3 +127,6 @@ dmypy.json

# Pyre type checker
.pyre/

#Prithvi geospatial package
prithvi_geospatial_extractor/geospatial_fm.egg-info/
2 changes: 1 addition & 1 deletion PDG_MAPLE/requirements.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
pyclowder==2.4.0
pyclowder==3.0.7
2 changes: 1 addition & 1 deletion event_driven_ml_inference/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pyclowder==2.6.0
pyclowder==3.0.7
numpy
ray[default]==1.13.0
keras
Expand Down
2 changes: 1 addition & 1 deletion parallel-batch-ml-inference-huggingface/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pyclowder==2.6.0
pyclowder==3.0.7
ray[default]==1.13.0
numpy
scipy
Expand Down
2 changes: 1 addition & 1 deletion parallel-batch-ml-inference-pytorch/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pyclowder==2.6.0
pyclowder==3.0.7
ray[default]==1.13.0
torchvision
pillow
17 changes: 14 additions & 3 deletions parallel_batch_ml_inference/parallel_ml_inference_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
import numpy as np
import ray
from ray.util.queue import Queue
import os
from PIL import UnidentifiedImageError
import pyclowder.files
from pyclowder.extractors import Extractor

Expand All @@ -26,7 +28,11 @@ def process_file(self, filepaths):
from tensorflow.keras.preprocessing import image

# pre-process image
original = image.load_img(filepaths, target_size=(224, 224))
try:
original = image.load_img(filepaths, target_size=(224, 224))
except UnidentifiedImageError:
print("Unidentified Image Error")
return "Unidentified Image Error. Possible corrupted image, please replace image."
numpy_image = image.img_to_array(original)
image_batch = np.expand_dims(numpy_image, axis=0)
processed_image = preprocess_input(image_batch, mode='caffe')
Expand Down Expand Up @@ -57,14 +63,19 @@ def __init__(self):
def process_message(self, connector, host, secret_key, resource, parameters):
"""Dataset extractor. We get all filenames at once."""
logger = logging.getLogger(__name__)

# Get list of all files in dataset
filelist = pyclowder.datasets.get_file_list(connector, host, secret_key, parameters['datasetId'])
localfiles = []
clowder_version = int(os.getenv('CLOWDER_VERSION', '1'))

# # Loop through dataset and download all file "locally"
for file_dict in filelist:
extension = "." + file_dict['contentType'].split("/")[1]
# Use the correct key depending on the Clowder version
if clowder_version == 2:
extension = "." + file_dict['content_type']['content_type'].split("/")[1]
else:
extension = "." + file_dict['contentType'].split("/")[1]
localfiles.append(pyclowder.files.download(connector, host, secret_key, file_dict['id'], ext=extension))

# These process messages will appear in the Clowder UI under Extractions.
Expand Down
2 changes: 1 addition & 1 deletion parallel_batch_ml_inference/requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pyclowder==2.6.0
pyclowder==3.0.7
numpy
ray[default]==1.13.0
keras
Expand Down
3 changes: 3 additions & 0 deletions prithvi_geospatial_extractor/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
geospatial_fm.egg-info/
build/
dist/
44 changes: 44 additions & 0 deletions prithvi_geospatial_extractor/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
#Source - https://huggingface.co/spaces/ibm-nasa-geospatial/Prithvi-100M-sen1floods11-demo/blob/main/Dockerfile

FROM python:3.8

# Install build tooling and the shared libraries needed at runtime in a single
# layer, and drop the apt package lists in that same layer so they are not
# baked into the image.
RUN apt-get update \
    && apt-get install --no-install-recommends -y \
        build-essential \
        wget \
    && apt-get install -y ffmpeg libsm6 libxext6 \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

RUN useradd -m -u 1000 user

# Switch to the "user" user
USER user

# Set home to the user's home directory.
# HOME is set in its own ENV instruction: variable substitution inside a single
# ENV instruction uses the values from *before* that instruction, so
# PYTHONPATH=$HOME/app would otherwise not expand to /home/user/app.
ENV HOME=/home/user
ENV PATH=/home/user/.local/bin:$PATH \
    PYTHONPATH=$HOME/app \
    PYTHONUNBUFFERED=1 \
    GRADIO_ALLOW_FLAGGING=never \
    GRADIO_NUM_PORTS=1 \
    GRADIO_SERVER_NAME=0.0.0.0 \
    GRADIO_THEME=huggingface \
    SYSTEM=spaces

# RUN conda install python=3.8

RUN pip install setuptools-rust
RUN pip install torch==1.11.0+cu115 torchvision==0.12.0+cu115 --extra-index-url https://download.pytorch.org/whl/cu115
RUN pip install gradio scikit-image pillow openmim
RUN pip install --upgrade setuptools

# The extractor sources live in /extractor; everything below runs from there.
WORKDIR /extractor

COPY . .

# Editable install of the package in the build context.
RUN pip install -e .

# mmcv-full wheel index matching CUDA 11.5 / torch 1.11.0 installed above.
RUN mim install mmcv-full==1.6.2 -f https://download.openmmlab.com/mmcv/dist/11.5/1.11.0/index.html

CMD ["python3", "-u", "prithvi_finetuned_extractor.py", "--heartbeat", "15"]
58 changes: 58 additions & 0 deletions prithvi_geospatial_extractor/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
# This extractor is best run via CodeFlare, see the top-level README for more.

# Manual Docker (no CodeFlare)

This extractor is ready to be run as a Docker container; the only dependency is a running Clowder instance. Simply build and run.

1. Start Clowder. For help starting Clowder, see our [getting started guide](https://github.com/clowder-framework/clowder/blob/develop/doc/src/sphinx/userguide/installing_clowder.rst).

2. First build the extractor Docker container:

```
# from this directory, run:

docker build -t prithvi-finetuned-extractor .
```

3. Finally run the extractor:

```
docker run -t -i --rm --net clowder_clowder -e "RABBITMQ_URI=amqp://guest:guest@rabbitmq:5672/%2f" --name "prithvi-finetuned-extractor" prithvi-finetuned-extractor
```

Then open the Clowder web app and run the Prithvi fine-tuned extractor on a `.tif` file! Done.

### Python and Docker details

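To use a different version of Python 3, edit the `FROM` line of the `Dockerfile`; by default it uses `FROM python:3.8`. Note that the pinned `torch`, `torchvision` and `mmcv-full` wheels must be available for the Python version you choose.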

Docker flags:

- `--net` links the extractor to the Clowder Docker network (run `docker network ls` to identify your own.)
- `-e RABBITMQ_URI=` sets the environment variable that controls which RabbitMQ server the extractor binds to. Setting `RABBITMQ_EXCHANGE` to select the exchange may also help.
- You can also use `--link` to link the extractor to a RabbitMQ container.
- `--name` assigns the container a name visible in Docker Desktop.

## Troubleshooting

**If you run into _any_ trouble**, please reach out on our Clowder Slack in the [#pyclowder channel](https://clowder-software.slack.com/archives/CNC2UVBCP).

Alternate methods of running extractors are below.

# Commandline Execution

To execute the extractor from the command line, you will need to have the required packages installed. It is highly recommended to use a Python virtual environment for this: create a virtual environment, activate it, and then install all required packages.

```
virtualenv /home/clowder/virtualenv/clowder2
. /home/clowder/virtualenv/clowder2/bin/activate
cd prithvi_geospatial_extractor/
pip install -e .
```

To start the extractor you will need to load the virtual environment and start the extractor.

```
. /home/clowder/virtualenv/clowder2/bin/activate
/home/clowder/extractors/prithvi_geospatial_extractor/prithvi_finetuned_extractor.py
```
48 changes: 48 additions & 0 deletions prithvi_geospatial_extractor/extractor_info.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
{
"@context": "http://clowder.ncsa.illinois.edu/contexts/extractors.jsonld",
"name": "pritvi.finetuned.inference.file",
"version": "1.0",
"description": "Choose from fine-tuned Prithvi models to run inference on tif files",
"author": "Vismayak Mohanarajan",
"contributors": [
],
"contexts": [],
"repository": [
{
"repType": "git",
"repUrl": "https://opensource.ncsa.illinois.edu/stash/scm/cats/pyclowder.git"
}
],
"process": {
"file": [
"manual.submission"
]
},
"max_retry": 1,
"external_services": [],
"dependencies": [],
"bibtex": [],
"parameters": {
"schema": {
"APPLICATION_TYPE": {
"type": "string",
"title": "Choose the fine-tuned model by application",
"enum": [
"flood_mapping",
"burn_scars",
"cover_crop"
],
"default": "flood_mapping"
},
"SAVE_IMAGE": {
"type": "string",
"title": "Save an image of the inferences as a mask overlaying the input image",
"enum": [
"True",
"False"
],
"default": "True"
}
}
}
}
27 changes: 27 additions & 0 deletions prithvi_geospatial_extractor/geospatial_fm/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from .geospatial_fm import ConvTransformerTokensToEmbeddingNeck, TemporalViTEncoder, GeospatialNeck
from .geospatial_pipelines import (
TorchRandomCrop,
LoadGeospatialAnnotations,
LoadGeospatialImageFromFile,
Reshape,
CastTensor,
CollectTestList,
TorchPermute
)
from .datasets import GeospatialDataset
from .temporal_encoder_decoder import TemporalEncoderDecoder

# Public API of the package: every name listed here is imported from one of
# the package's submodules by the import statements at the top of this file.
__all__ = [
"GeospatialDataset",
"TemporalViTEncoder",
"ConvTransformerTokensToEmbeddingNeck",
"LoadGeospatialAnnotations",
"LoadGeospatialImageFromFile",
"TorchRandomCrop",
"TemporalEncoderDecoder",
"Reshape",
"CastTensor",
"CollectTestList",
"GeospatialNeck",
"TorchPermute"
]
25 changes: 25 additions & 0 deletions prithvi_geospatial_extractor/geospatial_fm/datasets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from mmseg.datasets.builder import DATASETS
from mmseg.datasets.custom import CustomDataset
from .geospatial_pipelines import LoadGeospatialAnnotations


@DATASETS.register_module()
class GeospatialDataset(CustomDataset):
    """Segmentation dataset whose ground truth is read by ``LoadGeospatialAnnotations``.

    The class is registered in the mmseg ``DATASETS`` registry and extends
    ``CustomDataset``; after the base initializer runs, ``gt_seg_map_loader``
    is replaced with a ``LoadGeospatialAnnotations`` instance.

    Args:
        CLASSES: Stored on the instance as ``self.CLASSES``. Defaults to
            ``(0, 1)``.
        PALETTE: Stored on the instance as ``self.PALETTE``. Defaults to
            ``None``.
        **kwargs: Forwarded to ``CustomDataset.__init__``, except for two keys
            that are intercepted here:

            * ``gt_seg_map_loader_cfg`` -- keyword arguments for
              ``LoadGeospatialAnnotations``. A missing key or an explicit
              ``None`` both mean "no extra arguments".
            * ``reduce_zero_label`` -- defaults to ``False``; passed to both
              the base class and the annotation loader so the two agree.
    """

    def __init__(self, CLASSES=(0, 1), PALETTE=None, **kwargs):
        # Assigned before the base initializer is called (order kept from the
        # original). NOTE(review): presumably the base class reads these while
        # resolving classes/palette -- confirm against the installed mmseg.
        self.CLASSES = CLASSES
        self.PALETTE = PALETTE

        # Remove the loader config from kwargs so it is not forwarded to the
        # base class. ``or {}`` also covers an explicit ``None`` value, which
        # would otherwise fail at ``**None`` below.
        loader_cfg = kwargs.pop('gt_seg_map_loader_cfg', None) or {}
        reduce_zero_label = kwargs.pop('reduce_zero_label', False)

        super().__init__(
            reduce_zero_label=reduce_zero_label,
            # ignore_index=2,
            **kwargs)

        # Override whatever loader the base class configured with the
        # geospatial one.
        self.gt_seg_map_loader = LoadGeospatialAnnotations(
            reduce_zero_label=reduce_zero_label, **loader_cfg)
Loading