diff --git a/Snakefile b/Snakefile
index 098edcf..23451f3 100644
--- a/Snakefile
+++ b/Snakefile
@@ -49,7 +49,7 @@ rule project_mosaics:
     output:
         projected=f"{working_dir}/projected_mosaics/{{year}}/{{site}}/{{flight}}_projected.tif",
         webmercator=f"{working_dir}/projected_mosaics/webmercator/{{year}}/{{site}}/{{flight}}_projected.tif"
-    conda:
+    uv:
         "everwatch"
     shell:
         "python project_orthos.py {input.orthomosaic}"
@@ -59,7 +59,7 @@ rule predict_birds:
         projected=f"{working_dir}/projected_mosaics/{{year}}/{{site}}/{{flight}}_projected.tif"
     output:
         f"{working_dir}/predictions/{{year}}/{{site}}/{{flight}}_projected.shp"
-    conda:
+    uv:
         "everwatch"
     resources:
         gpu=1
@@ -71,7 +71,7 @@ rule combine_birds_site_year:
         flights_in_year_site
     output:
         f"{working_dir}/predictions/{{year}}/{{site}}/{{site}}_{{year}}_combined.shp"
-    conda:
+    uv:
         "everwatch"
     shell:
         "python combine_birds_site_year.py {input}"
@@ -82,7 +82,7 @@ rule combine_predicted_birds:
                zip, site=SITES_SY, year=YEARS_SY)
     output:
         f"{working_dir}/everwatch-workflow/App/Zooniverse/data/PredictedBirds.zip"
-    conda:
+    uv:
         "everwatch"
     shell:
         "python combine_bird_predictions.py {input}"
@@ -92,7 +92,7 @@ rule detect_nests:
         f"{working_dir}/predictions/{{year}}/{{site}}/{{site}}_{{year}}_combined.shp"
     output:
         f"{working_dir}/detected_nests/{{year}}/{{site}}/{{site}}_{{year}}_detected_nests.shp"
-    conda:
+    uv:
         "everwatch"
     shell:
         "python nest_detection.py {input}"
@@ -102,7 +102,7 @@ rule process_nests:
         f"{working_dir}/detected_nests/{{year}}/{{site}}/{{site}}_{{year}}_detected_nests.shp"
     output:
         f"{working_dir}/processed_nests/{{year}}/{{site}}/{{site}}_{{year}}_processed_nests.shp"
-    conda:
+    uv:
         "everwatch"
     shell:
         "python process_nests.py {input}"
@@ -113,7 +113,7 @@ rule combine_nests:
                zip, site=SITES_SY, year=YEARS_SY)
     output:
         f"{working_dir}/everwatch-workflow/App/Zooniverse/data/nest_detections_processed.zip"
-    conda:
+    uv:
         "everwatch"
     shell:
         "python combine_nests.py {input}"
@@ -123,7 +123,7 @@ rule create_mbtile:
         f"{working_dir}/projected_mosaics/webmercator/{{year}}/{{site}}/{{flight}}_projected.tif"
     output:
         f"{working_dir}/mapbox/{{year}}/{{site}}/{{flight}}.mbtiles"
-    conda:
+    uv:
         "mbtilesenv"
     shell:
         "python mbtile.py {input} {config[mapbox-param]}"
@@ -133,7 +133,7 @@ rule upload_mapbox:
         f"{working_dir}/mapbox/{{year}}/{{site}}/{{flight}}.mbtiles"
     output:
         touch(f"{working_dir}/mapbox/last_uploaded/{{year}}/{{site}}/{{flight}}.mbtiles")
-    conda:
+    uv:
         "everwatch"
     shell:
         "python upload_mapbox.py {input}"
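Note on the aggregation rules above: expand(..., zip, site=SITES_SY, year=YEARS_SY) pairs each site with its matching year positionally rather than building a full cross product. The sketch below illustrates that behavior in plain Python; the site and year values are placeholders, not the workflow's real lists, and the path pattern is taken from the combine_birds_site_year output above.

```python
# Plain-Python illustration of expand(..., zip, site=SITES_SY, year=YEARS_SY)
# as used in the combine_* rules: one combined shapefile per (site, year) pair.
# SITES_SY/YEARS_SY values here are placeholders, not the workflow's real lists.
working_dir = "/blue/ewhite/everglades"
SITES_SY = ["SiteA", "SiteB", "SiteA"]
YEARS_SY = ["2022", "2022", "2023"]

combined_shps = [
    f"{working_dir}/predictions/{year}/{site}/{site}_{year}_combined.shp"
    for site, year in zip(SITES_SY, YEARS_SY)
]
for path in combined_shps:
    print(path)
```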
diff --git a/config.yaml b/config.yaml
new file mode 100644
index 0000000..0bae2ea
--- /dev/null
+++ b/config.yaml
@@ -0,0 +1,95 @@
+# Config file for DeepForest pytorch module
+
+# Cpu workers for data loaders
+# Dataloaders
+workers: 0
+devices: auto
+accelerator: auto
+batch_size: 1
+
+# Model Architecture
+architecture: "retinanet"
+num_classes: 7
+nms_thresh: 0.05
+score_thresh: 0.1
+
+# Set model name to None to initialize from scratch
+model:
+  name: "weecology/deepforest-everglades-bird-species-detector"
+  revision: "main"
+
+label_dict:
+  Tree: 0
+
+# Pre-processing parameters
+path_to_raster:
+patch_size: 400
+patch_overlap: 0.05
+annotations_xml:
+rgb_dir:
+path_to_rgb:
+
+train:
+  csv_file:
+  root_dir:
+
+  # Optimizer initial learning rate
+  lr: 0.001
+
+  # Data augmentations for training
+  # Augmentations must be a list of augmentation names, or a list
+  # of dicts with augmentation names and parameters.
+  # Examples:
+  # augmentations:
+  #   - HorizontalFlip: {p: 0.5}
+  #   - Downscale: {scale_min: 0.25, scale_max: 0.75, p: 0.5}
+  augmentations:
+    - HorizontalFlip: { p: 0.5 }
+  scheduler:
+    type:
+    params:
+      # Common parameters
+      T_max: 10
+      eta_min: 0.00001
+      lr_lambda: "0.95 ** epoch"  # For lambdaLR and multiplicativeLR
+      step_size: 30  # For stepLR
+      gamma: 0.1  # For stepLR, multistepLR, and exponentialLR
+      milestones: [50, 100]  # For multistepLR
+
+      # ReduceLROnPlateau parameters (used if type is not explicitly mentioned)
+      mode: "min"
+      factor: 0.1
+      patience: 10
+      threshold: 0.0001
+      threshold_mode: "rel"
+      cooldown: 0
+      min_lr: 0
+      eps: 0.00000001
+
+  # How many epochs to run for
+  epochs: 1
+  # Useful debugging flag in pytorch lightning, set to True to get a single batch of training to test settings.
+  fast_dev_run: False
+  # preload images to GPU memory for fast training. This depends on GPU size and number of images.
+  preload_images: False
+
+validation:
+  csv_file:
+  root_dir:
+  preload_images: False
+  size:
+
+  # For retinanet you may prefer val_classification, but the default val_loss
+  # should work with all models
+  lr_plateau_target: val_loss
+
+  # Intersection over union evaluation
+  iou_threshold: 0.4
+  val_accuracy_interval: 20
+
+  # Data augmentation is none by default for validation, but you can specify:
+  # augmentations:
+  # if you need to apply augmentations during the val stage.
+
+predict:
+  pin_memory: False
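The new config.yaml replaces the deleted deepforest_config.yml below and adds the Hugging Face model reference, the species count, and the scheduler and augmentation blocks. A quick sanity-check sketch that only uses PyYAML (added to environment.yml in this diff); the keys printed come from the file above, and the values in the comments are what the new config should report.

```python
# Quick sanity check of config.yaml before launching the workflow.
# Uses only PyYAML; key names and expected values come from the diff above.
import yaml

with open("config.yaml") as f:
    cfg = yaml.safe_load(f)

print(cfg["model"]["name"])            # weecology/deepforest-everglades-bird-species-detector
print(cfg["num_classes"])              # 7
print(cfg["patch_size"], cfg["patch_overlap"])  # 400 0.05
print(cfg["train"]["augmentations"])   # [{'HorizontalFlip': {'p': 0.5}}]
```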
diff --git a/deepforest_config.yml b/deepforest_config.yml
deleted file mode 100644
index 30211be..0000000
--- a/deepforest_config.yml
+++ /dev/null
@@ -1,40 +0,0 @@
-# Config file for DeepForest pytorch module
-
-# Cpu workers for data loaders
-# Dataloaders
-workers: 1
-devices: 1
-accelerator: 'gpu'
-batch_size: 1
-
-# Model Architecture
-architecture: 'retinanet'
-num_classes: 1
-nms_thresh: 0.05
-
-# Architecture specific params
-retinanet:
-    # Non-max supression of overlapping predictions
-    score_thresh: 0.1
-
-train:
-    csv_file:
-    root_dir:
-
-    # Optimizer initial learning rate
-    lr: 0.001
-
-    # Print loss every n epochs
-    epochs: 1
-    # Useful debugging flag in pytorch lightning, set to True to get a single batch of training to test settings.
-    fast_dev_run: False
-    # pin images to GPU memory for fast training. This depends on GPU size and number of images.
-    preload_images: False
-
-validation:
-    # callback args
-    csv_file:
-    root_dir:
-    # Intersection over union evaluation
-    iou_threshold: 0.4
-    val_accuracy_interval: 20
\ No newline at end of file
diff --git a/environment.yml b/environment.yml
index ba6d358..dacbb48 100644
--- a/environment.yml
+++ b/environment.yml
@@ -1,37 +1,34 @@
 name: everwatch
-channels:
-  - defaults
+channels:
   - conda-forge
+  - defaults
 dependencies:
   - python=3.11
   - pip
-  - pytorch::pytorch
-  - pytorch::torchvision
-  - nvidia::cudatoolkit
+  - pyyaml
  - dask
   - distributed
-  - bioconda::snakemake
   - pandas
   - rasterio
   - geopandas
   - shapely>=2.0
+  - pytorch::pytorch
+  - pytorch::torchvision
+  - nvidia::cudatoolkit
+  - bioconda::snakemake
+  - opencv
   - pip:
-    - deepforest
-    - dask_jobqueue
-    - panoptescli
-    - comet_ml
-    - paramiko
-    - panoptes_client
-    - slidingwindow
-    - xmltodict
-    - Panoptes
-    - PyYAML==6.0
-    - pytest
-    - yapf
-    - black
-    - pytest-cov
-    - pytest-xdist
-    - pdoc
-    - rio-mbtiles
-    - boto3
-
\ No newline at end of file
+    - git+https://github.com/weecology/DeepForest.git@main
+    - dask_jobqueue
+    - comet_ml
+    - paramiko
+    - slidingwindow
+    - xmltodict
+    - pytest
+    - yapf
+    - black
+    - pytest-cov
+    - pytest-xdist
+    - pdoc
+    - rio-mbtiles
+    - boto3
diff --git a/environment_mbtiles.yml b/environment_mbtiles.yml
index 1968466..59647e6 100644
--- a/environment_mbtiles.yml
+++ b/environment_mbtiles.yml
@@ -1,36 +1,11 @@
 name: mbtilesenv
-channels:
-  - defaults
-  - conda-forge
+channels:
+  - defaults
+  - conda-forge
 dependencies:
-  - python=3.11
-  - pip
-  - pytorch::pytorch
-  - pytorch::torchvision
-  - nvidia::cudatoolkit
-  - dask
-  - distributed
-  - bioconda::snakemake
-  - pandas
-  - rasterio
-  - geopandas
-  - pip:
-    - deepforest
-    - dask_jobqueue
-    - panoptescli
-    - comet_ml
-    - paramiko
-    - panoptes_client
-    - slidingwindow
-    - xmltodict
-    - Panoptes
-    - PyYAML==6.0
-    - pytest
-    - yapf
-    - black
-    - pytest-cov
-    - pytest-xdist
-    - pdoc
-    - rio-mbtiles
-    - boto3
-    - shapely==1.7.0
+  - python=3.11
+  - pip
+  - rasterio
+  - pip:
+    - rio-mbtiles
+    - shapely==1.7.0
diff --git a/everglades_dryrun_workflow.sh b/everglades_dryrun_workflow.sh
index a0e7a59..64e1215 100644
--- a/everglades_dryrun_workflow.sh
+++ b/everglades_dryrun_workflow.sh
@@ -4,7 +4,7 @@
 #SBATCH --mail-type=FAIL
 #SBATCH --cpus-per-task=3
 #SBATCH --mem=20gb
-#SBATCH --time=01:30:00
+#SBATCH --time=03:30:00
 #SBATCH --gpus=1
 #SBATCH --partition=hpg-b200
 #SBATCH --output=/blue/ewhite/everglades/everwatch-workflow/logs/everglades_dryrun_workflow.out
@@ -19,6 +19,8 @@ source /etc/profile.d/modules.sh
 ml conda
 conda activate everwatch
 
+export PYTHONNOUSERSITE=1
+
 export TEST_ENV=True
 
 cd /blue/ewhite/everglades/everwatch-workflow/
diff --git a/everglades_workflow.sh b/everglades_workflow.sh
index 734d3a2..51927f7 100644
--- a/everglades_workflow.sh
+++ b/everglades_workflow.sh
@@ -4,9 +4,8 @@
 #SBATCH --mail-type=FAIL
 #SBATCH --cpus-per-task=30
 #SBATCH --mem=250gb
-#SBATCH --gpus=5
-#SBATCH --time=100:00:00
-#SBATCH --partition=hpg-b200
+#SBATCH --gpus=2
+#SBATCH --time=60:00:00
 #SBATCH --output=/blue/ewhite/everglades/everwatch-workflow/logs/everglades_workflow.out
 #SBATCH --error=/blue/ewhite/everglades/everwatch-workflow/logs/everglades_workflow.err
@@ -19,7 +18,7 @@ source /etc/profile.d/modules.sh
 ml conda
 conda activate everwatch
-
+export PYTHONNOUSERSITE=1
 cd /blue/ewhite/everglades/everwatch-workflow/
 
 snakemake --unlock
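Both SLURM scripts now export PYTHONNOUSERSITE=1 so packages installed in the user's ~/.local site-packages cannot shadow the conda environment. The standard-library sketch below could be run inside a job to confirm the setting took effect; the expected values in the comments describe a correctly configured job, not recorded output from this cluster.

```python
# Confirm that PYTHONNOUSERSITE=1 is in effect inside the job:
# the user site-packages directory should be disabled and absent from sys.path.
import os
import site
import sys

print("PYTHONNOUSERSITE =", os.environ.get("PYTHONNOUSERSITE"))
print("user site enabled:", site.ENABLE_USER_SITE)       # expect False
user_site = site.getusersitepackages()
print("user site on sys.path:", user_site in sys.path)   # expect False
```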
diff --git a/predict.py b/predict.py
index 589ae73..7bb631a 100644
--- a/predict.py
+++ b/predict.py
@@ -5,19 +5,20 @@
 import geopandas
 import pandas as pd
 import rasterio
-import shapely
 import torch
 from deepforest import main
 from deepforest.utilities import boxes_to_shapefile
+import PIL.Image
+PIL.Image.MAX_IMAGE_PIXELS = None
 
 
 def run(proj_tile_path, savedir="."):
     """Apply trained model to a drone tile"""
     model = main.deepforest()
-    model.load_model("weecology/everglades-bird-species-detector")
+    #model.load_model("weecology/everglades-bird-species-detector")
 
-    boxes = model.predict_tile(raster_path=proj_tile_path, patch_overlap=0, patch_size=1500)
+    boxes = model.predict_tile(path=proj_tile_path, patch_overlap=0, patch_size=1500)
     proj_tile_dir = os.path.dirname(proj_tile_path)
     projected_boxes = boxes_to_shapefile(boxes, proj_tile_dir)
     if not os.path.exists(savedir):
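predict.py now sets PIL.Image.MAX_IMAGE_PIXELS = None so Pillow's decompression-bomb guard does not reject large orthomosaics. If removing the limit entirely feels too permissive, one option is a large explicit ceiling instead; the sketch below is a suggestion with an arbitrary bound, not what the workflow currently does.

```python
# Alternative to disabling the check entirely: raise the ceiling so real
# orthomosaics load, while a wildly oversized or corrupt file still fails.
# The 60000 x 60000 bound is an arbitrary example, not a measured tile size.
import warnings

import PIL.Image

PIL.Image.MAX_IMAGE_PIXELS = 60_000 * 60_000

# Keep the softer DecompressionBombWarning visible for files near the limit.
warnings.simplefilter("default", PIL.Image.DecompressionBombWarning)
```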