SegmentAnyTree/run_oracle_pipeline.sh at main · SmartForest-no/SegmentAnyTree · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/bin/bash
# Abort the whole pipeline as soon as any command fails.
set -o errexit

# DEBUG_MODE switches where input/output data is taken from:
#   true  -> local testing, the DOCKER_* folders below stand in for the buckets
#   false -> running on the oracle, bucket locations come from the environment
# Edit the value here to switch modes.
DEBUG_MODE="true"

# Root folder for all working data.
# An earlier variant chose this per mode ('/home/nibio/mutable-outside-world'
# for DEBUG_MODE=true, '/home/datascience' otherwise); it is currently fixed.
PATH_DATA="/home/datascience"

# Folders on the oracle holding the pipeline's input and output data.
ORACLE_IN_DATA_FOLDER="${PATH_DATA}/docker_in_folder"
ORACLE_OUT_DATA_FOLDER="${PATH_DATA}/docker_out_folder"

# Temporary folders on the oracle used for the input and output of one batch.
TMP_IN_DATA_FOLDER="${PATH_DATA}/tmp_in_folder"
TMP_OUT_DATA_FOLDER="${PATH_DATA}/tmp_out_folder"

# Local folders that mimic the input and output buckets (used in DEBUG_MODE).
DOCKER_IN_FOLDER="/home/nibio/mutable-outside-world/bucket_in_folder"
DOCKER_OUT_FOLDER="/home/nibio/mutable-outside-world/bucket_out_folder"

# function to read the input from the oracle
#
# Copies everything found in the input location into $ORACLE_IN_DATA_FOLDER
# and unpacks (then deletes) any .zip archives that end up directly in it.
#
# Globals:
#   DEBUG_MODE            (read) "true" -> use DOCKER_IN_FOLDER as the source
#   DOCKER_IN_FOLDER      (read) source folder in debug mode
#   OBJ_INPUT_LOCATION    (read) source location otherwise; "oci://" is
#                         rewritten to "/mnt/" and the literal "@axqlz2potslu"
#                         is stripped (presumably the object-storage
#                         namespace - confirm)
#   ORACLE_IN_DATA_FOLDER (read) destination folder, created if missing
# Side effects:
#   Leaves the nullglob shell option enabled, exactly as the previous
#   implementation did, so later globs in the script behave as before.
# Returns:
#   0 on success (an empty input location is only a warning),
#   1 if the input location is unset / not a directory or a copy/unzip fails.
run_oracle_wrapper_input() {
    local bucket_location
    local file
    local -a bucket_entries

    if [ "$DEBUG_MODE" = true ]; then
        # This is mapped in the docker run
        bucket_location=${DOCKER_IN_FOLDER}
    else
        # Without this guard an unset variable would make the glob below
        # expand to /* and copy the whole root filesystem.
        if [ -z "${OBJ_INPUT_LOCATION:-}" ]; then
            echo "Error: OBJ_INPUT_LOCATION is not set" >&2
            return 1
        fi

        # Get the input location from the environment variable
        bucket_location=${OBJ_INPUT_LOCATION}

        # Remap the input location
        bucket_location=${bucket_location//@axqlz2potslu/}
        bucket_location=${bucket_location//oci:\/\//\/mnt\/}
    fi

    if [ ! -d "$bucket_location" ]; then
        echo "Error: input location '$bucket_location' is not a directory" >&2
        return 1
    fi

    # Create the input folder if it does not exist in the docker container
    mkdir -p "$ORACLE_IN_DATA_FOLDER" || return 1

    # With nullglob an empty folder yields an empty array instead of a
    # literal '*' pattern.
    shopt -s nullglob
    bucket_entries=("$bucket_location"/*)

    # Only call cp when there is something to copy: with zero sources cp would
    # see the destination as its sole argument and fail.
    if [ "${#bucket_entries[@]}" -gt 0 ]; then
        cp -r -- "${bucket_entries[@]}" "$ORACLE_IN_DATA_FOLDER" || return 1
    else
        echo "Warning: no input files found in $bucket_location" >&2
    fi

    # Unpack every .zip archive that is now directly in the input folder
    for file in "$ORACLE_IN_DATA_FOLDER"/*.zip; do
        if [ -f "$file" ]; then
            echo "Unzipping $file"
            unzip "$file" -d "$ORACLE_IN_DATA_FOLDER" || return 1
            # Remove the archive so it is not treated as an input file
            rm -- "$file" || return 1
        fi
    done
}

# function to write the output to the oracle
#
# Copies everything under $ORACLE_OUT_DATA_FOLDER/final_results into
# $PATH_DATA/results, zips the regular files located directly in that folder
# (subfolders are not included) into $PATH_DATA/results.zip and copies the
# archive to the output location.
#
# Globals:
#   DEBUG_MODE             (read) "true" -> use DOCKER_OUT_FOLDER as the target
#   DOCKER_OUT_FOLDER      (read) target folder in debug mode
#   OBJ_OUTPUT_LOCATION    (read) target location otherwise; "oci://" is
#                          rewritten to "/mnt/" and the literal "@axqlz2potslu"
#                          is stripped (presumably the object-storage
#                          namespace - confirm)
#   ORACLE_OUT_DATA_FOLDER (read) folder whose final_results/ is exported
#   PATH_DATA              (read) parent of the results/ folder and results.zip
# Returns:
#   0 on success, 1 if the output location is unset, there are no results to
#   export, no archive was produced, or any copy/zip step fails.
run_oracle_wrapper_output() {
    local bucket_location
    local results_dir="$PATH_DATA/results"
    local results_zip="$PATH_DATA/results.zip"
    local -a result_entries

    if [ "$DEBUG_MODE" = true ]; then
        # This is mapped in the docker run
        bucket_location=${DOCKER_OUT_FOLDER}
    else
        # Fail with a clear message instead of a cryptic mkdir error
        if [ -z "${OBJ_OUTPUT_LOCATION:-}" ]; then
            echo "Error: OBJ_OUTPUT_LOCATION is not set" >&2
            return 1
        fi

        # Get the output location from the environment variable
        bucket_location=${OBJ_OUTPUT_LOCATION}

        # Remap the output location
        bucket_location=${bucket_location//@axqlz2potslu/}
        bucket_location=${bucket_location//oci:\/\//\/mnt\/}
    fi

    # Create the output folder if it does not exist in the docker container
    mkdir -p "$bucket_location" || return 1

    # make a temporary folder to store the results
    mkdir -p "$results_dir" || return 1

    # Expand the glob once into an array. Checking the first entry works
    # whether or not nullglob is enabled (empty array vs. literal pattern).
    result_entries=("$ORACLE_OUT_DATA_FOLDER/final_results/"*)
    if [ ! -e "${result_entries[0]:-}" ] && [ ! -L "${result_entries[0]:-}" ]; then
        echo "Error: no results found in $ORACLE_OUT_DATA_FOLDER/final_results" >&2
        return 1
    fi

    # copy the results from the oracle output folder to the temporary folder
    cp -r -- "${result_entries[@]}" "$results_dir" || return 1

    # Find and zip only the files in the specified directory, excluding subfolders
    find "$results_dir" -maxdepth 1 -type f -exec zip "$results_zip" {} + || return 1

    # find does not invoke zip at all when there are no top-level files
    if [ ! -f "$results_zip" ]; then
        echo "Error: no top-level result files to archive in $results_dir" >&2
        return 1
    fi

    # copy the zipped results to the output location
    cp -- "$results_zip" "$bucket_location" || return 1
}

### Main execution ###
# Pipeline: fetch the input data, run inference on it in batches, then
# publish the collected results.

# Run the input script
run_oracle_wrapper_input

# Create temporary folders if they do not exist
mkdir -p "$TMP_IN_DATA_FOLDER"
mkdir -p "$TMP_OUT_DATA_FOLDER"
mkdir -p "$ORACLE_OUT_DATA_FOLDER/final_results"

# Collect the regular files located directly in ORACLE_IN_DATA_FOLDER.
# Reading NUL-delimited names avoids both word splitting and glob expansion,
# so names containing spaces, newlines or characters such as [ ] * ? are
# kept exactly as they are on disk.
files=()
while IFS= read -r -d '' input_file; do
    files+=("$input_file")
done < <(find "$ORACLE_IN_DATA_FOLDER" -maxdepth 1 -type f -print0)

# Number of input files handed to one inference run
batch_size=10

# Process files in chunks of batch_size
for ((i = 0; i < ${#files[@]}; i += batch_size)); do
    # Copy up to batch_size files to TMP_IN_DATA_FOLDER
    cp -- "${files[@]:i:batch_size}" "$TMP_IN_DATA_FOLDER/"

    # Run the inference script on these files
    # (run_inference.sh is resolved relative to the current working directory)
    bash run_inference.sh "$TMP_IN_DATA_FOLDER" "$TMP_OUT_DATA_FOLDER"

    # Copy results from TMP_OUT_DATA_FOLDER to ORACLE_OUT_DATA_FOLDER
    cp -r "$TMP_OUT_DATA_FOLDER/final_results/"* "$ORACLE_OUT_DATA_FOLDER/final_results/"

    # Clear TMP_IN_DATA_FOLDER for the next batch; :? aborts instead of
    # expanding to /* should the variable ever be empty.
    rm -rf "${TMP_IN_DATA_FOLDER:?}"/*
done

# Run the output script
run_oracle_wrapper_output

echo "Processing complete."