microsoft
diff --git a/.gitignore b/.gitignore
Lines changed: 11 additions & 0 deletions
diff --git a/Dockerfile b/Dockerfile
Lines changed: 74 additions & 0 deletions
diff --git a/client.py b/client.py
Lines changed: 94 additions & 0 deletions
@@ -3,3 +3,14 @@ weights/icon_caption_florence
 weights/icon_detect/
 .gradio
 __pycache__
+
+# Swap files
+*.swp
+
+# Environment files
+.env
+.env.*
+
+# Virtual environments and private key files
+venv/
+*.pem
@@ -0,0 +1,74 @@
+FROM nvidia/cuda:12.3.1-devel-ubuntu22.04
+
+# Install system dependencies: git, git-lfs and wget for fetching sources and
+# installers; the graphics-related shared libraries (libgl1, libglu1-mesa,
+# libsm6, libxext6, libxrender1) and libglib2.0-0, listed explicitly; and
+# python3-opencv. Each package is listed exactly once, and the apt lists are
+# removed in the same layer so they do not persist in the image.
+RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \
+    git \
+    git-lfs \
+    wget \
+    libgl1 \
+    libglib2.0-0 \
+    libglu1-mesa \
+    libsm6 \
+    libxext6 \
+    libxrender1 \
+    python3-opencv \
+    && apt-get clean \
+    && rm -rf /var/lib/apt/lists/* \
+    && git lfs install
+
+# Install Miniconda (used below to provide Python 3.12) into /opt/conda in batch mode
+RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh && \
+    bash miniconda.sh -b -p /opt/conda && \
+    rm miniconda.sh
+ENV PATH="/opt/conda/bin:$PATH"
+
+# Create the "omni" env (Python 3.12) non-interactively (-y) and make it the default; append to ~/.bashrc rather than overwrite it
+RUN conda create -y -n omni python=3.12 && \
+    echo "source activate omni" >> ~/.bashrc
+ENV CONDA_DEFAULT_ENV=omni
+ENV PATH="/opt/conda/envs/omni/bin:$PATH"
+
+# Set the working directory in the container
+WORKDIR /usr/src/app
+
+# Copy the whole build context (this already includes requirements.txt, so a
+# separate COPY of that file would only add a redundant layer)
+COPY . .
+
+# Initialize Git LFS and pull LFS files (needs .git in the build context)
+RUN git lfs install && \
+    git lfs pull
+
+# Install Python dependencies; opencv-python-headless is pinned, and --no-cache-dir keeps pip's download cache out of the layer
+RUN . /opt/conda/etc/profile.d/conda.sh && conda activate omni && \
+    pip uninstall -y opencv-python opencv-python-headless && \
+    pip install --no-cache-dir opencv-python-headless==4.8.1.78 && \
+    pip install --no-cache-dir -r requirements.txt && \
+    pip install --no-cache-dir huggingface_hub
+
+# Run download.py to fetch model weights and convert safetensors to .pt format
+RUN . /opt/conda/etc/profile.d/conda.sh && conda activate omni && \
+    python download.py && \
+    echo "Contents of weights directory:" && \
+    ls -lR weights && \
+    python weights/convert_safetensor_to_pt.py
+
+# Port the Gradio server is expected to listen on (client.py's usage example targets 7861) - presumably set by the app/entrypoint; verify
+EXPOSE 7861
+
+# Bind Gradio to all interfaces so the server is reachable from outside the container
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+
+# Copy the entrypoint script and make it executable
+COPY entrypoint.sh /usr/src/app/entrypoint.sh
+RUN chmod +x /usr/src/app/entrypoint.sh
+
+# To debug, keep the container running
+# CMD ["tail", "-f", "/dev/null"]
+
+# Run the entrypoint script when the container starts
+ENTRYPOINT ["/usr/src/app/entrypoint.sh"]
@@ -0,0 +1,94 @@
+"""
+This module provides a command-line interface to interact with the OmniParser Gradio server.
+
+Usage:
+    python client.py "http://<server_ip>:7861" "path/to/image.jpg"
+"""
+
+import fire
+from gradio_client import Client
+from loguru import logger
+from PIL import Image
+import base64
+from io import BytesIO
+import os
+import shutil
+
+def predict(server_url: str, image_path: str, box_threshold: float = 0.05, iou_threshold: float = 0.1):
+    """
+    Makes a prediction using the OmniParser Gradio client with the provided server URL and image.
+
+    Args:
+        server_url (str): The URL of the OmniParser Gradio server.
+        image_path (str): Path to the image file to be processed.
+        box_threshold (float): Box threshold value (default: 0.05).
+        iou_threshold (float): IOU threshold value (default: 0.1).
+    """
+    client = Client(server_url)
+    
+    # Load and encode the image
+    with open(image_path, "rb") as image_file:
+        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
+
+    # Prepare the image input in the format expected by the server
+    image_input = {
+        "path": None,
+        "url": f"data:image/png;base64,{encoded_image}",
+        "size": None,
+        "orig_name": image_path,
+        "mime_type": "image/png",
+        "is_stream": False,
+        "meta": {}
+    }
+
+    # Make the prediction
+    try:
+        result = client.predict(
+            image_input,    # image input as dictionary
+            box_threshold,  # box_threshold
+            iou_threshold,  # iou_threshold
+            api_name="/process"
+        )
+
+        # Process and log the results
+        output_image, parsed_content = result
+        
+        logger.info("Prediction completed successfully")
+        logger.info(f"Parsed content:\n{parsed_content}")
+        
+        # Save the output image
+        output_image_path = "output_image.png"
+        if isinstance(output_image, dict) and 'url' in output_image:
+            # Handle base64 encoded image
+            img_data = base64.b64decode(output_image['url'].split(',')[1])
+            with open(output_image_path, 'wb') as f:
+                f.write(img_data)
+        elif isinstance(output_image, str):
+            if output_image.startswith('data:image'):
+                # Handle base64 encoded image string
+                img_data = base64.b64decode(output_image.split(',')[1])
+                with open(output_image_path, 'wb') as f:
+                    f.write(img_data)
+            elif os.path.exists(output_image):
+                # Handle file path
+                shutil.copy(output_image, output_image_path)
+            else:
+                logger.warning(f"Unexpected output_image format: {output_image}")
+        elif isinstance(output_image, Image.Image):
+            output_image.save(output_image_path)
+        else:
+            logger.warning(f"Unexpected output_image format: {type(output_image)}")
+            logger.warning(f"Output image content: {output_image[:100]}...")  # Log the first 100 characters
+        
+        if os.path.exists(output_image_path):
+            logger.info(f"Output image saved to: {output_image_path}")
+        else:
+            logger.warning(f"Failed to save output image to: {output_image_path}")
+    
+    except Exception as e:
+        logger.error(f"An error occurred: {str(e)}")
+        logger.exception("Traceback:")
+
+if __name__ == "__main__":
+    fire.Fire(predict)  # expose predict() as the CLI; see the module docstring for usage
+