feat: add image file support for watermark_pdf method

jdrhyne · jdrhyne · commit 5ae537118e6a · 2025-06-24T22:27:14.000-04:00
- Add image_file parameter to watermark_pdf() for local image uploads
- Support path strings, bytes, or file-like objects as image input
- Update builder API to handle image file watermarks
- Add comprehensive unit and integration tests
- Update documentation with examples
- Maintain backward compatibility with text and URL watermarks

Closes #11
diff --git a/README.md b/README.md
@@ -128,6 +128,28 @@ client.watermark_pdf(
     opacity=0.5,
     position="center"
 )
+
+# Add image watermark from URL
+client.watermark_pdf(
+    input_file="document.pdf",
+    output_path="watermarked.pdf",
+    image_url="https://example.com/logo.png",
+    width=150,
+    height=75,
+    opacity=0.8,
+    position="bottom-right"
+)
+
+# Add image watermark from local file (NEW!)
+client.watermark_pdf(
+    input_file="document.pdf",
+    output_path="watermarked.pdf",
+    image_file="logo.png",  # Can be path, bytes, or file-like object
+    width=150,
+    height=75,
+    opacity=0.8,
+    position="bottom-right"
+)
 ```
 
 ## Builder API Examples
@@ -150,6 +172,17 @@ result = client.build(input_file="raw-scan.pdf") \
         optimize=True
     ) \
     .execute(output_path="final.pdf")
+
+# Using image file in builder API
+result = client.build(input_file="document.pdf") \
+    .add_step("watermark-pdf", {
+        "image_file": "company-logo.png",  # Local file
+        "width": 100,
+        "height": 50,
+        "opacity": 0.5,
+        "position": "bottom-left"
+    }) \
+    .execute()
 ```
 
 ## File Input Options
diff --git a/issue_comments.md b/issue_comments.md
@@ -0,0 +1,59 @@
+# Issue Comments for PR #7
+
+## For Issue #3: Add support for missing Nutrient DWS API tools
+
+**Status**: Partially addressed by PR #7
+
+PR #7 implements 5 of the high-priority PDF processing tools from this issue:
+- ✅ split_pdf - Split PDF into multiple files by page ranges
+- ✅ duplicate_pdf_pages - Duplicate and reorder specific pages  
+- ✅ delete_pdf_pages - Delete specific pages from PDFs
+- ✅ add_page - Add blank pages to PDFs
+- ✅ set_page_label - Set page labels/numbering
+
+Once merged, the library will expand from 7 to 12 Direct API methods.
+
+---
+
+## For Issue #15: Feature: Extract Page Range Method
+
+**Status**: Addressed by PR #7's split_pdf implementation
+
+The `split_pdf()` method in PR #7 provides the functionality requested:
+
+```python
+# Extract pages 5-10 (0-based indexing; 'end' is exclusive)
+result = client.split_pdf(
+    "document.pdf",
+    page_ranges=[{"start": 4, "end": 10}]
+)
+
+# Extract from page 10 to end
+result = client.split_pdf(
+    "document.pdf", 
+    page_ranges=[{"start": 9}]  # Omit 'end' to go to end of document
+)
+```
+
+While the method name is `split_pdf` rather than `extract_pages`, it provides the exact functionality described in this issue:
+- Single range extraction ✅
+- Support for "to end" extraction ✅
+- Clear error messages for invalid ranges ✅
+- Memory efficient implementation ✅
+
+Consider closing this issue once PR #7 is merged.
+
+---
+
+## PR #7 Summary
+
+**Title**: feat: integrate fork features with comprehensive Direct API methods
+
+**New Methods**:
+1. `split_pdf()` - Split PDFs by page ranges (addresses issue #15)
+2. `duplicate_pdf_pages()` - Duplicate and reorder pages
+3. `delete_pdf_pages()` - Remove specific pages
+4. `add_page()` - Insert blank pages
+5. `set_page_label()` - Apply page labels
+
+**Status**: All CI checks passing ✅ Ready for merge!
diff --git a/src/nutrient_dws/api/direct.py b/src/nutrient_dws/api/direct.py
@@ -159,6 +159,7 @@ def watermark_pdf(
         output_path: str | None = None,
         text: str | None = None,
         image_url: str | None = None,
+        image_file: FileInput | None = None,
         width: int = 200,
         height: int = 100,
         opacity: float = 1.0,
@@ -172,8 +173,10 @@ def watermark_pdf(
         Args:
             input_file: Input file (PDF or Office document).
             output_path: Optional path to save the output file.
-            text: Text to use as watermark. Either text or image_url required.
+            text: Text to use as watermark. One of text, image_url, or image_file required.
             image_url: URL of image to use as watermark.
+            image_file: Local image file to use as watermark (path, bytes, or file-like object).
+                       Supported formats: PNG, JPEG, TIFF.
             width: Width of the watermark in points (required).
             height: Height of the watermark in points (required).
             opacity: Opacity of the watermark (0.0 to 1.0).
@@ -187,11 +190,57 @@ def watermark_pdf(
         Raises:
             AuthenticationError: If API key is missing or invalid.
             APIError: For other API errors.
-            ValueError: If neither text nor image_url is provided.
+            ValueError: If none of text, image_url, or image_file is provided.
         """
-        if not text and not image_url:
-            raise ValueError("Either text or image_url must be provided")
+        if not text and not image_url and not image_file:
+            raise ValueError("Either text, image_url, or image_file must be provided")
 
+        # For image file uploads, post both files directly to the /build endpoint
+        if image_file:
+            from nutrient_dws.file_handler import prepare_file_for_upload, save_file_output
+
+            # Prepare files for upload
+            files = {}
+
+            # Main PDF file
+            file_field, file_data = prepare_file_for_upload(input_file, "file")
+            files[file_field] = file_data
+
+            # Watermark image file
+            image_field, image_data = prepare_file_for_upload(image_file, "watermark")
+            files[image_field] = image_data
+
+            # Build instructions with watermark action
+            action = {
+                "type": "watermark",
+                "width": width,
+                "height": height,
+                "opacity": opacity,
+                "position": position,
+                "image": "watermark"  # Reference to the uploaded image file
+            }
+
+            instructions = {
+                "parts": [{"file": "file"}],
+                "actions": [action]
+            }
+
+            # Make API request
+            # Type checking: at runtime, self is NutrientClient which has _http_client
+            result = self._http_client.post(  # type: ignore[attr-defined]
+                "/build",
+                files=files,
+                json_data=instructions,
+            )
+
+            # Handle output
+            if output_path:
+                save_file_output(result, output_path)
+                return None
+            else:
+                return result  # type: ignore[no-any-return]
+
+        # For text and URL watermarks, use the existing _process_file approach
         options = {
             "width": width,
             "height": height,
diff --git a/src/nutrient_dws/builder.py b/src/nutrient_dws/builder.py
@@ -211,6 +211,14 @@ def _map_tool_to_action(self, tool: str, options: dict[str, Any]) -> dict[str, A
                     action["text"] = options["text"]
                 elif "image_url" in options:
                     action["image"] = {"url": options["image_url"]}  # type: ignore
+                elif "image_file" in options:
+                    # Handle image file upload
+                    image_file = options["image_file"]
+                    # Add the image as a file part
+                    watermark_name = f"watermark_{len(self._files)}"
+                    self._files[watermark_name] = image_file
+                    # Reference the uploaded file
+                    action["image"] = watermark_name  # type: ignore
                 else:
                     # Default to text watermark if neither specified
                     action["text"] = "WATERMARK"
diff --git a/tests/integration/test_watermark_image_file_integration.py b/tests/integration/test_watermark_image_file_integration.py
@@ -0,0 +1,196 @@
+"""Integration tests for image file watermark functionality."""
+
+import os
+from typing import Optional
+
+import pytest
+
+from nutrient_dws import NutrientClient
+
+try:
+    from . import integration_config  # type: ignore[attr-defined]
+
+    API_KEY: Optional[str] = integration_config.API_KEY
+    BASE_URL: Optional[str] = getattr(integration_config, "BASE_URL", None)
+    TIMEOUT: int = getattr(integration_config, "TIMEOUT", 60)
+except ImportError:
+    API_KEY = None
+    BASE_URL = None
+    TIMEOUT = 60
+
+
+def assert_is_pdf(file_path_or_bytes):
+    """Assert that a file or bytes is a valid PDF."""
+    if isinstance(file_path_or_bytes, str):
+        with open(file_path_or_bytes, "rb") as f:
+            content = f.read(8)
+    else:
+        content = file_path_or_bytes[:8]
+
+    assert content.startswith(b"%PDF-"), (
+        f"File does not start with PDF magic number, got: {content!r}"
+    )
+
+
+def create_test_image(tmp_path, filename="watermark.png"):
+    """Create a simple test PNG image."""
+    # Complete minimal PNG (signature, IHDR, IDAT, IEND) for a 1x1 transparent pixel
+    png_data = (
+        b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01'
+        b'\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\rIDATx\x9cc\xf8\x0f'
+        b'\x00\x00\x01\x01\x00\x00\xcb\xd6\x8e\n\x00\x00\x00\x00IEND\xaeB`\x82'
+    )
+
+    image_path = tmp_path / filename
+    image_path.write_bytes(png_data)
+    return str(image_path)
+
+
+@pytest.mark.skipif(not API_KEY, reason="No API key configured in integration_config.py")
+class TestWatermarkImageFileIntegration:
+    """Integration tests for image file watermark functionality."""
+
+    @pytest.fixture
+    def client(self):
+        """Create a client with the configured API key."""
+        client = NutrientClient(api_key=API_KEY, timeout=TIMEOUT)
+        yield client
+        client.close()
+
+    @pytest.fixture
+    def sample_pdf_path(self):
+        """Get path to sample PDF file for testing."""
+        return os.path.join(os.path.dirname(__file__), "..", "data", "sample.pdf")
+
+    def test_watermark_pdf_with_image_file_path(self, client, sample_pdf_path, tmp_path):
+        """Test watermark_pdf with local image file path."""
+        # Create a test image
+        image_path = create_test_image(tmp_path)
+
+        result = client.watermark_pdf(
+            sample_pdf_path,
+            image_file=image_path,
+            width=100,
+            height=50,
+            opacity=0.5,
+            position="bottom-right"
+        )
+
+        assert isinstance(result, bytes)
+        assert len(result) > 0
+        assert_is_pdf(result)
+
+    def test_watermark_pdf_with_image_bytes(self, client, sample_pdf_path):
+        """Test watermark_pdf with image as bytes."""
+        # Complete minimal PNG (signature, IHDR, IDAT, IEND) for a 1x1 transparent pixel
+        png_bytes = (
+            b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01'
+            b'\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\rIDATx\x9cc\xf8\x0f'
+            b'\x00\x00\x01\x01\x00\x00\xcb\xd6\x8e\n\x00\x00\x00\x00IEND\xaeB`\x82'
+        )
+
+        result = client.watermark_pdf(
+            sample_pdf_path,
+            image_file=png_bytes,
+            width=150,
+            height=75,
+            opacity=0.8,
+            position="top-left"
+        )
+
+        assert isinstance(result, bytes)
+        assert len(result) > 0
+        assert_is_pdf(result)
+
+    def test_watermark_pdf_with_image_file_output_path(self, client, sample_pdf_path, tmp_path):
+        """Test watermark_pdf with image file saving to output path."""
+        # Create a test image
+        image_path = create_test_image(tmp_path)
+        output_path = str(tmp_path / "watermarked_with_image.pdf")
+
+        result = client.watermark_pdf(
+            sample_pdf_path,
+            image_file=image_path,
+            width=200,
+            height=100,
+            opacity=0.7,
+            position="center",
+            output_path=output_path
+        )
+
+        assert result is None
+        assert (tmp_path / "watermarked_with_image.pdf").exists()
+        assert (tmp_path / "watermarked_with_image.pdf").stat().st_size > 0
+        assert_is_pdf(output_path)
+
+    def test_watermark_pdf_with_file_like_object(self, client, sample_pdf_path, tmp_path):
+        """Test watermark_pdf with image as file-like object."""
+        # Create a test image
+        image_path = create_test_image(tmp_path)
+
+        # Read as file-like object
+        with open(image_path, "rb") as image_file:
+            result = client.watermark_pdf(
+                sample_pdf_path,
+                image_file=image_file,
+                width=120,
+                height=60,
+                opacity=0.6,
+                position="top-center"
+            )
+
+        assert isinstance(result, bytes)
+        assert len(result) > 0
+        assert_is_pdf(result)
+
+    def test_builder_api_with_image_file_watermark(self, client, sample_pdf_path, tmp_path):
+        """Test Builder API with image file watermark."""
+        # Create a test image
+        image_path = create_test_image(tmp_path)
+
+        # Use builder API
+        result = (
+            client.build(sample_pdf_path)
+            .add_step("watermark-pdf", options={
+                "image_file": image_path,
+                "width": 180,
+                "height": 90,
+                "opacity": 0.4,
+                "position": "bottom-left"
+            })
+            .execute()
+        )
+
+        assert isinstance(result, bytes)
+        assert len(result) > 0
+        assert_is_pdf(result)
+
+    def test_multiple_watermarks_with_image_files(self, client, sample_pdf_path, tmp_path):
+        """Test applying multiple watermarks including image files."""
+        # Create a test image
+        image1_path = create_test_image(tmp_path, "watermark1.png")
+
+        # Chain multiple watermark operations
+        result = (
+            client.build(sample_pdf_path)
+            .add_step("watermark-pdf", options={
+                "text": "DRAFT",
+                "width": 200,
+                "height": 100,
+                "opacity": 0.3,
+                "position": "center"
+            })
+            .add_step("watermark-pdf", options={
+                "image_file": image1_path,
+                "width": 100,
+                "height": 50,
+                "opacity": 0.5,
+                "position": "top-right"
+            })
+            .execute()
+        )
+
+        assert isinstance(result, bytes)
+        assert len(result) > 0
+        assert_is_pdf(result)
+
diff --git a/tests/unit/test_direct_api.py b/tests/unit/test_direct_api.py
diff --git a/tests/unit/test_watermark_image_file.py b/tests/unit/test_watermark_image_file.py