Add duplicate_pdf_pages method for PDF page duplication

msch-nutrient · claude · msch-nutrient · commit c8f4819a6101 · 2025-06-19T21:17:24.000+02:00
- Add duplicate_pdf_pages method to DirectAPIMixin using Build API pattern
- Support flexible page selection with 0-based indexing and negative indexes
- Enable page duplication by repeating indexes in page_indexes list
- Add comprehensive integration tests with live API verification
- Update documentation in SUPPORTED_OPERATIONS.md with examples
- Follow established patterns from split_pdf implementation

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
diff --git a/SUPPORTED_OPERATIONS.md b/SUPPORTED_OPERATIONS.md
@@ -188,6 +188,39 @@ client.split_pdf(
 pages = client.split_pdf("document.pdf")
 ```
 
+### 9. `duplicate_pdf_pages(input_file, page_indexes, output_path=None)`
+Creates a new PDF containing the specified pages in the order provided. Repeating an index in `page_indexes` duplicates that page.
+
+**Parameters:**
+- `input_file`: PDF file to process
+- `page_indexes`: Non-empty list of page indexes to include (0-based). Pages can be repeated for duplication. Negative indexes are supported (-1 for the last page). An empty list raises `ValueError`.
+- `output_path`: Optional path to save the output file
+
+**Returns:**
+- Processed PDF as bytes, or `None` if `output_path` is provided
+
+**Example:**
+```python
+# Include the first page twice, then the second page
+result = client.duplicate_pdf_pages(
+    "document.pdf", 
+    page_indexes=[0, 0, 1]  # Page 1, Page 1, Page 2
+)
+
+# Include last page at beginning and end
+result = client.duplicate_pdf_pages(
+    "document.pdf",
+    page_indexes=[-1, 0, 1, 2, -1]  # Last, First, Second, Third, Last
+)
+
+# Save to specific file
+client.duplicate_pdf_pages(
+    "document.pdf",
+    page_indexes=[0, 2, 1],  # Reorder: Page 1, Page 3, Page 2
+    output_path="reordered.pdf"
+)
+```
+
 ## Builder API
 
 The Builder API allows chaining multiple operations. Like the Direct API, it automatically converts Office documents to PDF when needed:
diff --git a/src/nutrient_dws/api/direct.py b/src/nutrient_dws/api/direct.py
@@ -317,6 +317,90 @@ def split_pdf(
 
         return results if not output_paths else []
 
+    def duplicate_pdf_pages(
+        self,
+        input_file: FileInput,
+        page_indexes: List[int],
+        output_path: Optional[str] = None,
+    ) -> Optional[bytes]:
+        """Build a new PDF from selected pages of a document, allowing repeats.
+
+        Creates a new PDF containing the specified pages in the order provided.
+        A page is duplicated by listing its index more than once.
+
+        Args:
+            input_file: Input PDF file.
+            page_indexes: Non-empty list of page indexes to include (0-based).
+                Pages can be repeated to create duplicates. Negative indexes
+                are forwarded to the API unchanged (-1 for last page).
+            output_path: Optional path to save the output file. An empty
+                string is treated the same as None.
+
+        Returns:
+            Processed PDF as bytes, or None if output_path is provided.
+
+        Raises:
+            AuthenticationError: If API key is missing or invalid.
+            APIError: For other API errors.
+            ValueError: If page_indexes is empty.
+
+        Examples:
+            # Include the first page twice, then the second page
+            result = client.duplicate_pdf_pages(
+                "document.pdf",
+                page_indexes=[0, 0, 1]  # Page 1, Page 1, Page 2
+            )
+
+            # Include last page at beginning and end
+            result = client.duplicate_pdf_pages(
+                "document.pdf",
+                page_indexes=[-1, 0, 1, 2, -1]  # Last, First, Second, Third, Last
+            )
+
+            # Save to specific file
+            client.duplicate_pdf_pages(
+                "document.pdf",
+                page_indexes=[0, 2, 1],  # Reorder: Page 1, Page 3, Page 2
+                output_path="reordered.pdf"
+            )
+        """
+        # Validate first so bad input fails before any import or file handling.
+        if not page_indexes:
+            raise ValueError("page_indexes cannot be empty")
+
+        from nutrient_dws.file_handler import prepare_file_for_upload, save_file_output
+
+        # Prepare file for upload
+        file_field, file_data = prepare_file_for_upload(input_file, "file")
+        files = {file_field: file_data}
+
+        # One single-page part per requested index, in the requested order.
+        # Negative and non-negative indexes produce the same part shape, so no
+        # branching is needed; the index is sent to the API as given.
+        # NOTE(review): parts reference the upload by the literal "file", which
+        # assumes prepare_file_for_upload returns that same field name - confirm.
+        parts = [
+            {"file": "file", "pages": {"start": page_index, "end": page_index}}
+            for page_index in page_indexes
+        ]
+
+        # Build instructions for duplication (no extra actions are applied)
+        instructions = {"parts": parts, "actions": []}
+
+        # Make API request
+        # Type checking: at runtime, self is NutrientClient which has _http_client
+        result = self._http_client.post(  # type: ignore[attr-defined]
+            "/build",
+            files=files,
+            json_data=instructions,
+        )
+
+        # Handle output: write to disk when a path was given, else return the bytes
+        if output_path:
+            save_file_output(result, output_path)
+            return None
+        return result  # type: ignore[no-any-return]
+
     def merge_pdfs(
         self,
         input_files: List[FileInput],
diff --git a/tests/integration/test_live_api.py b/tests/integration/test_live_api.py
@@ -159,3 +159,58 @@ def test_split_pdf_single_page_default(self, client, sample_pdf_path):
 
         # Verify result is a valid PDF
         assert_is_pdf(result[0])
+
+    def test_duplicate_pdf_pages_basic(self, client, sample_pdf_path):
+        """Requesting page index 0 twice should yield non-empty PDF bytes."""
+        first_page_twice = [0, 0]
+        pdf_bytes = client.duplicate_pdf_pages(sample_pdf_path, page_indexes=first_page_twice)
+
+        # Without output_path the call hands back the document as bytes
+        assert isinstance(pdf_bytes, bytes)
+        assert len(pdf_bytes) > 0
+
+        # Delegate the PDF check to the shared helper
+        assert_is_pdf(pdf_bytes)
+
+    def test_duplicate_pdf_pages_reorder(self, client, sample_pdf_path):
+        """Swapping the first two pages should yield non-empty PDF bytes."""
+        # Relies on the sample PDF having at least 2 pages
+        swapped_order = [1, 0]
+        pdf_bytes = client.duplicate_pdf_pages(sample_pdf_path, page_indexes=swapped_order)
+
+        # Without output_path the call hands back the document as bytes
+        assert isinstance(pdf_bytes, bytes)
+        assert len(pdf_bytes) > 0
+
+        # Delegate the PDF check to the shared helper
+        assert_is_pdf(pdf_bytes)
+
+    def test_duplicate_pdf_pages_with_output_file(self, client, sample_pdf_path, tmp_path):
+        """Passing output_path should write the PDF to disk and return None."""
+        destination = tmp_path / "duplicated.pdf"
+        output_path = str(destination)
+
+        # Duplicate page 0, append page 1, and save straight to the temp file
+        outcome = client.duplicate_pdf_pages(
+            sample_pdf_path,
+            page_indexes=[0, 0, 1],
+            output_path=output_path,
+        )
+
+        # Nothing is returned when the result goes to a file
+        assert outcome is None
+
+        # The file must exist, be non-empty, and pass the PDF check
+        assert destination.exists()
+        assert destination.stat().st_size > 0
+        assert_is_pdf(output_path)
+
+    def test_duplicate_pdf_pages_negative_indexes(self, client, sample_pdf_path):
+        """Negative indexes mixed with index 0 should yield non-empty PDF bytes."""
+        # -1 is meant to address the last page; it is used at both ends here
+        last_first_last = [-1, 0, -1]
+        pdf_bytes = client.duplicate_pdf_pages(sample_pdf_path, page_indexes=last_first_last)
+
+        # Without output_path the call hands back the document as bytes
+        assert isinstance(pdf_bytes, bytes)
+        assert len(pdf_bytes) > 0
+
+        # Delegate the PDF check to the shared helper
+        assert_is_pdf(pdf_bytes)
+
+    def test_duplicate_pdf_pages_empty_indexes_error(self, client, sample_pdf_path):
+        """An empty page_indexes list must be rejected with ValueError."""
+        no_pages = []
+        expected_message = "page_indexes cannot be empty"
+        with pytest.raises(ValueError, match=expected_message):
+            client.duplicate_pdf_pages(sample_pdf_path, page_indexes=no_pages)
diff --git a/tests/unit/test_client.py b/tests/unit/test_client.py
@@ -68,6 +68,7 @@ def test_client_has_direct_api_methods():
     assert hasattr(client, "apply_redactions")
     assert hasattr(client, "merge_pdfs")
     assert hasattr(client, "split_pdf")
+    assert hasattr(client, "duplicate_pdf_pages")
 
 
 def test_client_context_manager():