Skip to content

Commit c8f4819

Browse files
msch-nutrientclaude
andcommitted
Add duplicate_pdf_pages method for PDF page duplication
- Add duplicate_pdf_pages method to DirectAPIMixin using Build API pattern - Support flexible page selection with 0-based indexing and negative indexes - Enable page duplication by repeating indexes in page_indexes list - Add comprehensive integration tests with live API verification - Update documentation in SUPPORTED_OPERATIONS.md with examples - Follow established patterns from split_pdf implementation 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <[email protected]>
1 parent cd4c72a commit c8f4819

File tree

4 files changed

+173
-0
lines changed

4 files changed

+173
-0
lines changed

SUPPORTED_OPERATIONS.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,39 @@ client.split_pdf(
188188
pages = client.split_pdf("document.pdf")
189189
```
190190

191+
### 9. `duplicate_pdf_pages(input_file, page_indexes, output_path=None)`
192+
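Creates a new PDF containing the specified pages in the order provided. Repeating an index duplicates that page, so the same call can also be used to extract or reorder pages.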
193+
194+
**Parameters:**
195+
- `input_file`: PDF file to process
196+
- `page_indexes`: List of page indexes to include (0-based), in output order. An index can be repeated to duplicate that page. Negative indexes are supported (-1 for the last page). Must not be empty.
197+
- `output_path`: Optional path to save the output file
198+
199+
**Returns:**
200+
- Processed PDF as bytes, or None if `output_path` is provided
201+
202+
**Example:**
203+
```python
204+
# Duplicate first page twice, then include second page
205+
result = client.duplicate_pdf_pages(
206+
"document.pdf",
207+
page_indexes=[0, 0, 1] # Page 1, Page 1, Page 2
208+
)
209+
210+
# Include last page at beginning and end
211+
result = client.duplicate_pdf_pages(
212+
"document.pdf",
213+
page_indexes=[-1, 0, 1, 2, -1] # Last, First, Second, Third, Last
214+
)
215+
216+
# Save to specific file
217+
client.duplicate_pdf_pages(
218+
"document.pdf",
219+
page_indexes=[0, 2, 1], # Reorder: Page 1, Page 3, Page 2
220+
output_path="reordered.pdf"
221+
)
222+
```
223+
191224
## Builder API
192225

193226
The Builder API allows chaining multiple operations. Like the Direct API, it automatically converts Office documents to PDF when needed:

src/nutrient_dws/api/direct.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -317,6 +317,90 @@ def split_pdf(
317317

318318
return results if not output_paths else []
319319

320+
def duplicate_pdf_pages(
    self,
    input_file: FileInput,
    page_indexes: List[int],
    output_path: Optional[str] = None,
) -> Optional[bytes]:
    """Build a new PDF from selected pages of a document, allowing repeats.

    The output contains one page per entry of ``page_indexes``, in the order
    given. Listing an index more than once duplicates that page; listing the
    indexes in a different order reorders the document.

    Args:
        input_file: Input PDF file.
        page_indexes: Page indexes to include (0-based). Indexes may be
            repeated to create duplicates. Negative indexes are passed to
            the API unchanged (-1 is intended to mean the last page).
        output_path: Optional path to save the output file.

    Returns:
        Processed PDF as bytes, or None if output_path is provided.

    Raises:
        AuthenticationError: If API key is missing or invalid.
        APIError: For other API errors.
        ValueError: If page_indexes is empty.

    Examples:
        # Duplicate first page twice, then include second page
        result = client.duplicate_pdf_pages(
            "document.pdf",
            page_indexes=[0, 0, 1]  # Page 1, Page 1, Page 2
        )

        # Include last page at beginning and end
        result = client.duplicate_pdf_pages(
            "document.pdf",
            page_indexes=[-1, 0, 1, 2, -1]  # Last, First, Second, Third, Last
        )

        # Save to specific file
        client.duplicate_pdf_pages(
            "document.pdf",
            page_indexes=[0, 2, 1],  # Reorder: Page 1, Page 3, Page 2
            output_path="reordered.pdf"
        )
    """
    # Validate first so bad input fails before any import, file access or
    # network traffic happens.
    if not page_indexes:
        raise ValueError("page_indexes cannot be empty")

    # Imported lazily, as in the rest of this method's original implementation.
    from nutrient_dws.file_handler import prepare_file_for_upload, save_file_output

    # Prepare file for upload; every part below refers to it by the name "file".
    file_field, file_data = prepare_file_for_upload(input_file, "file")
    files = {file_field: file_data}

    # One single-page part per requested index. Negative and non-negative
    # indexes produce the same payload shape (start == end == index), so no
    # branching is needed; resolving negative indexes is left to the API.
    parts = [
        {"file": "file", "pages": {"start": page_index, "end": page_index}}
        for page_index in page_indexes
    ]

    # No extra actions: the result is just the concatenation of the parts.
    instructions = {"parts": parts, "actions": []}

    # Type checking: at runtime, self is NutrientClient which has _http_client
    result = self._http_client.post(  # type: ignore[attr-defined]
        "/build",
        files=files,
        json_data=instructions,
    )

    # Either persist the result and return None, or hand the bytes back.
    if output_path:
        save_file_output(result, output_path)
        return None
    return result  # type: ignore[no-any-return]
403+
320404
def merge_pdfs(
321405
self,
322406
input_files: List[FileInput],

tests/integration/test_live_api.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,3 +159,58 @@ def test_split_pdf_single_page_default(self, client, sample_pdf_path):
159159

160160
# Verify result is a valid PDF
161161
assert_is_pdf(result[0])
162+
163+
def test_duplicate_pdf_pages_basic(self, client, sample_pdf_path):
    """Requesting the first page twice returns non-empty PDF bytes."""
    first_page_twice = [0, 0]
    pdf_bytes = client.duplicate_pdf_pages(sample_pdf_path, page_indexes=first_page_twice)

    # Without an output path the method hands the document back in memory.
    assert isinstance(pdf_bytes, bytes)
    assert len(pdf_bytes) > 0

    # The returned payload must itself be a valid PDF.
    assert_is_pdf(pdf_bytes)
173+
174+
def test_duplicate_pdf_pages_reorder(self, client, sample_pdf_path):
    """Swapping the first two pages returns non-empty PDF bytes."""
    # Relies on the sample PDF having at least 2 pages.
    swapped_order = [1, 0]
    pdf_bytes = client.duplicate_pdf_pages(sample_pdf_path, page_indexes=swapped_order)

    assert isinstance(pdf_bytes, bytes)
    assert len(pdf_bytes) > 0

    # The returned payload must itself be a valid PDF.
    assert_is_pdf(pdf_bytes)
184+
185+
def test_duplicate_pdf_pages_with_output_file(self, client, sample_pdf_path, tmp_path):
    """With output_path set, the PDF is written to disk and None is returned."""
    target_file = tmp_path / "duplicated.pdf"
    output_path = str(target_file)

    # Duplicate the first page, append the second, and save to the target.
    outcome = client.duplicate_pdf_pages(
        sample_pdf_path, page_indexes=[0, 0, 1], output_path=output_path
    )

    # Nothing is returned in memory when the result goes to a file.
    assert outcome is None

    # The file must exist, be non-empty, and be a valid PDF.
    assert target_file.exists()
    assert target_file.stat().st_size > 0
    assert_is_pdf(output_path)
201+
202+
def test_duplicate_pdf_pages_negative_indexes(self, client, sample_pdf_path):
    """Negative indexes (-1 for the last page) are accepted and yield a PDF."""
    last_first_last = [-1, 0, -1]
    pdf_bytes = client.duplicate_pdf_pages(sample_pdf_path, page_indexes=last_first_last)

    assert isinstance(pdf_bytes, bytes)
    assert len(pdf_bytes) > 0

    # The returned payload must itself be a valid PDF.
    assert_is_pdf(pdf_bytes)
212+
213+
def test_duplicate_pdf_pages_empty_indexes_error(self, client, sample_pdf_path):
    """An empty page_indexes list is rejected with a ValueError."""
    expected_message = "page_indexes cannot be empty"
    with pytest.raises(ValueError, match=expected_message):
        client.duplicate_pdf_pages(sample_pdf_path, page_indexes=[])

tests/unit/test_client.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ def test_client_has_direct_api_methods():
6868
assert hasattr(client, "apply_redactions")
6969
assert hasattr(client, "merge_pdfs")
7070
assert hasattr(client, "split_pdf")
71+
assert hasattr(client, "duplicate_pdf_pages")
7172

7273

7374
def test_client_context_manager():

0 commit comments

Comments
 (0)