Skip to content

Commit 5ae5371

Browse files
committed
feat: add image file support for watermark_pdf method
- Add image_file parameter to watermark_pdf() for local image uploads
- Support path strings, bytes, or file-like objects as image input
- Update builder API to handle image file watermarks
- Add comprehensive unit and integration tests
- Update documentation with examples
- Maintain backward compatibility with text and URL watermarks

Closes #11
1 parent a1e2d21 commit 5ae5371

File tree

7 files changed

+568
-11
lines changed

7 files changed

+568
-11
lines changed

README.md

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,28 @@ client.watermark_pdf(
128128
opacity=0.5,
129129
position="center"
130130
)
131+
132+
# Add image watermark from URL
133+
client.watermark_pdf(
134+
input_file="document.pdf",
135+
output_path="watermarked.pdf",
136+
image_url="https://example.com/logo.png",
137+
width=150,
138+
height=75,
139+
opacity=0.8,
140+
position="bottom-right"
141+
)
142+
143+
# Add image watermark from local file (NEW!)
144+
client.watermark_pdf(
145+
input_file="document.pdf",
146+
output_path="watermarked.pdf",
147+
image_file="logo.png", # Can be path, bytes, or file-like object
148+
width=150,
149+
height=75,
150+
opacity=0.8,
151+
position="bottom-right"
152+
)
131153
```
132154

133155
## Builder API Examples
@@ -150,6 +172,17 @@ result = client.build(input_file="raw-scan.pdf") \
150172
optimize=True
151173
) \
152174
.execute(output_path="final.pdf")
175+
176+
# Using image file in builder API
177+
result = client.build(input_file="document.pdf") \
178+
.add_step("watermark-pdf", {
179+
"image_file": "company-logo.png", # Local file
180+
"width": 100,
181+
"height": 50,
182+
"opacity": 0.5,
183+
"position": "bottom-left"
184+
}) \
185+
.execute()
153186
```
154187

155188
## File Input Options

issue_comments.md

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Issue Comments for PR #7
2+
3+
## For Issue #3: Add support for missing Nutrient DWS API tools
4+
5+
**Status**: Partially addressed by PR #7
6+
7+
PR #7 implements 5 of the high-priority PDF processing tools from this issue:
8+
- ✅ split_pdf - Split PDF into multiple files by page ranges
9+
- ✅ duplicate_pdf_pages - Duplicate and reorder specific pages
10+
- ✅ delete_pdf_pages - Delete specific pages from PDFs
11+
- ✅ add_page - Add blank pages to PDFs
12+
- ✅ set_page_label - Set page labels/numbering
13+
14+
Once merged, the library will expand from 7 to 12 Direct API methods.
15+
16+
---
17+
18+
## For Issue #15: Feature: Extract Page Range Method
19+
20+
**Status**: Addressed by PR #7's split_pdf implementation
21+
22+
The `split_pdf()` method in PR #7 provides the functionality requested:
23+
24+
```python
25+
# Extract pages 5-10 (0-based indexing)
26+
result = client.split_pdf(
27+
"document.pdf",
28+
page_ranges=[{"start": 4, "end": 10}]
29+
)
30+
31+
# Extract from page 10 to end
32+
result = client.split_pdf(
33+
"document.pdf",
34+
page_ranges=[{"start": 9}] # Omit 'end' to go to end of document
35+
)
36+
```
37+
38+
While the method name is `split_pdf` rather than `extract_pages`, it provides the exact functionality described in this issue:
39+
- Single range extraction ✅
40+
- Support for "to end" extraction ✅
41+
- Clear error messages for invalid ranges ✅
42+
- Memory efficient implementation ✅
43+
44+
Consider closing this issue once PR #7 is merged.
45+
46+
---
47+
48+
## PR #7 Summary
49+
50+
**Title**: feat: integrate fork features with comprehensive Direct API methods
51+
52+
**New Methods**:
53+
1. `split_pdf()` - Split PDFs by page ranges (addresses issue #15)
54+
2. `duplicate_pdf_pages()` - Duplicate and reorder pages
55+
3. `delete_pdf_pages()` - Remove specific pages
56+
4. `add_page()` - Insert blank pages
57+
5. `set_page_label()` - Apply page labels
58+
59+
**Status**: All CI checks passing ✅ Ready for merge!

src/nutrient_dws/api/direct.py

Lines changed: 53 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,7 @@ def watermark_pdf(
159159
output_path: str | None = None,
160160
text: str | None = None,
161161
image_url: str | None = None,
162+
image_file: FileInput | None = None,
162163
width: int = 200,
163164
height: int = 100,
164165
opacity: float = 1.0,
@@ -172,8 +173,10 @@ def watermark_pdf(
172173
Args:
173174
input_file: Input file (PDF or Office document).
174175
output_path: Optional path to save the output file.
175-
text: Text to use as watermark. Either text or image_url required.
176+
text: Text to use as watermark. One of text, image_url, or image_file required.
176177
image_url: URL of image to use as watermark.
178+
image_file: Local image file to use as watermark (path, bytes, or file-like object).
179+
Supported formats: PNG, JPEG, TIFF.
177180
width: Width of the watermark in points (required).
178181
height: Height of the watermark in points (required).
179182
opacity: Opacity of the watermark (0.0 to 1.0).
@@ -187,11 +190,57 @@ def watermark_pdf(
187190
Raises:
188191
AuthenticationError: If API key is missing or invalid.
189192
APIError: For other API errors.
190-
ValueError: If neither text nor image_url is provided.
193+
ValueError: If none of text, image_url, or image_file is provided.
191194
"""
192-
if not text and not image_url:
193-
raise ValueError("Either text or image_url must be provided")
195+
if not text and not image_url and not image_file:
196+
raise ValueError("Either text, image_url, or image_file must be provided")
194197

198+
# For image file uploads, call the /build endpoint directly so the image can be sent as an additional uploaded file
199+
if image_file:
200+
from nutrient_dws.file_handler import prepare_file_for_upload, save_file_output
201+
202+
# Prepare files for upload
203+
files = {}
204+
205+
# Main PDF file
206+
file_field, file_data = prepare_file_for_upload(input_file, "file")
207+
files[file_field] = file_data
208+
209+
# Watermark image file
210+
image_field, image_data = prepare_file_for_upload(image_file, "watermark")
211+
files[image_field] = image_data
212+
213+
# Build instructions with watermark action
214+
action = {
215+
"type": "watermark",
216+
"width": width,
217+
"height": height,
218+
"opacity": opacity,
219+
"position": position,
220+
"image": "watermark" # Reference to the uploaded image file
221+
}
222+
223+
instructions = {
224+
"parts": [{"file": "file"}],
225+
"actions": [action]
226+
}
227+
228+
# Make API request
229+
# Type checking: at runtime, self is NutrientClient which has _http_client
230+
result = self._http_client.post( # type: ignore[attr-defined]
231+
"/build",
232+
files=files,
233+
json_data=instructions,
234+
)
235+
236+
# Handle output
237+
if output_path:
238+
save_file_output(result, output_path)
239+
return None
240+
else:
241+
return result # type: ignore[no-any-return]
242+
243+
# For text and URL watermarks, use the existing _process_file approach
195244
options = {
196245
"width": width,
197246
"height": height,

src/nutrient_dws/builder.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,14 @@ def _map_tool_to_action(self, tool: str, options: dict[str, Any]) -> dict[str, A
211211
action["text"] = options["text"]
212212
elif "image_url" in options:
213213
action["image"] = {"url": options["image_url"]} # type: ignore
214+
elif "image_file" in options:
215+
# Handle image file upload
216+
image_file = options["image_file"]
217+
# Add the image as a file part
218+
watermark_name = f"watermark_{len(self._files)}"
219+
self._files[watermark_name] = image_file
220+
# Reference the uploaded file
221+
action["image"] = watermark_name # type: ignore
214222
else:
215223
# Default to text watermark if no text or image option is specified
216224
action["text"] = "WATERMARK"
Lines changed: 196 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
"""Integration tests for image file watermark functionality."""
2+
3+
import os
4+
from typing import Optional
5+
6+
import pytest
7+
8+
from nutrient_dws import NutrientClient
9+
10+
try:
11+
from . import integration_config # type: ignore[attr-defined]
12+
13+
API_KEY: Optional[str] = integration_config.API_KEY
14+
BASE_URL: Optional[str] = getattr(integration_config, "BASE_URL", None)
15+
TIMEOUT: int = getattr(integration_config, "TIMEOUT", 60)
16+
except ImportError:
17+
API_KEY = None
18+
BASE_URL = None
19+
TIMEOUT = 60
20+
21+
22+
def assert_is_pdf(file_path_or_bytes):
23+
"""Assert that a file or bytes is a valid PDF."""
24+
if isinstance(file_path_or_bytes, str):
25+
with open(file_path_or_bytes, "rb") as f:
26+
content = f.read(8)
27+
else:
28+
content = file_path_or_bytes[:8]
29+
30+
assert content.startswith(b"%PDF-"), (
31+
f"File does not start with PDF magic number, got: {content!r}"
32+
)
33+
34+
35+
def create_test_image(tmp_path, filename="watermark.png"):
36+
"""Create a simple test PNG image."""
37+
# Complete minimal PNG data (signature, IHDR, IDAT, IEND) for a 1x1 transparent pixel
38+
png_data = (
39+
b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01'
40+
b'\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\rIDATx\x9cc\xf8\x0f'
41+
b'\x00\x00\x01\x01\x00\x00\xcb\xd6\x8e\n\x00\x00\x00\x00IEND\xaeB`\x82'
42+
)
43+
44+
image_path = tmp_path / filename
45+
image_path.write_bytes(png_data)
46+
return str(image_path)
47+
48+
49+
@pytest.mark.skipif(not API_KEY, reason="No API key configured in integration_config.py")
50+
class TestWatermarkImageFileIntegration:
51+
"""Integration tests for image file watermark functionality."""
52+
53+
@pytest.fixture
54+
def client(self):
55+
"""Create a client with the configured API key."""
56+
client = NutrientClient(api_key=API_KEY, timeout=TIMEOUT)
57+
yield client
58+
client.close()
59+
60+
@pytest.fixture
61+
def sample_pdf_path(self):
62+
"""Get path to sample PDF file for testing."""
63+
return os.path.join(os.path.dirname(__file__), "..", "data", "sample.pdf")
64+
65+
def test_watermark_pdf_with_image_file_path(self, client, sample_pdf_path, tmp_path):
66+
"""Test watermark_pdf with local image file path."""
67+
# Create a test image
68+
image_path = create_test_image(tmp_path)
69+
70+
result = client.watermark_pdf(
71+
sample_pdf_path,
72+
image_file=image_path,
73+
width=100,
74+
height=50,
75+
opacity=0.5,
76+
position="bottom-right"
77+
)
78+
79+
assert isinstance(result, bytes)
80+
assert len(result) > 0
81+
assert_is_pdf(result)
82+
83+
def test_watermark_pdf_with_image_bytes(self, client, sample_pdf_path):
84+
"""Test watermark_pdf with image as bytes."""
85+
# Complete minimal PNG data (signature, IHDR, IDAT, IEND) for a 1x1 transparent pixel
86+
png_bytes = (
87+
b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00\x00\x01'
88+
b'\x08\x06\x00\x00\x00\x1f\x15\xc4\x89\x00\x00\x00\rIDATx\x9cc\xf8\x0f'
89+
b'\x00\x00\x01\x01\x00\x00\xcb\xd6\x8e\n\x00\x00\x00\x00IEND\xaeB`\x82'
90+
)
91+
92+
result = client.watermark_pdf(
93+
sample_pdf_path,
94+
image_file=png_bytes,
95+
width=150,
96+
height=75,
97+
opacity=0.8,
98+
position="top-left"
99+
)
100+
101+
assert isinstance(result, bytes)
102+
assert len(result) > 0
103+
assert_is_pdf(result)
104+
105+
def test_watermark_pdf_with_image_file_output_path(self, client, sample_pdf_path, tmp_path):
106+
"""Test watermark_pdf with image file saving to output path."""
107+
# Create a test image
108+
image_path = create_test_image(tmp_path)
109+
output_path = str(tmp_path / "watermarked_with_image.pdf")
110+
111+
result = client.watermark_pdf(
112+
sample_pdf_path,
113+
image_file=image_path,
114+
width=200,
115+
height=100,
116+
opacity=0.7,
117+
position="center",
118+
output_path=output_path
119+
)
120+
121+
assert result is None
122+
assert (tmp_path / "watermarked_with_image.pdf").exists()
123+
assert (tmp_path / "watermarked_with_image.pdf").stat().st_size > 0
124+
assert_is_pdf(output_path)
125+
126+
def test_watermark_pdf_with_file_like_object(self, client, sample_pdf_path, tmp_path):
127+
"""Test watermark_pdf with image as file-like object."""
128+
# Create a test image
129+
image_path = create_test_image(tmp_path)
130+
131+
# Read as file-like object
132+
with open(image_path, "rb") as image_file:
133+
result = client.watermark_pdf(
134+
sample_pdf_path,
135+
image_file=image_file,
136+
width=120,
137+
height=60,
138+
opacity=0.6,
139+
position="top-center"
140+
)
141+
142+
assert isinstance(result, bytes)
143+
assert len(result) > 0
144+
assert_is_pdf(result)
145+
146+
def test_builder_api_with_image_file_watermark(self, client, sample_pdf_path, tmp_path):
147+
"""Test Builder API with image file watermark."""
148+
# Create a test image
149+
image_path = create_test_image(tmp_path)
150+
151+
# Use builder API
152+
result = (
153+
client.build(sample_pdf_path)
154+
.add_step("watermark-pdf", options={
155+
"image_file": image_path,
156+
"width": 180,
157+
"height": 90,
158+
"opacity": 0.4,
159+
"position": "bottom-left"
160+
})
161+
.execute()
162+
)
163+
164+
assert isinstance(result, bytes)
165+
assert len(result) > 0
166+
assert_is_pdf(result)
167+
168+
def test_multiple_watermarks_with_image_files(self, client, sample_pdf_path, tmp_path):
169+
"""Test applying multiple watermarks including image files."""
170+
# Create a test image
171+
image1_path = create_test_image(tmp_path, "watermark1.png")
172+
173+
# Chain multiple watermark operations
174+
result = (
175+
client.build(sample_pdf_path)
176+
.add_step("watermark-pdf", options={
177+
"text": "DRAFT",
178+
"width": 200,
179+
"height": 100,
180+
"opacity": 0.3,
181+
"position": "center"
182+
})
183+
.add_step("watermark-pdf", options={
184+
"image_file": image1_path,
185+
"width": 100,
186+
"height": 50,
187+
"opacity": 0.5,
188+
"position": "top-right"
189+
})
190+
.execute()
191+
)
192+
193+
assert isinstance(result, bytes)
194+
assert len(result) > 0
195+
assert_is_pdf(result)
196+

0 commit comments

Comments
 (0)