Commit 8231773

Add sample inference script and restore _toctree

1 parent 9179302 · commit 8231773

File tree: 2 files changed, +85 -1 lines changed

docs/source/en/_toctree.yml

Lines changed: 1 addition & 1 deletion

```diff
@@ -568,6 +568,6 @@
     title: VAE Image Processor
   - local: api/video_processor
     title: Video Processor
-  title: Internal classes
+  title: Internal classes
   title: API
```

examples/research_projects/ip_adapter/README.md

Lines changed: 84 additions & 0 deletions

Hunk `@@ -134,6 +134,90 @@` adds the following section immediately after the existing code block that ends with `save_file(ip_sd, "ip_adapter.safetensors")`.

### Sample Inference Script using the CLIP Model

```python
import torch
from PIL import Image
from safetensors.torch import load_file
from transformers import CLIPModel, CLIPProcessor  # Hugging Face CLIP model

# Paths to the weights saved by the conversion snippet above
image_proj_ckpt = "image_proj.safetensors"
ip_adapter_ckpt = "ip_adapter.safetensors"

# Load the saved weights
image_proj_sd = load_file(image_proj_ckpt)
ip_adapter_sd = load_file(ip_adapter_ckpt)

# Define the model architectures; the dimensions must match the saved checkpoints
class ImageProjectionModel(torch.nn.Module):
    def __init__(self, input_dim=512, output_dim=512):
        # `get_image_features` of clip-vit-base-patch32 returns 512-dim embeddings;
        # adjust both dimensions to match your trained checkpoint
        super().__init__()
        self.model = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.model(x)


class IPAdapterModel(torch.nn.Module):
    def __init__(self, input_dim=512, output_dim=10):  # example for 10 classes
        super().__init__()
        self.model = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.model(x)


# Initialize the models
image_proj_model = ImageProjectionModel()
ip_adapter_model = IPAdapterModel()

# Load the weights into the models (the state dict keys must match how the weights were saved)
image_proj_model.load_state_dict(image_proj_sd)
ip_adapter_model.load_state_dict(ip_adapter_sd)

# Set the models to evaluation mode
image_proj_model.eval()
ip_adapter_model.eval()


# Inference pipeline
def inference(image_features):
    """
    Run inference using the loaded models.

    Args:
        image_features: Image embeddings produced by CLIP's `get_image_features`

    Returns:
        Final inference results
    """
    with torch.no_grad():
        # Step 1: project the image features
        image_proj = image_proj_model(image_features)

        # Step 2: pass the projected features through the IP-Adapter head
        return ip_adapter_model(image_proj)


# Use CLIP for image preprocessing and feature extraction
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

# Load and preprocess the image
image_path = "path/to/image.jpg"
image = Image.open(image_path).convert("RGB")
inputs = processor(images=image, return_tensors="pt")
image_features = clip_model.get_image_features(inputs["pixel_values"])

# Normalize the image features, as recommended for CLIP embeddings
image_features = image_features / image_features.norm(dim=-1, keepdim=True)

# Run inference
output = inference(image_features)
print("Inference output:", output)
```
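Because the `torch.nn.Linear` dimensions above have to match whatever was saved during training, it can help to inspect the stored tensors before wiring up the models. Below is a minimal sketch (not part of the committed snippet; it assumes the two safetensors files from the conversion step are present):

```python
from safetensors.torch import load_file

# List every tensor name and shape in the saved checkpoints so the
# Linear in/out dimensions above can be set to match them.
for path in ("image_proj.safetensors", "ip_adapter.safetensors"):
    state_dict = load_file(path)
    print(f"\n{path}")
    for name, tensor in state_dict.items():
        print(f"  {name}: {tuple(tensor.shape)}")
```

For a single `torch.nn.Linear`, the `weight` entry has shape `(output_dim, input_dim)`, which gives the values to plug into the classes above.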
The addition is followed by the file's existing parameter notes (unchanged context):

#### Parameters:
- `ckpt`: Path to the trained model checkpoint file.
- `map_location="cpu"`: Specifies that the model should be loaded onto the CPU.
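For illustration, a minimal sketch of how these two parameters are typically used together when loading a checkpoint on a CPU-only machine (the path below is a placeholder):

```python
import torch

# `map_location="cpu"` remaps all tensors in the checkpoint onto the CPU,
# even if they were saved from CUDA devices during training.
ckpt = "path/to/trained_checkpoint.bin"  # placeholder checkpoint path
state_dict = torch.load(ckpt, map_location="cpu")
print(f"Loaded {len(state_dict)} entries from {ckpt}")
```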
