save_file(image_proj_sd, "image_proj.safetensors")
save_file(ip_sd, "ip_adapter.safetensors")
```

### Sample Inference Script using the CLIP Model

The script below loads the two converted `.safetensors` files, wraps them in small linear projection and adapter modules, and runs them on CLIP image features extracted with the Hugging Face `transformers` library.

```python
import torch
from PIL import Image
from safetensors.torch import load_file
from transformers import CLIPProcessor, CLIPModel  # Hugging Face CLIP model

# Paths to the converted safetensors checkpoints
image_proj_ckpt = "image_proj.safetensors"
ip_adapter_ckpt = "ip_adapter.safetensors"

# Load the saved weights
image_proj_sd = load_file(image_proj_ckpt)
ip_adapter_sd = load_file(ip_adapter_ckpt)

# Define the model architectures (dimensions must match the saved weights)
class ImageProjectionModel(torch.nn.Module):
    def __init__(self, input_dim=512, output_dim=512):  # get_image_features on ViT-B/32 returns 512-dim embeddings
        super().__init__()
        self.model = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.model(x)

class IPAdapterModel(torch.nn.Module):
    def __init__(self, input_dim=512, output_dim=10):  # Example for 10 classes
        super().__init__()
        self.model = torch.nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.model(x)

# Initialize models
image_proj_model = ImageProjectionModel()
ip_adapter_model = IPAdapterModel()

# Load weights into models
image_proj_model.load_state_dict(image_proj_sd)
ip_adapter_model.load_state_dict(ip_adapter_sd)

# Set models to evaluation mode
image_proj_model.eval()
ip_adapter_model.eval()

# Inference pipeline
def inference(image_tensor):
    """
    Run inference using the loaded models.

    Args:
        image_tensor: Image feature tensor produced by CLIP's image encoder

    Returns:
        Final inference results
    """
    with torch.no_grad():
        # Step 1: Project the image features
        image_proj = image_proj_model(image_tensor)

        # Step 2: Pass the projected features through the IP-Adapter head
        result = ip_adapter_model(image_proj)

    return result

# Use CLIP for image preprocessing
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
clip_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")

# Image file path
image_path = "path/to/image.jpg"

# Preprocess the image (CLIPProcessor expects a PIL image, not a file path)
image = Image.open(image_path).convert("RGB")
inputs = processor(images=image, return_tensors="pt")
image_features = clip_model.get_image_features(pixel_values=inputs["pixel_values"])

# Normalize the image features as per CLIP's recommendations
image_features = image_features / image_features.norm(dim=-1, keepdim=True)

# Run inference
output = inference(image_features)
print("Inference output:", output)
```
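
The printed output above is just the adapter head's raw values. As a minimal, hypothetical follow-up (assuming the 10-dimensional output represents class logits, as in the example head defined above), the logits can be converted into a class prediction:

```python
# Hypothetical post-processing: treat the 10-dim output as class logits
probs = torch.softmax(output, dim=-1)
print("Predicted class:", probs.argmax(dim=-1).item())
```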

#### Parameters:
- `ckpt`: Path to the trained model checkpoint file.
- `map_location="cpu"`: Specifies that the model should be loaded onto the CPU.