@@ -2675,7 +2675,6 @@ def generate_streaming(tools, functions, function_call, prompt):
         usage=completion["usage"],
     )
 
-
 class Llava15ChatHandler:
     DEFAULT_SYSTEM_MESSAGE: Optional[str] = (
         "A chat between a curious human and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the human's questions."
@@ -2716,9 +2715,9 @@ class Llava15ChatHandler:
27162715 "{% endif %}"
27172716 )
27182717
2719- def __init__ (self , clip_model_path : str , llama_model : Optional [ llama . Llama ] = None , verbose : bool = True ):
2718+ def __init__ (self , clip_model_path : str , verbose : bool = True ):
27202719 import llama_cpp .mtmd_cpp as mtmd_cpp
2721-
2720+
27222721 self .clip_model_path = clip_model_path
27232722 self .verbose = verbose
27242723 self ._mtmd_cpp = mtmd_cpp
@@ -2763,15 +2762,6 @@ def mtmd_free():
 
         self._exit_stack.callback(mtmd_free)
 
-    def __call__(self, *args, **kwargs):
-        if self.clip_ctx is None:
-            # Initialize MTMD context with the llama model from the first argument
-            if len(args) > 0 and isinstance(args[0], llama.Llama):
-                self.initialize_mtmd_context(args[0])
-            else:
-                raise ValueError("MTMD context not initialized. Please call initialize_mtmd_context with a llama model first.")
-        return super().__call__(*args, **kwargs)
-
     def load_image(self, image_url: str) -> bytes:
         return self._load_image(image_url)
 
@@ -3056,26 +3046,6 @@ def __call__(
         )
         return _convert_completion_to_chat(completion_or_chunks, stream=stream)
 
-    def eval_image(self, llama: llama.Llama, image_url: str):
-        image_bytes = self.load_image(image_url)
-        embed = self._embed_image_bytes(image_bytes, llama.context_params.n_threads_batch)
-        if llama.n_tokens + embed.contents.n_image_pos > llama.n_ctx():
-            raise ValueError(
-                f"Prompt exceeds n_ctx: {llama.n_tokens + embed.contents.n_image_pos} > {llama.n_ctx()}"
-            )
-        n_past = ctypes.c_int(llama.n_tokens)
-        n_past_p = ctypes.pointer(n_past)
-        with suppress_stdout_stderr(disable=self.verbose):
-            self._mtmd_cpp.mtmd_cpp_eval_image_embed(
-                llama.ctx,
-                embed,
-                llama.n_batch,
-                n_past_p,
-            )
-        # Required to avoid issues with hf tokenizer
-        llama.input_ids[llama.n_tokens : n_past.value] = -1
-        llama.n_tokens = n_past.value
-
     @staticmethod
     def _load_image(image_url: str) -> bytes:
         # TODO: Add Pillow support for other image formats beyond (jpg, png)
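For context, a minimal usage sketch of the handler after this change, assuming only the narrowed `__init__` signature shown in the diff above; the model and projector file paths are placeholders, not part of this PR:

```python
# Hypothetical usage sketch for the updated Llava15ChatHandler signature.
# File paths are placeholders; only the constructor shape comes from this diff.
from llama_cpp import Llama
from llama_cpp.llama_chat_format import Llava15ChatHandler

# The handler no longer takes a llama_model argument; it is built from the
# CLIP/mmproj path alone and attached to the Llama instance via chat_handler.
chat_handler = Llava15ChatHandler(clip_model_path="./mmproj-model-f16.gguf", verbose=False)

llm = Llama(
    model_path="./llava-v1.5-7b.Q4_K_M.gguf",  # placeholder model path
    chat_handler=chat_handler,
    n_ctx=4096,  # leave room for image embeddings alongside the text prompt
)
```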