Review round 1

fellhorn · fellhorn · commit d02546562355 · 2025-08-13T23:33:24.000+02:00
* Remove unsafe Send, Sync
* Clean up error handling
* Use default mtmd_context_params directly

Signed-off-by: Dennis Keck <26092524+fellhorn@users.noreply.github.com>
diff --git a/examples/mtmd/src/mtmd.rs b/examples/mtmd/src/mtmd.rs
@@ -10,7 +10,9 @@ use llama_cpp_2::context::params::LlamaContextParams;
 use llama_cpp_2::context::LlamaContext;
 use llama_cpp_2::llama_batch::LlamaBatch;
 use llama_cpp_2::model::params::LlamaModelParams;
-use llama_cpp_2::mtmd::{MtmdBitmap, MtmdBitmapError, MtmdContext, MtmdContextParams, MtmdInputText};
+use llama_cpp_2::mtmd::{
+    MtmdBitmap, MtmdBitmapError, MtmdContext, MtmdContextParams, MtmdInputText,
+};
 
 use llama_cpp_2::llama_backend::LlamaBackend;
 use llama_cpp_2::model::{LlamaChatMessage, LlamaChatTemplate, LlamaModel, Special};
diff --git a/llama-cpp-2/src/mtmd.rs b/llama-cpp-2/src/mtmd.rs
@@ -77,28 +77,42 @@ pub struct MtmdContextParams {
 
 impl Default for MtmdContextParams {
     fn default() -> Self {
-        Self {
-            use_gpu: false,
-            print_timings: true,
-            n_threads: 4,
-            media_marker: CString::new(mtmd_default_marker()).unwrap_or_default(),
-        }
+        unsafe { llama_cpp_sys_2::mtmd_context_params_default() }.into()
     }
 }
 
 impl From<&MtmdContextParams> for llama_cpp_sys_2::mtmd_context_params {
     fn from(params: &MtmdContextParams) -> Self {
         let mut context = unsafe { llama_cpp_sys_2::mtmd_context_params_default() };
-
-        context.use_gpu = params.use_gpu;
-        context.print_timings = params.print_timings;
-        context.n_threads = params.n_threads;
-        context.media_marker = params.media_marker.as_ptr();
+        let MtmdContextParams {
+            use_gpu,
+            print_timings,
+            n_threads,
+            media_marker,
+        } = params;
+
+        context.use_gpu = *use_gpu;
+        context.print_timings = *print_timings;
+        context.n_threads = *n_threads;
+        context.media_marker = media_marker.as_ptr();
 
         context
     }
 }
 
+impl From<llama_cpp_sys_2::mtmd_context_params> for MtmdContextParams {
+    fn from(params: llama_cpp_sys_2::mtmd_context_params) -> Self {
+        Self {
+            use_gpu: params.use_gpu,
+            print_timings: params.print_timings,
+            n_threads: params.n_threads,
+            media_marker: unsafe { CStr::from_ptr(params.media_marker) }
+                .to_owned()
+                .into(),
+        }
+    }
+}
+
 /// Text input configuration
 ///
 /// # Examples
@@ -165,40 +179,41 @@ impl MtmdContext {
             )
         };
 
-        if context.is_null() {
-            return Err(MtmdInitError::NullResult);
-        }
-
         let context = NonNull::new(context).ok_or(MtmdInitError::NullResult)?;
         Ok(Self { context })
     }
 
     /// Check whether non-causal attention mask is needed before `llama_decode`.
-    #[must_use] pub fn decode_use_non_causal(&self) -> bool {
+    #[must_use]
+    pub fn decode_use_non_causal(&self) -> bool {
         unsafe { llama_cpp_sys_2::mtmd_decode_use_non_causal(self.context.as_ptr()) }
     }
 
     /// Check whether the current model uses M-RoPE for `llama_decode`.
     ///
     /// M-RoPE (Multimodal Rotary Position Embedding) affects how positions
     /// are calculated for multimodal inputs.
-    #[must_use] pub fn decode_use_mrope(&self) -> bool {
+    #[must_use]
+    pub fn decode_use_mrope(&self) -> bool {
         unsafe { llama_cpp_sys_2::mtmd_decode_use_mrope(self.context.as_ptr()) }
     }
 
     /// Check whether the current model supports vision input.
-    #[must_use] pub fn support_vision(&self) -> bool {
+    #[must_use]
+    pub fn support_vision(&self) -> bool {
         unsafe { llama_cpp_sys_2::mtmd_support_vision(self.context.as_ptr()) }
     }
 
     /// Check whether the current model supports audio input.
-    #[must_use] pub fn support_audio(&self) -> bool {
+    #[must_use]
+    pub fn support_audio(&self) -> bool {
         unsafe { llama_cpp_sys_2::mtmd_support_audio(self.context.as_ptr()) }
     }
 
     /// Get audio bitrate in Hz (e.g., 16000 for Whisper).
     /// Returns -1 if audio is not supported.
-    #[must_use] pub fn get_audio_bitrate(&self) -> i32 {
+    #[must_use]
+    pub fn get_audio_bitrate(&self) -> i32 {
         unsafe { llama_cpp_sys_2::mtmd_get_audio_bitrate(self.context.as_ptr()) }
     }
 
@@ -243,7 +258,7 @@ impl MtmdContext {
         bitmaps: &[&MtmdBitmap],
     ) -> Result<MtmdInputChunks, MtmdTokenizeError> {
         let chunks = MtmdInputChunks::new();
-        let text_cstring = CString::new(text.text).unwrap_or_default();
+        let text_cstring = CString::new(text.text)?;
         let input_text = llama_cpp_sys_2::mtmd_input_text {
             text: text_cstring.as_ptr(),
             add_special: text.add_special,
@@ -304,9 +319,6 @@ impl MtmdContext {
     }
 }
 
-unsafe impl Send for MtmdContext {}
-unsafe impl Sync for MtmdContext {}
-
 impl Drop for MtmdContext {
     fn drop(&mut self) {
         unsafe { llama_cpp_sys_2::mtmd_free(self.context.as_ptr()) }
@@ -471,43 +483,48 @@ impl MtmdBitmap {
     }
 
     /// Get bitmap width in pixels.
-    #[must_use] pub fn nx(&self) -> u32 {
+    #[must_use]
+    pub fn nx(&self) -> u32 {
         unsafe { llama_cpp_sys_2::mtmd_bitmap_get_nx(self.bitmap.as_ptr()) }
     }
 
     /// Get bitmap height in pixels.
-    #[must_use] pub fn ny(&self) -> u32 {
+    #[must_use]
+    pub fn ny(&self) -> u32 {
         unsafe { llama_cpp_sys_2::mtmd_bitmap_get_ny(self.bitmap.as_ptr()) }
     }
 
     /// Get bitmap data as a byte slice.
     ///
     /// For images: RGB format with length `nx * ny * 3`
     /// For audio: PCM F32 format with length `n_samples * 4`
-    #[must_use] pub fn data(&self) -> &[u8] {
+    #[must_use]
+    pub fn data(&self) -> &[u8] {
         let ptr = unsafe { llama_cpp_sys_2::mtmd_bitmap_get_data(self.bitmap.as_ptr()) };
         let len = unsafe { llama_cpp_sys_2::mtmd_bitmap_get_n_bytes(self.bitmap.as_ptr()) };
         unsafe { slice::from_raw_parts(ptr, len) }
     }
 
     /// Check if this bitmap contains audio data (vs image data).
-    #[must_use] pub fn is_audio(&self) -> bool {
+    #[must_use]
+    pub fn is_audio(&self) -> bool {
         unsafe { llama_cpp_sys_2::mtmd_bitmap_is_audio(self.bitmap.as_ptr()) }
     }
 
     /// Get the bitmap's optional ID string.
     ///
     /// Bitmap ID is useful for KV cache tracking and can e.g. be calculated
     /// based on a hash of the bitmap data.
-    #[must_use] pub fn id(&self) -> Option<String> {
+    #[must_use]
+    pub fn id(&self) -> Option<String> {
         let ptr = unsafe { llama_cpp_sys_2::mtmd_bitmap_get_id(self.bitmap.as_ptr()) };
         if ptr.is_null() {
             None
         } else {
-            unsafe { CStr::from_ptr(ptr) }
+            let id = unsafe { CStr::from_ptr(ptr) }
                 .to_string_lossy()
-                .into_owned()
-                .into()
+                .into_owned();
+            Some(id)
         }
     }
 
@@ -580,24 +597,28 @@ impl MtmdInputChunks {
     /// assert_eq!(chunks.len(), 0);
     /// assert!(chunks.is_empty());
     /// ```
-    #[must_use] pub fn new() -> Self {
+    #[must_use]
+    pub fn new() -> Self {
         let chunks = unsafe { llama_cpp_sys_2::mtmd_input_chunks_init() };
         let chunks = NonNull::new(chunks).unwrap();
         Self { chunks }
     }
 
     /// Get the number of chunks
-    #[must_use] pub fn len(&self) -> usize {
+    #[must_use]
+    pub fn len(&self) -> usize {
         unsafe { llama_cpp_sys_2::mtmd_input_chunks_size(self.chunks.as_ptr()) }
     }
 
     /// Check if chunks collection is empty
-    #[must_use] pub fn is_empty(&self) -> bool {
+    #[must_use]
+    pub fn is_empty(&self) -> bool {
         self.len() == 0
     }
 
     /// Get a chunk by index
-    #[must_use] pub fn get(&self, index: usize) -> Option<MtmdInputChunk> {
+    #[must_use]
+    pub fn get(&self, index: usize) -> Option<MtmdInputChunk> {
         if index >= self.len() {
             return None;
         }
@@ -619,15 +640,17 @@ impl MtmdInputChunks {
     /// Get total number of tokens across all chunks.
     ///
     /// This is useful for keeping track of KV cache size.
-    #[must_use] pub fn total_tokens(&self) -> usize {
+    #[must_use]
+    pub fn total_tokens(&self) -> usize {
         unsafe { llama_cpp_sys_2::mtmd_helper_get_n_tokens(self.chunks.as_ptr()) }
     }
 
     /// Get total position count across all chunks.
     ///
     /// This is useful to keep track of `n_past`. Normally `n_pos` equals `n_tokens`,
     /// but for M-RoPE it is different.
-    #[must_use] pub fn total_positions(&self) -> i32 {
+    #[must_use]
+    pub fn total_positions(&self) -> i32 {
         unsafe { llama_cpp_sys_2::mtmd_helper_get_n_pos(self.chunks.as_ptr()) }
     }
 
@@ -709,7 +732,8 @@ pub struct MtmdInputChunk {
 
 impl MtmdInputChunk {
     /// Get the type of this chunk
-    #[must_use] pub fn chunk_type(&self) -> MtmdInputChunkType {
+    #[must_use]
+    pub fn chunk_type(&self) -> MtmdInputChunkType {
         let chunk_type = unsafe { llama_cpp_sys_2::mtmd_input_chunk_get_type(self.chunk.as_ptr()) };
         MtmdInputChunkType::from(chunk_type)
     }
@@ -721,7 +745,8 @@ impl MtmdInputChunk {
     /// # Returns
     ///
     /// Returns `Some(&[LlamaToken])` for text chunks, `None` otherwise.
-    #[must_use] pub fn text_tokens(&self) -> Option<&[LlamaToken]> {
+    #[must_use]
+    pub fn text_tokens(&self) -> Option<&[LlamaToken]> {
         if self.chunk_type() != MtmdInputChunkType::Text {
             return None;
         }
@@ -744,21 +769,24 @@ impl MtmdInputChunk {
     }
 
     /// Get the number of tokens in this chunk
-    #[must_use] pub fn n_tokens(&self) -> usize {
+    #[must_use]
+    pub fn n_tokens(&self) -> usize {
         unsafe { llama_cpp_sys_2::mtmd_input_chunk_get_n_tokens(self.chunk.as_ptr()) }
     }
 
     /// Get the number of positions in this chunk.
     ///
     /// Returns the number of temporal positions (always 1 for M-RoPE, `n_tokens` otherwise).
-    #[must_use] pub fn n_positions(&self) -> i32 {
+    #[must_use]
+    pub fn n_positions(&self) -> i32 {
         unsafe { llama_cpp_sys_2::mtmd_input_chunk_get_n_pos(self.chunk.as_ptr()) }
     }
 
     /// Get chunk ID if available.
     ///
     /// Returns `None` for text chunks, may return an ID for image/audio chunks.
-    #[must_use] pub fn id(&self) -> Option<String> {
+    #[must_use]
+    pub fn id(&self) -> Option<String> {
         let ptr = unsafe { llama_cpp_sys_2::mtmd_input_chunk_get_id(self.chunk.as_ptr()) };
         if ptr.is_null() {
             None
@@ -819,7 +847,8 @@ impl Drop for MtmdInputChunk {
 /// let text = format!("Describe this image: {}", marker);
 /// assert!(text.contains(marker));
 /// ```
-#[must_use] pub fn mtmd_default_marker() -> &'static str {
+#[must_use]
+pub fn mtmd_default_marker() -> &'static str {
     unsafe {
         let c_str = llama_cpp_sys_2::mtmd_default_marker();
         CStr::from_ptr(c_str).to_str().unwrap_or("<__media__>")
@@ -877,6 +906,9 @@ pub enum MtmdTokenizeError {
     /// Image preprocessing error occurred
     #[error("Image preprocessing error")]
     ImagePreprocessingError,
+    /// Text contains characters that cannot be converted to C string
+    #[error("Failed to create CString from text: {0}")]
+    CStringError(#[from] std::ffi::NulError),
     /// Unknown error occurred during tokenization
     #[error("Unknown error: {0}")]
     UnknownError(i32),
diff --git a/llama-cpp-sys-2/build.rs b/llama-cpp-sys-2/build.rs
@@ -277,8 +277,8 @@ fn main() {
     // Configure mtmd feature if enabled
     if cfg!(feature = "mtmd") {
         bindings_builder = bindings_builder
-        .allowlist_function("mtmd_.*")
-        .allowlist_type("mtmd_.*");
+            .allowlist_function("mtmd_.*")
+            .allowlist_type("mtmd_.*");
     }
 
     // Configure Android-specific bindgen settings