Skip to content

Commit 4702a97

Browse files
committed
fix: propagate ocr_elements, paddle_ocr_config, element_config in Ruby binding
Add the missing `ocr_elements` field conversion in `extraction_result_to_ruby`, and finish the OCR config parsing (`paddle_ocr_config`, `element_config`, `output_format`) in the Ruby native extension.
1 parent f24752c commit 4702a97

File tree

2 files changed

+43
-0
lines changed

2 files changed

+43
-0
lines changed

packages/ruby/ext/kreuzberg_rb/native/src/config/types.rs

Lines changed: 29 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -58,6 +58,35 @@ pub fn parse_ocr_config(ruby: &Ruby, hash: RHash) -> Result<OcrConfig, Error> {
5858
config.tesseract_config = Some(parsed);
5959
}
6060

61+
if let Some(val) = get_kw(ruby, hash, "paddle_ocr_config")
62+
&& !val.is_nil()
63+
{
64+
config.paddle_ocr_config = Some(ruby_value_to_json(val)?);
65+
}
66+
67+
if let Some(val) = get_kw(ruby, hash, "element_config")
68+
&& !val.is_nil()
69+
{
70+
let ec_json = ruby_value_to_json(val)?;
71+
let parsed: kreuzberg::types::OcrElementConfig =
72+
serde_json::from_value(ec_json).map_err(|e| runtime_error(format!("Invalid element_config: {}", e)))?;
73+
config.element_config = Some(parsed);
74+
}
75+
76+
if let Some(val) = get_kw(ruby, hash, "output_format")
77+
&& !val.is_nil()
78+
{
79+
let format_str = symbol_to_string(val)?;
80+
let format: OutputFormat = match format_str.as_str() {
81+
"plain" | "Plain" => OutputFormat::Plain,
82+
"markdown" | "Markdown" => OutputFormat::Markdown,
83+
"djot" | "Djot" => OutputFormat::Djot,
84+
"html" | "Html" => OutputFormat::Html,
85+
other => return Err(runtime_error(format!("Invalid ocr output_format: '{}'", other))),
86+
};
87+
config.output_format = Some(format);
88+
}
89+
6190
Ok(config)
6291
}
6392

packages/ruby/ext/kreuzberg_rb/native/src/result.rs

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -324,6 +324,20 @@ pub fn extraction_result_to_ruby(ruby: &Ruby, result: RustExtractionResult) -> R
324324
set_hash_entry(ruby, &hash, "elements", ruby.qnil().as_value())?;
325325
}
326326

327+
// Convert ocr_elements
328+
if let Some(ocr_elements) = result.ocr_elements {
329+
let elements_array = ruby.ary_new();
330+
for elem in ocr_elements {
331+
let elem_json = serde_json::to_value(&elem)
332+
.map_err(|e| runtime_error(format!("Failed to serialize ocr_element: {}", e)))?;
333+
let elem_ruby = json_value_to_ruby(ruby, &elem_json)?;
334+
elements_array.push(elem_ruby)?;
335+
}
336+
set_hash_entry(ruby, &hash, "ocr_elements", elements_array.into_value_with(ruby))?;
337+
} else {
338+
set_hash_entry(ruby, &hash, "ocr_elements", ruby.qnil().as_value())?;
339+
}
340+
327341
// Convert document structure
328342
if let Some(doc_structure) = result.document {
329343
let document_hash = ruby.hash_new();

0 commit comments

Comments
 (0)