-
|
Hi, I am new to DataFusion, and I want to use it as an in-memory DB. I have read the docs, and it seems I can construct a table in which a field, say `properties`, is a list of strings. I tried to modify this example, https://github.com/apache/datafusion/blob/main/datafusion-examples/examples/dataframe_in_memory.rs, but I don't know how to insert the data of a list of lists of strings into the table, and I cannot find anything useful in the documentation. Can anyone help? Or, if you have pointers to related info, that would be great! Here is my code:
#[cfg(test)]
// Test module from the question: it tries to build a two-column record batch
// (`id` plus a list-of-strings `properties` column), register it as table "t",
// and filter it with `array_has`. This is the version that reports an error;
// it is kept exactly as posted.
mod test {
use std::sync::Arc;
// NOTE(review): `Int32Array` is not referenced anywhere in the visible code.
use datafusion::arrow::array::{Int32Array, ListArray, StringArray};
// NOTE(review): `Utf8Type` is not referenced anywhere in the visible code.
use datafusion::arrow::datatypes::{DataType, Field, Schema, Utf8Type};
use datafusion::arrow::record_batch::RecordBatch;
use datafusion::error::Result;
use datafusion::prelude::*;
#[tokio::test]
async fn test_datafustion() -> Result<()> {
// Define the schema: a string `id` column and a list column `properties`
// whose items are strings in a field named `property`.
let schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::Utf8, false),
Field::new_list("properties", Arc::new(Field::new("property", DataType::Utf8, false)), true),
]));
// Define the data. This is where the question reports the error.
let batch = RecordBatch::try_new(
schema,
vec![
Arc::new(StringArray::from(vec!["a", "b", "c", "d"])),
// The author modeled this call on the code sample in the docs of
// `ListArray::from_iter_primitive` and suspects a type mismatch. The reply in
// this thread states that a string is not a primitive type, so
// `from_iter_primitive` cannot be used for this column.
Arc::new(ListArray::from_iter_primitive(vec![
Some(vec![Some("a"), Some("a")]),
Some(vec![Some("b"), Some("b")]),
Some(vec![Some("c"), Some("c")]),
Some(vec![Some("d"), Some("d")]),
])),
],
)?;
// Register the batch as table "t" and load it back as a DataFrame.
let ctx = SessionContext::new();
ctx.register_batch("t", batch)?;
let df = ctx.table("t").await?;
// Keep only the rows whose `properties` list contains "b".
let filter = array_has(col("properties"), lit("b"));
let df = df.select_columns(&["id", "properties"])?.filter(filter)?;
df.show().await?;
Ok(())
}
} |
Beta Was this translation helpful? Give feedback.
Replies: 1 comment
-
|
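String is not a primitive type, so you can't use `from_iter_primitive`; instead, use `ListArray::try_new()`/`new()` or `ListBuilder`. For example:
use crate::arrow::buffer::OffsetBuffer;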
async fn test_datafustion() -> Result<()> {
// define a schema.
let item_field = Arc::new(Field::new("property", DataType::Utf8, false));
let schema = Arc::new(Schema::new(vec![
Field::new("id", DataType::Utf8, false),
Field::new_list("properties", item_field.clone(), true),
]));
let string_arry = StringArray::from(vec!["a", "a", "b", "b", "c", "c", "d", "d"]);
let offsets = OffsetBuffer::new(vec![0, 2, 4, 6, 8].into());
let list_array = ListArray::new(item_field, offsets, Arc::new(string_arry), None);
// define data. Error Here
let batch = RecordBatch::try_new(
schema,
vec![
Arc::new(StringArray::from(vec!["a", "b", "c", "d"])),
Arc::new(list_array) as ArrayRef,
],
)?;
let ctx = SessionContext::new();
ctx.register_batch("t", batch)?;
let df = ctx.table("t").await?;
// find a row in which properties contains "b"
let filter = array_has(col("properties"), lit("b"));
let df = df.select_columns(&["id", "properties"])?.filter(filter)?;
df.show().await?;
Ok(())
} |
Beta Was this translation helpful? Give feedback.
String is not a primitive type, so you can't use `from_iter_primitive`; instead, use `ListArray::try_new()`/`new()` or `ListBuilder`.