Skip to content

Commit 8395a4b

Browse files
authored
Use List instead of Table for python list type #19 (#64)
* Clean up a few places that may panic. * Simplify handling for `List` type. * Use `List` instead of `Table` for python `list` type.
1 parent 0204038 commit 8395a4b

File tree

6 files changed

+55
-114
lines changed

6 files changed

+55
-114
lines changed

python/cocoindex/typing.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ def _dump_type(t, metadata):
5555
}
5656
elif dataclasses.is_dataclass(elem_type):
5757
encoded_type = {
58-
'kind': 'Table',
58+
'kind': 'List',
5959
'row': { 'fields': _dump_fields_schema(elem_type) },
6060
}
6161
else:

src/base/schema.rs

Lines changed: 17 additions & 83 deletions
Original file line numberDiff line numberDiff line change
@@ -3,11 +3,7 @@ use crate::builder::plan::AnalyzedValueMapping;
33
use super::spec::*;
44
use anyhow::Result;
55
use serde::{Deserialize, Serialize};
6-
use std::{
7-
collections::BTreeMap,
8-
ops::Deref,
9-
sync::{Arc, LazyLock},
10-
};
6+
use std::{collections::BTreeMap, ops::Deref, sync::Arc};
117

128
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
139
pub struct VectorTypeSchema {
@@ -127,21 +123,20 @@ pub struct CollectionSchema {
127123
impl CollectionSchema {
128124
pub fn has_key(&self) -> bool {
129125
match self.kind {
130-
CollectionKind::Collection => false,
131-
CollectionKind::Table | CollectionKind::List => true,
126+
CollectionKind::Table => true,
127+
CollectionKind::Collection | CollectionKind::List => false,
132128
}
133129
}
134130

135131
pub fn key_type(&self) -> Option<&EnrichedValueType> {
136132
match self.kind {
137-
CollectionKind::Collection => None,
138133
CollectionKind::Table => self
139134
.row
140135
.fields
141136
.first()
142137
.as_ref()
143138
.map(|field| &field.value_type),
144-
CollectionKind::List => Some(&LIST_INDEX_FIELD.value_type),
139+
CollectionKind::Collection | CollectionKind::List => None,
145140
}
146141
}
147142

@@ -172,89 +167,21 @@ impl std::fmt::Display for CollectionSchema {
172167
}
173168
}
174169

175-
pub const KEY_FIELD_NAME: &'static str = "__key";
176-
pub const VALUE_FIELD_NAME: &'static str = "__value";
177-
pub const LIST_INDEX_FIELD_NAME: &'static str = "__index";
178-
179-
pub static LIST_INDEX_FIELD: LazyLock<FieldSchema> = LazyLock::new(|| FieldSchema {
180-
name: LIST_INDEX_FIELD_NAME.to_string(),
181-
value_type: EnrichedValueType {
182-
typ: ValueType::Basic(BasicValueType::Int64),
183-
nullable: false,
184-
attrs: Default::default(),
185-
},
186-
});
187-
188170
impl CollectionSchema {
189-
pub fn new_collection(value_name: Option<String>, value: EnrichedValueType) -> Self {
190-
Self {
191-
kind: CollectionKind::Collection,
192-
row: StructSchema {
193-
fields: Arc::new(vec![FieldSchema {
194-
name: value_name.unwrap_or_else(|| VALUE_FIELD_NAME.to_string()),
195-
value_type: value,
196-
}]),
197-
},
198-
collectors: Default::default(),
199-
}
200-
}
201-
202-
pub fn new_table(
203-
key_name: Option<String>,
204-
key: EnrichedValueType,
205-
value_name: Option<String>,
206-
value: EnrichedValueType,
207-
) -> Self {
171+
pub fn new(kind: CollectionKind, fields: Vec<FieldSchema>) -> Self {
208172
Self {
209-
kind: CollectionKind::Table,
173+
kind,
210174
row: StructSchema {
211-
fields: Arc::new(vec![
212-
FieldSchema {
213-
name: key_name.unwrap_or_else(|| KEY_FIELD_NAME.to_string()),
214-
value_type: key,
215-
},
216-
FieldSchema {
217-
name: value_name.unwrap_or_else(|| VALUE_FIELD_NAME.to_string()),
218-
value_type: value,
219-
},
220-
]),
175+
fields: Arc::new(fields),
221176
},
222177
collectors: Default::default(),
223178
}
224179
}
225180

226-
pub fn new_list(value_name: Option<String>, value: EnrichedValueType) -> Self {
227-
Self {
228-
kind: CollectionKind::List,
229-
row: StructSchema {
230-
fields: Arc::new(vec![
231-
LIST_INDEX_FIELD.clone(),
232-
FieldSchema {
233-
name: value_name.unwrap_or_else(|| VALUE_FIELD_NAME.to_string()),
234-
value_type: value,
235-
},
236-
]),
237-
},
238-
collectors: Default::default(),
239-
}
240-
}
241-
242-
pub fn is_table(&self) -> bool {
243-
match self.kind {
244-
CollectionKind::Collection => false,
245-
CollectionKind::Table | CollectionKind::List => true,
246-
}
247-
}
248-
249-
pub fn is_list(&self) -> bool {
250-
self.kind == CollectionKind::List
251-
}
252-
253181
pub fn key_field<'a>(&'a self) -> Option<&'a FieldSchema> {
254-
if self.is_table() {
255-
Some(self.row.fields.first().unwrap())
256-
} else {
257-
None
182+
match self.kind {
183+
CollectionKind::Table => Some(self.row.fields.first().unwrap()),
184+
CollectionKind::Collection | CollectionKind::List => None,
258185
}
259186
}
260187
}
@@ -373,6 +300,13 @@ pub struct FieldSchema<DataType = ValueType> {
373300
}
374301

375302
impl FieldSchema {
303+
pub fn new(name: impl ToString, value_type: EnrichedValueType) -> Self {
304+
Self {
305+
name: name.to_string(),
306+
value_type,
307+
}
308+
}
309+
376310
pub fn without_attrs(&self) -> Self {
377311
Self {
378312
name: self.name.clone(),

src/execution/evaluator.rs

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ fn augmented_value(
115115
.map(|v| ScopeValueBuilder::augmented_from(v, t))
116116
.collect::<Result<Vec<_>>>()?,
117117
),
118-
(val, _) => panic!("Value kind doesn't match the type {val_type}: {val:?}"),
118+
(val, _) => bail!("Value kind doesn't match the type {val_type}: {val:?}"),
119119
};
120120
Ok(value)
121121
}
@@ -157,18 +157,19 @@ impl<'a> ScopeEntry<'a> {
157157
fn get_local_field_schema<'b>(
158158
schema: &'b schema::StructSchema,
159159
indices: &[u32],
160-
) -> &'b schema::FieldSchema {
160+
) -> Result<&'b schema::FieldSchema> {
161161
let field_idx = indices[0] as usize;
162162
let field_schema = &schema.fields[field_idx];
163-
if indices.len() == 1 {
163+
let result = if indices.len() == 1 {
164164
field_schema
165165
} else {
166166
let struct_field_schema = match &field_schema.value_type.typ {
167167
schema::ValueType::Struct(s) => s,
168-
_ => panic!("Expect struct field"),
168+
_ => bail!("Expect struct field"),
169169
};
170-
Self::get_local_field_schema(&struct_field_schema, &indices[1..])
171-
}
170+
Self::get_local_field_schema(&struct_field_schema, &indices[1..])?
171+
};
172+
Ok(result)
172173
}
173174

174175
fn get_local_key_field<'b>(
@@ -229,8 +230,14 @@ impl<'a> ScopeEntry<'a> {
229230
}
230231
}
231232

232-
fn get_field_schema(&self, field_ref: &AnalyzedLocalFieldReference) -> &schema::FieldSchema {
233-
Self::get_local_field_schema(self.schema, &field_ref.fields_idx)
233+
fn get_field_schema(
234+
&self,
235+
field_ref: &AnalyzedLocalFieldReference,
236+
) -> Result<&schema::FieldSchema> {
237+
Ok(Self::get_local_field_schema(
238+
self.schema,
239+
&field_ref.fields_idx,
240+
)?)
234241
}
235242

236243
fn define_field_w_builder(
@@ -329,7 +336,7 @@ async fn evaluate_op_scope(
329336
}
330337

331338
AnalyzedReactiveOp::ForEach(op) => {
332-
let target_field_schema = head_scope.get_field_schema(&op.local_field_ref);
339+
let target_field_schema = head_scope.get_field_schema(&op.local_field_ref)?;
333340
let collection_schema = match &target_field_schema.value_type.typ {
334341
schema::ValueType::Collection(cs) => cs,
335342
_ => panic!("Expect target field to be a collection"),

src/ops/functions/split_recursively.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -265,11 +265,12 @@ impl SimpleFunctionFactoryBase for Factory {
265265
api_bail!("Expect String as input type, got {}", t)
266266
}
267267
}
268-
Ok(make_output_type(CollectionSchema::new_table(
269-
Some("location".to_string()),
270-
make_output_type(BasicValueType::Range),
271-
Some("text".to_string()),
272-
make_output_type(BasicValueType::Str),
268+
Ok(make_output_type(CollectionSchema::new(
269+
CollectionKind::Table,
270+
vec![
271+
FieldSchema::new("location", make_output_type(BasicValueType::Range)),
272+
FieldSchema::new("text", make_output_type(BasicValueType::Str)),
273+
],
273274
))
274275
.with_attr(
275276
field_attrs::CHUNK_BASE_TEXT,

src/ops/py_factory.rs

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@ use std::{collections::BTreeMap, sync::Arc};
33
use axum::async_trait;
44
use blocking::unblock;
55
use futures::FutureExt;
6+
use log::warn;
67
use pyo3::{
78
exceptions::PyException,
89
pyclass, pymethods,
9-
types::{IntoPyDict, PyAnyMethods, PyList, PyString, PyTuple},
10+
types::{IntoPyDict, PyAnyMethods, PyString, PyTuple},
1011
Bound, IntoPyObjectExt, Py, PyAny, PyResult, Python,
1112
};
1213

@@ -156,12 +157,6 @@ fn value_from_py_object<'py>(
156157
),
157158
}
158159
}
159-
_ => {
160-
return Err(PyException::new_err(format!(
161-
"unsupported value type: {}",
162-
typ
163-
)))
164-
}
165160
}
166161
};
167162
Ok(result)

src/ops/sources/local_file.rs

Lines changed: 13 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -77,15 +77,19 @@ impl SourceFactoryBase for Factory {
7777
spec: &Spec,
7878
_context: &FlowInstanceContext,
7979
) -> Result<EnrichedValueType> {
80-
Ok(make_output_type(CollectionSchema::new_table(
81-
Some("filename".to_string()),
82-
make_output_type(BasicValueType::Str),
83-
Some("content".to_string()),
84-
make_output_type(if spec.binary {
85-
BasicValueType::Bytes
86-
} else {
87-
BasicValueType::Str
88-
}),
80+
Ok(make_output_type(CollectionSchema::new(
81+
CollectionKind::Table,
82+
vec![
83+
FieldSchema::new("filename", make_output_type(BasicValueType::Str)),
84+
FieldSchema::new(
85+
"content",
86+
make_output_type(if spec.binary {
87+
BasicValueType::Bytes
88+
} else {
89+
BasicValueType::Str
90+
}),
91+
),
92+
],
8993
)))
9094
}
9195

0 commit comments

Comments
 (0)