Skip to content

Commit d03a639

Browse files
authored
feat: strip U+0000 in writing to Postgres to overcome its limitation (#937)
* feat: strip U+0000 in writing to Postgres to overcome its limitation * docs: document the behavior of stripping 0
1 parent 9bbf299 commit d03a639

File tree

4 files changed

+621
-10
lines changed

4 files changed

+621
-10
lines changed

docs/docs/ops/targets.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ For all other vector types, we map them to `jsonb` columns.
3939

4040
:::
4141

42+
:::info U+0000 (NUL) characters in strings
43+
44+
U+0000 (NUL) is a valid character in Unicode, but Postgres has a limitation: strings (including `text`-like types and strings inside `jsonb`) cannot contain it.
45+
CocoIndex automatically strips U+0000 (NUL) characters from strings before exporting them to Postgres. For example, the string `"Hello\0World"` is exported as `"HelloWorld"`.
46+
47+
:::
48+
4249
#### Spec
4350

4451
The spec takes the following fields:

src/ops/targets/postgres.rs

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ fn bind_value_field<'arg>(
4343
builder.push_bind(&**v);
4444
}
4545
BasicValue::Str(v) => {
46-
builder.push_bind(&**v);
46+
builder.push_bind(utils::str_sanitize::ZeroCodeStrippedEncode(v.as_ref()));
4747
}
4848
BasicValue::Bool(v) => {
4949
builder.push_bind(v);
@@ -82,7 +82,9 @@ fn bind_value_field<'arg>(
8282
builder.push_bind(v);
8383
}
8484
BasicValue::Json(v) => {
85-
builder.push_bind(sqlx::types::Json(&**v));
85+
builder.push_bind(sqlx::types::Json(
86+
utils::str_sanitize::ZeroCodeStrippedSerialize(&**v),
87+
));
8688
}
8789
BasicValue::Vector(v) => match &field_schema.value_type.typ {
8890
ValueType::Basic(BasicValueType::Vector(vs)) if convertible_to_pgvector(vs) => {
@@ -104,20 +106,24 @@ fn bind_value_field<'arg>(
104106
}
105107
},
106108
BasicValue::UnionVariant { .. } => {
107-
builder.push_bind(sqlx::types::Json(TypedValue {
108-
t: &field_schema.value_type.typ,
109-
v: value,
110-
}));
109+
builder.push_bind(sqlx::types::Json(
110+
utils::str_sanitize::ZeroCodeStrippedSerialize(TypedValue {
111+
t: &field_schema.value_type.typ,
112+
v: value,
113+
}),
114+
));
111115
}
112116
},
113117
Value::Null => {
114118
builder.push("NULL");
115119
}
116120
v => {
117-
builder.push_bind(sqlx::types::Json(TypedValue {
118-
t: &field_schema.value_type.typ,
119-
v,
120-
}));
121+
builder.push_bind(sqlx::types::Json(
122+
utils::str_sanitize::ZeroCodeStrippedSerialize(TypedValue {
123+
t: &field_schema.value_type.typ,
124+
v,
125+
}),
126+
));
121127
}
122128
};
123129
Ok(())

src/utils/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ pub mod db;
33
pub mod fingerprint;
44
pub mod immutable;
55
pub mod retryable;
6+
pub mod str_sanitize;
67
pub mod yaml_ser;

0 commit comments

Comments
 (0)