Skip to content

Commit d3cd6cb

Browse files
committed
update the fixedWidth intrinsic to inherit any type information
1 parent 4a0b5ed commit d3cd6cb

File tree

3 files changed

+25
-7
lines changed

3 files changed

+25
-7
lines changed

tessellate-main/src/main/antora/modules/reference/pages/transforms.adoc

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ NOTE: Many more functions are planned.
8383

8484
Built-in functions on fields can be applied to one or more fields in every tuple in the tuple stream.
8585

86-
`tsid`:: create a unique id as a long or string (using https://github.com/f4b6a3/tsid-creator)
86+
`tsid`:: Create a unique id as a long or string (using https://github.com/f4b6a3/tsid-creator).
8787
Def:::
8888
`^tsid{node:...,nodeCount:...,epoch:...,format:...,counterToZero:...} +> intoField|type`
8989
`type`:::: must be `string` or `long`, defaults to `long`.
@@ -105,7 +105,8 @@ Placeholders:::::
105105
- `%z`: base-62
106106
`counterToZero`:::: Resets the counter portion when the millisecond changes, defaults to `false`.
107107

108-
`fixedWidth`:: pads a row/tuple to a fixed width by inserting nulls at a given index
108+
`fixedWidth`:: Pads a row/tuple to a fixed width by inserting nulls at a given index.
109+
Type information, if any, is inherited from the current fields.
109110
Def:::
110111
- `^fixedWidth{width:...,insertAt:...} ->` - replace ALL fields with the fixed width result
111112
- `^fixedWidth{width:...,insertAt:...} -> new_field|type + new_field2|type + etc` - name the new fields
@@ -114,7 +115,7 @@ Params:::
114115
`width`:::: The width of the row/tuple, defaults to size of the result fields.
115116
`insertAt`:::: The index to begin inserting the null padding, defaults to `-1` (last element).
116117

117-
`toJson`:: converts a row/tuple to a JSON string
118+
`toJson`:: Converts a row/tuple to a JSON string.
118119
Def:::
119120
- `^toJson{} ->` - replace ALL fields with the JSON string with the default field name `json`
120121
- `from_field1 + from_field2 ^toJson{} +> node` - add the arguments to a new json object named `node`
@@ -123,7 +124,7 @@ Def:::
123124
Def:::
124125
- `^fromJson{} -> to_field1 + to_field2` - the field names must match the json properties at the root node
125126

126-
`formatFields`:: reformats all the argument fields to a new format.
127+
`formatFields`:: Reformats all the argument fields to a new format.
127128
This is especially useful for remaining compatible with Apache Parquet.
128129
Def:::
129130
- `^formatFields{format:...} ->` - replace ALL fields with the formatted result

tessellate-main/src/main/java/io/clusterless/tessellate/pipeline/intrinsic/FixedWidthIntrinsic.java

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,11 @@ public Result create(Fields currentFields, Operation operation) {
4949
throw new IllegalArgumentException("result fields width must match fixed width");
5050
}
5151

52+
// currentFields is equivalent to Fields.ALL so we can safely copy over the type information
53+
if (!toFields.hasTypes() && currentFields.hasTypes()) {
54+
toFields = toFields.applyTypes(currentFields.getTypes());
55+
}
56+
5257
FixedWidthFunction function = new FixedWidthFunction(toFields, width, insertAt);
5358

5459
return new Result(Fields.ALL, function, toFields);

tessellate-main/src/test/java/io/clusterless/tessellate/pipeline/PipelineTest.java

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
import io.clusterless.tessellate.util.Format;
2222
import io.clusterless.tessellate.util.URIs;
2323
import io.clusterless.tessellate.util.json.JSONUtil;
24+
import org.jetbrains.annotations.NotNull;
2425
import org.junit.jupiter.api.Test;
2526
import org.junit.jupiter.api.extension.ExtendWith;
2627

@@ -92,7 +93,7 @@ void badWidth(@PathForResource("/data/delimited-variable-width.csv") URI input,
9293
"^fixedWidth{ width:5, insertAt:3 } ->"
9394
);
9495

95-
fixedWidthBase(input, output, transform);
96+
fixedWidthBase(input, output, transform, List.of());
9697
}
9798

9899
@Test
@@ -101,19 +102,30 @@ void badWidthWithFields(@PathForResource("/data/delimited-variable-width.csv") U
101102
"^fixedWidth{ width:5, insertAt:3 } -> _0+_1+_2+_3+_4"
102103
);
103104

104-
fixedWidthBase(input, output, transform);
105+
fixedWidthBase(input, output, transform, List.of());
105106
}
106107

107-
private static void fixedWidthBase(URI input, URI output, Transform transform) throws IOException {
108+
@Test
109+
void badWidthWithFieldsAndTypes(@PathForResource("/data/delimited-variable-width.csv") URI input, @PathForOutput URI output) throws IOException {
110+
Transform transform = new Transform(
111+
"^fixedWidth{ width:5, insertAt:3 } -> _0+_1+_2+_3+_4"
112+
);
113+
114+
fixedWidthBase(input, output, transform, Field.asField("a|string", "b|string", "c|string", "d|string", "e|string"));
115+
}
116+
117+
private static void fixedWidthBase(URI input, URI output, Transform transform, List<@NotNull Field> declared) throws IOException {
108118
PipelineOptions pipelineOptions = new PipelineOptions();
109119

110120
PipelineDef def = PipelineDef.builder()
111121
.withName("test")
112122
.withSource(Source.builder()
113123
.withInputs(List.of(input))
114124
.withSchema(Schema.builder()
125+
.withDeclared(declared)
115126
.withFormat(Format.csv)
116127
.withEmbedsSchema(false)
128+
.withStrictParsing(declared.isEmpty())
117129
.build())
118130
.build())
119131
.withTransform(transform)

0 commit comments

Comments
 (0)