Skip to content

Commit 6948929

Browse files
authored
feat(json): Add temporal formatting options when writing to JSON (#8482)
# Which issue does this PR close? - Closes #8398 # Rationale for this change There is no method to override the temporal format options when writing to JSON. # What changes are included in this PR? **Adds a series of temporal format options that override how temporal fields are formatted** # Are these changes tested? **I added a test for the `with_timestamp_format` option to the existing tests. The other options work the same way.** # Are there any user-facing changes? New APIs
1 parent cbf8045 commit 6948929

File tree

2 files changed

+164
-2
lines changed

2 files changed

+164
-2
lines changed

arrow-json/src/writer/encoder.rs

Lines changed: 73 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,16 @@ pub struct EncoderOptions {
3737
struct_mode: StructMode,
3838
/// An optional hook for customizing encoding behavior.
3939
encoder_factory: Option<Arc<dyn EncoderFactory>>,
40+
/// Optional date format for date arrays
41+
date_format: Option<String>,
42+
/// Optional datetime format for datetime arrays
43+
datetime_format: Option<String>,
44+
/// Optional timestamp format for timestamp arrays
45+
timestamp_format: Option<String>,
46+
/// Optional timestamp format for timestamp with timezone arrays
47+
timestamp_tz_format: Option<String>,
48+
/// Optional time format for time arrays
49+
time_format: Option<String>,
4050
}
4151

4252
impl EncoderOptions {
@@ -72,6 +82,61 @@ impl EncoderOptions {
7282
pub fn encoder_factory(&self) -> Option<&Arc<dyn EncoderFactory>> {
7383
self.encoder_factory.as_ref()
7484
}
85+
86+
/// Set the JSON file's date format
87+
pub fn with_date_format(mut self, format: String) -> Self {
88+
self.date_format = Some(format);
89+
self
90+
}
91+
92+
/// Get the JSON file's date format if set, defaults to RFC3339
93+
pub fn date_format(&self) -> Option<&str> {
94+
self.date_format.as_deref()
95+
}
96+
97+
/// Set the JSON file's datetime format
98+
pub fn with_datetime_format(mut self, format: String) -> Self {
99+
self.datetime_format = Some(format);
100+
self
101+
}
102+
103+
/// Get the JSON file's datetime format if set, defaults to RFC3339
104+
pub fn datetime_format(&self) -> Option<&str> {
105+
self.datetime_format.as_deref()
106+
}
107+
108+
/// Set the JSON file's time format
109+
pub fn with_time_format(mut self, format: String) -> Self {
110+
self.time_format = Some(format);
111+
self
112+
}
113+
114+
/// Get the JSON file's time format if set, defaults to RFC3339
115+
pub fn time_format(&self) -> Option<&str> {
116+
self.time_format.as_deref()
117+
}
118+
119+
/// Set the JSON file's timestamp format
120+
pub fn with_timestamp_format(mut self, format: String) -> Self {
121+
self.timestamp_format = Some(format);
122+
self
123+
}
124+
125+
/// Get the JSON file's timestamp format if set, defaults to RFC3339
126+
pub fn timestamp_format(&self) -> Option<&str> {
127+
self.timestamp_format.as_deref()
128+
}
129+
130+
/// Set the JSON file's timestamp tz format
131+
pub fn with_timestamp_tz_format(mut self, tz_format: String) -> Self {
132+
self.timestamp_tz_format = Some(tz_format);
133+
self
134+
}
135+
136+
/// Get the JSON file's timestamp tz format if set, defaults to RFC3339
137+
pub fn timestamp_tz_format(&self) -> Option<&str> {
138+
self.timestamp_tz_format.as_deref()
139+
}
75140
}
76141

77142
/// A trait to create custom encoders for specific data types.
@@ -350,8 +415,14 @@ pub fn make_encoder<'a>(
350415
// characters that would need to be escaped within a JSON string, e.g. `'"'`.
351416
// If support for user-provided format specifications is added, this assumption
352417
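// may need to be revisited. NOTE(review): the temporal format strings applied below are user-provided, so confirm that formatted output containing `"` or `\` is still escaped correctly.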
353-
let options = FormatOptions::new().with_display_error(true);
354-
let formatter = ArrayFormatter::try_new(array, &options)?;
418+
let fops = FormatOptions::new().with_display_error(true)
419+
.with_date_format(options.date_format.as_deref())
420+
.with_datetime_format(options.datetime_format.as_deref())
421+
.with_timestamp_format(options.timestamp_format.as_deref())
422+
.with_timestamp_tz_format(options.timestamp_tz_format.as_deref())
423+
.with_time_format(options.time_format.as_deref());
424+
425+
let formatter = ArrayFormatter::try_new(array, &fops)?;
355426
let formatter = JsonArrayFormatter::new(formatter);
356427
NullableEncoder::new(Box::new(formatter) as Box<dyn Encoder + 'a>, nulls)
357428
}

arrow-json/src/writer/mod.rs

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,6 +279,36 @@ impl WriterBuilder {
279279
self
280280
}
281281

282+
/// Set the JSON file's date format
283+
pub fn with_date_format(mut self, format: String) -> Self {
284+
self.0 = self.0.with_date_format(format);
285+
self
286+
}
287+
288+
/// Set the JSON file's datetime format
289+
pub fn with_datetime_format(mut self, format: String) -> Self {
290+
self.0 = self.0.with_datetime_format(format);
291+
self
292+
}
293+
294+
/// Set the JSON file's time format
295+
pub fn with_time_format(mut self, format: String) -> Self {
296+
self.0 = self.0.with_time_format(format);
297+
self
298+
}
299+
300+
/// Set the JSON file's timestamp format
301+
pub fn with_timestamp_format(mut self, format: String) -> Self {
302+
self.0 = self.0.with_timestamp_format(format);
303+
self
304+
}
305+
306+
/// Set the JSON file's timestamp tz format
307+
pub fn with_timestamp_tz_format(mut self, tz_format: String) -> Self {
308+
self.0 = self.0.with_timestamp_tz_format(tz_format);
309+
self
310+
}
311+
282312
/// Create a new `Writer` with specified `JsonFormat` and builder options.
283313
pub fn build<W, F>(self, writer: W) -> Writer<W, F>
284314
where
@@ -724,6 +754,21 @@ mod tests {
724754
&buf,
725755
r#"{"micros":"2018-11-13T17:11:10.011375","millis":"2018-11-13T17:11:10.011","name":"a","nanos":"2018-11-13T17:11:10.011375885","secs":"2018-11-13T17:11:10"}
726756
{"name":"b"}
757+
"#,
758+
);
759+
760+
let mut buf = Vec::new();
761+
{
762+
let mut writer = WriterBuilder::new()
763+
.with_timestamp_format("%m-%d-%Y".to_string())
764+
.build::<_, LineDelimited>(&mut buf);
765+
writer.write_batches(&[&batch]).unwrap();
766+
}
767+
768+
assert_json_eq(
769+
&buf,
770+
r#"{"nanos":"11-13-2018","micros":"11-13-2018","millis":"11-13-2018","secs":"11-13-2018","name":"a"}
771+
{"name":"b"}
727772
"#,
728773
);
729774
}
@@ -785,6 +830,21 @@ mod tests {
785830
&buf,
786831
r#"{"micros":"2018-11-13T17:11:10.011375Z","millis":"2018-11-13T17:11:10.011Z","name":"a","nanos":"2018-11-13T17:11:10.011375885Z","secs":"2018-11-13T17:11:10Z"}
787832
{"name":"b"}
833+
"#,
834+
);
835+
836+
let mut buf = Vec::new();
837+
{
838+
let mut writer = WriterBuilder::new()
839+
.with_timestamp_tz_format("%m-%d-%Y %Z".to_string())
840+
.build::<_, LineDelimited>(&mut buf);
841+
writer.write_batches(&[&batch]).unwrap();
842+
}
843+
844+
assert_json_eq(
845+
&buf,
846+
r#"{"nanos":"11-13-2018 +00:00","micros":"11-13-2018 +00:00","millis":"11-13-2018 +00:00","secs":"11-13-2018 +00:00","name":"a"}
847+
{"name":"b"}
788848
"#,
789849
);
790850
}
@@ -832,6 +892,22 @@ mod tests {
832892
&buf,
833893
r#"{"date32":"2018-11-13","date64":"2018-11-13T17:11:10.011","name":"a"}
834894
{"name":"b"}
895+
"#,
896+
);
897+
898+
let mut buf = Vec::new();
899+
{
900+
let mut writer = WriterBuilder::new()
901+
.with_date_format("%m-%d-%Y".to_string())
902+
.with_datetime_format("%m-%d-%Y %Mmin %Ssec %Hhour".to_string())
903+
.build::<_, LineDelimited>(&mut buf);
904+
writer.write_batches(&[&batch]).unwrap();
905+
}
906+
907+
assert_json_eq(
908+
&buf,
909+
r#"{"date32":"11-13-2018","date64":"11-13-2018 11min 10sec 17hour","name":"a"}
910+
{"name":"b"}
835911
"#,
836912
);
837913
}
@@ -875,6 +951,21 @@ mod tests {
875951
&buf,
876952
r#"{"time32sec":"00:02:00","time32msec":"00:00:00.120","time64usec":"00:00:00.000120","time64nsec":"00:00:00.000000120","name":"a"}
877953
{"name":"b"}
954+
"#,
955+
);
956+
957+
let mut buf = Vec::new();
958+
{
959+
let mut writer = WriterBuilder::new()
960+
.with_time_format("%H-%M-%S %f".to_string())
961+
.build::<_, LineDelimited>(&mut buf);
962+
writer.write_batches(&[&batch]).unwrap();
963+
}
964+
965+
assert_json_eq(
966+
&buf,
967+
r#"{"time32sec":"00-02-00 000000000","time32msec":"00-00-00 120000000","time64usec":"00-00-00 000120000","time64nsec":"00-00-00 000000120","name":"a"}
968+
{"name":"b"}
878969
"#,
879970
);
880971
}

0 commit comments

Comments
 (0)