Skip to content

Commit bed2ed8

Browse files
Merge branch 'apache:main' into friendlymatthew/cast-date-to-timestamp-tz
2 parents c19dbc5 + a0c3186 commit bed2ed8

File tree

20 files changed

+347
-65
lines changed

20 files changed

+347
-65
lines changed

arrow-arith/src/temporal.rs

Lines changed: 155 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -47,10 +47,14 @@ pub enum DatePart {
4747
Quarter,
4848
/// Calendar year
4949
Year,
50+
/// ISO year, computed as per ISO 8601
51+
YearISO,
5052
/// Month in the year, in range `1..=12`
5153
Month,
52-
/// ISO week of the year, in range `1..=53`
54+
/// Week of the year, in range `1..=53`, computed as per ISO 8601
5355
Week,
56+
/// ISO week of the year, in range `1..=53`
57+
WeekISO,
5458
/// Day of the month, in range `1..=31`
5559
Day,
5660
/// Day of the week, in range `0..=6`, where Sunday is `0`
@@ -91,8 +95,9 @@ where
9195
match part {
9296
DatePart::Quarter => |d| d.quarter() as i32,
9397
DatePart::Year => |d| d.year(),
98+
DatePart::YearISO => |d| d.iso_week().year(),
9499
DatePart::Month => |d| d.month() as i32,
95-
DatePart::Week => |d| d.iso_week().week() as i32,
100+
DatePart::Week | DatePart::WeekISO => |d| d.iso_week().week() as i32,
96101
DatePart::Day => |d| d.day() as i32,
97102
DatePart::DayOfWeekSunday0 => |d| d.num_days_from_sunday(),
98103
DatePart::DayOfWeekMonday0 => |d| d.num_days_from_monday(),
@@ -102,7 +107,7 @@ where
102107
DatePart::Second => |d| d.second() as i32,
103108
DatePart::Millisecond => |d| (d.nanosecond() / 1_000_000) as i32,
104109
DatePart::Microsecond => |d| (d.nanosecond() / 1_000) as i32,
105-
DatePart::Nanosecond => |d| (d.nanosecond()) as i32,
110+
DatePart::Nanosecond => |d| d.nanosecond() as i32,
106111
}
107112
}
108113

@@ -130,9 +135,14 @@ where
130135
/// let input: TimestampMicrosecondArray =
131136
/// vec![Some(1612025847000000), None, Some(1722015847000000)].into();
132137
///
133-
/// let actual = date_part(&input, DatePart::Week).unwrap();
138+
/// let week = date_part(&input, DatePart::Week).unwrap();
139+
/// let week_iso = date_part(&input, DatePart::WeekISO).unwrap();
134140
/// let expected: Int32Array = vec![Some(4), None, Some(30)].into();
135-
/// assert_eq!(actual.as_ref(), &expected);
141+
/// assert_eq!(week.as_ref(), &expected);
142+
/// assert_eq!(week_iso.as_ref(), &expected);
143+
/// let year_iso = date_part(&input, DatePart::YearISO).unwrap();
144+
/// let expected: Int32Array = vec![Some(2021), None, Some(2024)].into();
145+
/// assert_eq!(year_iso.as_ref(), &expected);
136146
/// ```
137147
pub fn date_part(array: &dyn Array, part: DatePart) -> Result<ArrayRef, ArrowError> {
138148
downcast_temporal_array!(
@@ -430,6 +440,8 @@ impl ExtractDatePartExt for PrimitiveArray<IntervalYearMonthType> {
430440

431441
DatePart::Quarter
432442
| DatePart::Week
443+
| DatePart::WeekISO
444+
| DatePart::YearISO
433445
| DatePart::Day
434446
| DatePart::DayOfWeekSunday0
435447
| DatePart::DayOfWeekMonday0
@@ -460,6 +472,8 @@ impl ExtractDatePartExt for PrimitiveArray<IntervalDayTimeType> {
460472

461473
DatePart::Quarter
462474
| DatePart::Year
475+
| DatePart::YearISO
476+
| DatePart::WeekISO
463477
| DatePart::Month
464478
| DatePart::DayOfWeekSunday0
465479
| DatePart::DayOfWeekMonday0
@@ -495,6 +509,8 @@ impl ExtractDatePartExt for PrimitiveArray<IntervalMonthDayNanoType> {
495509
DatePart::Nanosecond => Ok(self.unary_opt(|d| d.nanoseconds.try_into().ok())),
496510

497511
DatePart::Quarter
512+
| DatePart::WeekISO
513+
| DatePart::YearISO
498514
| DatePart::DayOfWeekSunday0
499515
| DatePart::DayOfWeekMonday0
500516
| DatePart::DayOfYear => {
@@ -523,6 +539,8 @@ impl ExtractDatePartExt for PrimitiveArray<DurationSecondType> {
523539
),
524540

525541
DatePart::Year
542+
| DatePart::YearISO
543+
| DatePart::WeekISO
526544
| DatePart::Quarter
527545
| DatePart::Month
528546
| DatePart::DayOfWeekSunday0
@@ -553,6 +571,8 @@ impl ExtractDatePartExt for PrimitiveArray<DurationMillisecondType> {
553571
}
554572

555573
DatePart::Year
574+
| DatePart::YearISO
575+
| DatePart::WeekISO
556576
| DatePart::Quarter
557577
| DatePart::Month
558578
| DatePart::DayOfWeekSunday0
@@ -583,6 +603,8 @@ impl ExtractDatePartExt for PrimitiveArray<DurationMicrosecondType> {
583603
}
584604

585605
DatePart::Year
606+
| DatePart::YearISO
607+
| DatePart::WeekISO
586608
| DatePart::Quarter
587609
| DatePart::Month
588610
| DatePart::DayOfWeekSunday0
@@ -613,6 +635,8 @@ impl ExtractDatePartExt for PrimitiveArray<DurationNanosecondType> {
613635
DatePart::Nanosecond => Ok(self.unary_opt(|d| d.try_into().ok())),
614636

615637
DatePart::Year
638+
| DatePart::YearISO
639+
| DatePart::WeekISO
616640
| DatePart::Quarter
617641
| DatePart::Month
618642
| DatePart::DayOfWeekSunday0
@@ -2072,4 +2096,130 @@ mod tests {
20722096
ensure_returns_error(&DurationMicrosecondArray::from(vec![0]));
20732097
ensure_returns_error(&DurationNanosecondArray::from(vec![0]));
20742098
}
2099+
2100+
const TIMESTAMP_SECOND_1970_01_01: i64 = 0;
2101+
const TIMESTAMP_SECOND_2018_01_01: i64 = 1_514_764_800;
2102+
const TIMESTAMP_SECOND_2019_02_20: i64 = 1_550_636_625;
2103+
const SECONDS_IN_DAY: i64 = 24 * 60 * 60;
2104+
// In 2018 the ISO year and the calendar year start on the same date: 2018-01-01, i.e. 2018-W01-1
2105+
#[test]
2106+
fn test_temporal_array_date64_week_iso() {
2107+
let a: PrimitiveArray<Date64Type> = vec![
2108+
Some(TIMESTAMP_SECOND_2018_01_01 * 1000),
2109+
Some(TIMESTAMP_SECOND_2019_02_20 * 1000),
2110+
]
2111+
.into();
2112+
2113+
let b = date_part(&a, DatePart::WeekISO).unwrap();
2114+
let actual = b.as_primitive::<Int32Type>();
2115+
assert_eq!(1, actual.value(0));
2116+
assert_eq!(8, actual.value(1));
2117+
}
2118+
2119+
#[test]
2120+
fn test_temporal_array_date64_year_iso() {
2121+
let a: PrimitiveArray<Date64Type> = vec![
2122+
Some(TIMESTAMP_SECOND_2018_01_01 * 1000),
2123+
Some(TIMESTAMP_SECOND_2019_02_20 * 1000),
2124+
]
2125+
.into();
2126+
2127+
let b = date_part(&a, DatePart::YearISO).unwrap();
2128+
let actual = b.as_primitive::<Int32Type>();
2129+
assert_eq!(2018, actual.value(0));
2130+
assert_eq!(2019, actual.value(1));
2131+
}
2132+
2133+
#[test]
2134+
fn test_temporal_array_timestamp_week_iso() {
2135+
let a = TimestampSecondArray::from(vec![
2136+
TIMESTAMP_SECOND_1970_01_01, // 0 and is Thursday
2137+
SECONDS_IN_DAY * 4, // Monday of week 2
2138+
SECONDS_IN_DAY * 4 - 1, // Sunday of week 1
2139+
]);
2140+
let b = date_part(&a, DatePart::WeekISO).unwrap();
2141+
let actual = b.as_primitive::<Int32Type>();
2142+
assert_eq!(1, actual.value(0));
2143+
assert_eq!(2, actual.value(1));
2144+
assert_eq!(1, actual.value(2));
2145+
}
2146+
2147+
#[test]
2148+
fn test_temporal_array_timestamp_year_iso() {
2149+
let a = TimestampSecondArray::from(vec![
2150+
TIMESTAMP_SECOND_1970_01_01,
2151+
SECONDS_IN_DAY * 4,
2152+
SECONDS_IN_DAY * 4 - 1,
2153+
]);
2154+
let b = date_part(&a, DatePart::YearISO).unwrap();
2155+
let actual = b.as_primitive::<Int32Type>();
2156+
assert_eq!(1970, actual.value(0));
2157+
assert_eq!(1970, actual.value(1));
2158+
assert_eq!(1970, actual.value(2));
2159+
}
2160+
2161+
const TIMESTAMP_SECOND_2015_12_28: i64 = 1_451_260_800;
2162+
const TIMESTAMP_SECOND_2016_01_03: i64 = 1_451_779_200;
2163+
// January 1st 2016 is a Friday, so 2015 week 53 runs from
2164+
// 2015-12-28 to 2016-01-03 inclusive, and
2165+
// 2016 week 1 runs from 2016-01-04 to 2016-01-10 inclusive.
2166+
#[test]
2167+
fn test_temporal_array_date64_week_iso_edge_cases() {
2168+
let a: PrimitiveArray<Date64Type> = vec![
2169+
Some(TIMESTAMP_SECOND_2015_12_28 * 1000),
2170+
Some(TIMESTAMP_SECOND_2016_01_03 * 1000),
2171+
Some((TIMESTAMP_SECOND_2016_01_03 + SECONDS_IN_DAY) * 1000),
2172+
]
2173+
.into();
2174+
2175+
let b = date_part(&a, DatePart::WeekISO).unwrap();
2176+
let actual = b.as_primitive::<Int32Type>();
2177+
assert_eq!(53, actual.value(0));
2178+
assert_eq!(53, actual.value(1));
2179+
assert_eq!(1, actual.value(2));
2180+
}
2181+
2182+
#[test]
2183+
fn test_temporal_array_date64_year_iso_edge_cases() {
2184+
let a: PrimitiveArray<Date64Type> = vec![
2185+
Some(TIMESTAMP_SECOND_2015_12_28 * 1000),
2186+
Some(TIMESTAMP_SECOND_2016_01_03 * 1000),
2187+
Some((TIMESTAMP_SECOND_2016_01_03 + SECONDS_IN_DAY) * 1000),
2188+
]
2189+
.into();
2190+
2191+
let b = date_part(&a, DatePart::YearISO).unwrap();
2192+
let actual = b.as_primitive::<Int32Type>();
2193+
assert_eq!(2015, actual.value(0));
2194+
assert_eq!(2015, actual.value(1));
2195+
assert_eq!(2016, actual.value(2));
2196+
}
2197+
2198+
#[test]
2199+
fn test_temporal_array_timestamp_week_iso_edge_cases() {
2200+
let a = TimestampSecondArray::from(vec![
2201+
TIMESTAMP_SECOND_2015_12_28,
2202+
TIMESTAMP_SECOND_2016_01_03,
2203+
TIMESTAMP_SECOND_2016_01_03 + SECONDS_IN_DAY,
2204+
]);
2205+
let b = date_part(&a, DatePart::WeekISO).unwrap();
2206+
let actual = b.as_primitive::<Int32Type>();
2207+
assert_eq!(53, actual.value(0));
2208+
assert_eq!(53, actual.value(1));
2209+
assert_eq!(1, actual.value(2));
2210+
}
2211+
2212+
#[test]
2213+
fn test_temporal_array_timestamp_year_iso_edge_cases() {
2214+
let a = TimestampSecondArray::from(vec![
2215+
TIMESTAMP_SECOND_2015_12_28,
2216+
TIMESTAMP_SECOND_2016_01_03,
2217+
TIMESTAMP_SECOND_2016_01_03 + SECONDS_IN_DAY,
2218+
]);
2219+
let b = date_part(&a, DatePart::YearISO).unwrap();
2220+
let actual = b.as_primitive::<Int32Type>();
2221+
assert_eq!(2015, actual.value(0));
2222+
assert_eq!(2015, actual.value(1));
2223+
assert_eq!(2016, actual.value(2));
2224+
}
20752225
}

arrow-array/src/array/union_array.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -994,7 +994,7 @@ fn selection_mask(type_ids_chunk: &[i8], type_id: i8) -> u64 {
994994
.copied()
995995
.enumerate()
996996
.fold(0, |packed, (bit_idx, v)| {
997-
packed | ((v == type_id) as u64) << bit_idx
997+
packed | (((v == type_id) as u64) << bit_idx)
998998
})
999999
}
10001000

arrow-buffer/src/bigint/div.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -258,7 +258,7 @@ fn full_shl<const N: usize>(v: &[u64; N], shift: u32) -> ArrayPlusOne<u64, N> {
258258
let mut out = [0u64; N];
259259
out[0] = v[0] << shift;
260260
for i in 1..N {
261-
out[i] = v[i - 1] >> (64 - shift) | v[i] << shift
261+
out[i] = (v[i - 1] >> (64 - shift)) | (v[i] << shift)
262262
}
263263
let carry = v[N - 1] >> (64 - shift);
264264
ArrayPlusOne(out, carry)
@@ -272,7 +272,7 @@ fn full_shr<const N: usize>(a: &ArrayPlusOne<u64, N>, shift: u32) -> [u64; N] {
272272
}
273273
let mut out = [0; N];
274274
for i in 0..N - 1 {
275-
out[i] = a[i] >> shift | a[i + 1] << (64 - shift)
275+
out[i] = (a[i] >> shift) | (a[i + 1] << (64 - shift))
276276
}
277277
out[N - 1] = a[N - 1] >> shift;
278278
out

arrow-buffer/src/bigint/mod.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -475,8 +475,8 @@ impl i256 {
475475
/// Interpret 4 `u64` digits, least significant first, as a [`i256`]
476476
fn from_digits(digits: [u64; 4]) -> Self {
477477
Self::from_parts(
478-
digits[0] as u128 | (digits[1] as u128) << 64,
479-
digits[2] as i128 | (digits[3] as i128) << 64,
478+
digits[0] as u128 | ((digits[1] as u128) << 64),
479+
digits[2] as i128 | ((digits[3] as i128) << 64),
480480
)
481481
}
482482

@@ -746,7 +746,7 @@ impl Shl<u8> for i256 {
746746
self
747747
} else if rhs < 128 {
748748
Self {
749-
high: self.high << rhs | (self.low >> (128 - rhs)) as i128,
749+
high: (self.high << rhs) | (self.low >> (128 - rhs)) as i128,
750750
low: self.low << rhs,
751751
}
752752
} else {
@@ -768,7 +768,7 @@ impl Shr<u8> for i256 {
768768
} else if rhs < 128 {
769769
Self {
770770
high: self.high >> rhs,
771-
low: self.low >> rhs | ((self.high as u128) << (128 - rhs)),
771+
low: (self.low >> rhs) | ((self.high as u128) << (128 - rhs)),
772772
}
773773
} else {
774774
Self {

arrow-csv/src/reader/mod.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,8 @@ impl InferredDataType {
221221
} else {
222222
1 << m
223223
}
224+
} else if string == "NaN" || string == "nan" || string == "inf" || string == "-inf" {
225+
1 << 2 // Float64
224226
} else {
225227
1 << 8 // Utf8
226228
}
@@ -1659,7 +1661,7 @@ mod tests {
16591661
let mut csv = builder.build(file).unwrap();
16601662
let batch = csv.next().unwrap().unwrap();
16611663

1662-
assert_eq!(7, batch.num_rows());
1664+
assert_eq!(10, batch.num_rows());
16631665
assert_eq!(6, batch.num_columns());
16641666

16651667
let schema = batch.schema();
@@ -1803,6 +1805,10 @@ mod tests {
18031805
assert_eq!(infer_field_schema("10.2"), DataType::Float64);
18041806
assert_eq!(infer_field_schema(".2"), DataType::Float64);
18051807
assert_eq!(infer_field_schema("2."), DataType::Float64);
1808+
assert_eq!(infer_field_schema("NaN"), DataType::Float64);
1809+
assert_eq!(infer_field_schema("nan"), DataType::Float64);
1810+
assert_eq!(infer_field_schema("inf"), DataType::Float64);
1811+
assert_eq!(infer_field_schema("-inf"), DataType::Float64);
18061812
assert_eq!(infer_field_schema("true"), DataType::Boolean);
18071813
assert_eq!(infer_field_schema("trUe"), DataType::Boolean);
18081814
assert_eq!(infer_field_schema("false"), DataType::Boolean);
@@ -2372,7 +2378,7 @@ mod tests {
23722378
fn test_buffered() {
23732379
let tests = [
23742380
("test/data/uk_cities.csv", false, 37),
2375-
("test/data/various_types.csv", true, 7),
2381+
("test/data/various_types.csv", true, 10),
23762382
("test/data/decimal_test.csv", false, 10),
23772383
];
23782384

arrow-csv/test/data/various_types.csv

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,7 @@ c_int|c_float|c_string|c_bool|c_date|c_datetime
55
4|4.4||false||
66
5|6.6|""|false|1990-01-01|1990-01-01T03:00:00
77
4|4e6||false||
8-
4|4.0e-6||false||
8+
4|4.0e-6||false||
9+
6|NaN||false||
10+
7|inf||false||
11+
8|-inf||false||

arrow-data/src/data.rs

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1929,7 +1929,13 @@ impl ArrayDataBuilder {
19291929

19301930
/// Creates an array data, without any validation
19311931
///
1932-
/// Note: This is shorthand for `self.skip_validation(true).build().unwrap()`
1932+
/// Note: This is shorthand for
1933+
/// ```rust
1934+
/// # let mut builder = arrow_data::ArrayDataBuilder::new(arrow_schema::DataType::Null);
1935+
/// # let _ = unsafe {
1936+
/// builder.skip_validation(true).build().unwrap()
1937+
/// # };
1938+
/// ```
19331939
///
19341940
/// # Safety
19351941
///

arrow-json/src/reader/decimal_array.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ where
6666
}
6767
TapeElement::I64(high) => match tape.get(*p + 1) {
6868
TapeElement::I32(low) => {
69-
let val = ((high as i64) << 32 | (low as u32) as i64).to_string();
69+
let val = (((high as i64) << 32) | (low as u32) as i64).to_string();
7070
let value = parse_decimal::<D>(&val, self.precision, self.scale)?;
7171
builder.append_value(value)
7272
}
@@ -79,7 +79,7 @@ where
7979
}
8080
TapeElement::F64(high) => match tape.get(*p + 1) {
8181
TapeElement::F32(low) => {
82-
let val = f64::from_bits((high as u64) << 32 | low as u64).to_string();
82+
let val = f64::from_bits(((high as u64) << 32) | low as u64).to_string();
8383
let value = parse_decimal::<D>(&val, self.precision, self.scale)?;
8484
builder.append_value(value)
8585
}

0 commit comments

Comments
 (0)