Skip to content

Commit 0827f2c

Browse files
committed
maybe some actual conversions
1 parent 6205c57 commit 0827f2c

File tree

1 file changed

+120
-0
lines changed

1 file changed

+120
-0
lines changed

r/src/vctr_builder_primitive.h

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,126 @@ class IntBuilder : public VctrBuilder {
8080
public:
8181
// Constructs an integer builder: forwards ptype_sexp to the VctrBuilder base
// together with the VECTOR_TYPE_INT tag.
explicit IntBuilder(SEXP ptype_sexp)
    : VctrBuilder(VECTOR_TYPE_INT, ptype_sexp) {}
8282

83+
// Initializes the builder for the given schema and options.
//
// No integer-specific setup is performed: the call is forwarded to
// VctrBuilder::Init() and a non-OK status from it is returned unchanged.
// Returns NANOARROW_OK when the base class initialization succeeds.
ArrowErrorCode Init(const ArrowSchema* schema, VctrBuilderOptions options,
                    ArrowError* error) override {
  const ArrowErrorCode base_status = VctrBuilder::Init(schema, options, error);
  if (base_status != NANOARROW_OK) {
    return base_status;
  }

  return NANOARROW_OK;
}
88+
89+
// Allocates the output vector for n elements.
//
// VctrBuilder::Reserve() runs first and any non-OK status from it is returned
// as-is. On success a new INTSXP of length n is allocated, stored in value_,
// and handed to SetValue(). Returns NANOARROW_OK.
ArrowErrorCode Reserve(R_xlen_t n, ArrowError* error) override {
  const ArrowErrorCode base_status = VctrBuilder::Reserve(n, error);
  if (base_status != NANOARROW_OK) {
    return base_status;
  }

  // The new vector stays on the protect stack only until SetValue() has seen
  // it; presumably SetValue() keeps it alive afterwards -- confirm in the base
  // class.
  value_ = PROTECT(Rf_allocVector(INTSXP, n));
  SetValue(value_);
  UNPROTECT(1);

  return NANOARROW_OK;
}
96+
97+
// Converts one chunk of Arrow values to R integers and writes them into
// value_, starting at element value_size_.
//
// Source values are read through array_view_; the number of elements comes
// from array->length.
// NOTE(review): this assumes array_view_ has already been pointed at `array`
// and that value_ has room for value_size_ + array->length elements. Neither
// is established inside this method -- confirm against the caller/base class.
// NOTE(review): value_size_ is not advanced here; presumably that happens
// elsewhere -- confirm.
//
// Conversion rules:
//   - null slots become NA_INTEGER
//   - values that cannot be stored as a non-NA R integer (outside
//     (NA_INTEGER, INT_MAX], or NaN for floating point input) become
//     NA_INTEGER and are counted; if any were found, warn_lossy_conversion()
//     is called once at the end
//   - floating point values are truncated toward zero
//
// Returns NANOARROW_OK on success or EINVAL when the storage type is not one
// of the types handled below. `error` is not written to.
ArrowErrorCode PushNext(const ArrowArray* array, ArrowError* error) override {
  int64_t n_bad_values = 0;

  // True for all the types supported here
  const uint8_t* is_valid = array_view_.buffer_views[0].data.as_uint8;
  const int64_t raw_src_offset = array_view_.array->offset;
  const R_xlen_t length = array->length;

  // First destination element for this chunk
  int* out = INTEGER(value_) + value_size_;

  // The validity bitmap only needs to be consulted when it exists and the
  // null count is not known to be zero.
  const bool may_have_nulls =
      is_valid != nullptr && array_view_.array->null_count != 0;

  // True when slot i of this chunk is null
  auto is_null = [&](R_xlen_t i) -> bool {
    return may_have_nulls && !ArrowBitGet(is_valid, raw_src_offset + i);
  };

  // Overwrites already-converted null slots with NA_INTEGER
  auto set_nulls_to_na = [&]() {
    if (!may_have_nulls) {
      return;
    }

    for (R_xlen_t i = 0; i < length; i++) {
      if (!ArrowBitGet(is_valid, raw_src_offset + i)) {
        out[i] = NA_INTEGER;
      }
    }
  };

  // Marks slot i as a lossy conversion
  auto set_bad_value = [&](R_xlen_t i) {
    out[i] = NA_INTEGER;
    n_bad_values++;
  };

  // Fill the buffer
  switch (array_view_.storage_type) {
    case NANOARROW_TYPE_NA:
      for (R_xlen_t i = 0; i < length; i++) {
        out[i] = NA_INTEGER;
      }
      break;

    case NANOARROW_TYPE_INT32:
      // Skip the copy for empty chunks, whose data pointer may be null
      if (length > 0) {
        memcpy(out, array_view_.buffer_views[1].data.as_int32 + raw_src_offset,
               length * sizeof(int32_t));
      }

      set_nulls_to_na();
      break;

    case NANOARROW_TYPE_BOOL:
      // The offset is a *bit* offset and is applied by ArrowBitsUnpackInt32()
      // itself, so the byte pointer must not be advanced as well.
      ArrowBitsUnpackInt32(array_view_.buffer_views[1].data.as_uint8,
                           raw_src_offset, length, out);

      set_nulls_to_na();
      break;

    case NANOARROW_TYPE_INT8:
    case NANOARROW_TYPE_UINT8:
    case NANOARROW_TYPE_INT16:
    case NANOARROW_TYPE_UINT16:
      // No need to bounds check for these types
      for (R_xlen_t i = 0; i < length; i++) {
        out[i] = static_cast<int32_t>(ArrowArrayViewGetIntUnsafe(&array_view_, i));
      }

      set_nulls_to_na();
      break;

    case NANOARROW_TYPE_UINT32:
    case NANOARROW_TYPE_INT64:
      // Loop + bounds check. Because we don't know what memory might be
      // in a null slot, we have to check nulls if there are any.
      for (R_xlen_t i = 0; i < length; i++) {
        if (is_null(i)) {
          out[i] = NA_INTEGER;
          continue;
        }

        const int64_t value = ArrowArrayViewGetIntUnsafe(&array_view_, i);
        if (value > INT_MAX || value <= NA_INTEGER) {
          set_bad_value(i);
        } else {
          out[i] = static_cast<int32_t>(value);
        }
      }
      break;

    case NANOARROW_TYPE_UINT64: {
      // Read as unsigned: going through a signed 64-bit getter would wrap
      // values above INT64_MAX to negative numbers, some of which would pass
      // the range check and be stored as valid (wrong) integers.
      const uint64_t* src =
          array_view_.buffer_views[1].data.as_uint64 + raw_src_offset;
      for (R_xlen_t i = 0; i < length; i++) {
        if (is_null(i)) {
          out[i] = NA_INTEGER;
          continue;
        }

        if (src[i] > static_cast<uint64_t>(INT_MAX)) {
          set_bad_value(i);
        } else {
          out[i] = static_cast<int32_t>(src[i]);
        }
      }
      break;
    }

    case NANOARROW_TYPE_FLOAT:
    case NANOARROW_TYPE_DOUBLE: {
      // Range check in floating point *before* converting: casting NaN, Inf or
      // an out-of-range value to an integer type is undefined behaviour.
      // Anything strictly between these limits truncates (toward zero) to a
      // value in (NA_INTEGER, INT_MAX].
      const double lower_exclusive = static_cast<double>(NA_INTEGER);
      const double upper_exclusive = static_cast<double>(INT_MAX) + 1.0;
      const bool is_float = array_view_.storage_type == NANOARROW_TYPE_FLOAT;

      for (R_xlen_t i = 0; i < length; i++) {
        if (is_null(i)) {
          out[i] = NA_INTEGER;
          continue;
        }

        const double value =
            is_float
                ? static_cast<double>(
                      array_view_.buffer_views[1].data.as_float[raw_src_offset + i])
                : array_view_.buffer_views[1].data.as_double[raw_src_offset + i];

        // Written so that NaN (for which every comparison is false) is
        // rejected as well.
        if (value > lower_exclusive && value < upper_exclusive) {
          out[i] = static_cast<int32_t>(value);
        } else {
          set_bad_value(i);
        }
      }
      break;
    }

    default:
      return EINVAL;
  }

  if (n_bad_values > 0) {
    warn_lossy_conversion(n_bad_values, "outside integer range set to NA");
  }

  return NANOARROW_OK;
}
202+
83203
// Returns the prototype for this builder: a newly allocated INTSXP of
// length zero.
SEXP GetPtype() override {
  SEXP empty_integer = Rf_allocVector(INTSXP, 0);
  return empty_integer;
}
84204
};
85205

0 commit comments

Comments
 (0)