|
24 | 24 |
|
25 | 25 | #include "vctr_builder_base.h" |
26 | 26 |
|
27 | | -class UnspecifiedBuilder : public VctrBuilder { |
28 | | - public: |
29 | | - explicit UnspecifiedBuilder(SEXP ptype_sexp) |
30 | | - : VctrBuilder(VECTOR_TYPE_UNSPECIFIED, ptype_sexp) {} |
31 | | - |
32 | | - ArrowErrorCode Init(const ArrowSchema* schema, VctrBuilderOptions options, |
33 | | - ArrowError* error) override { |
34 | | - NANOARROW_RETURN_NOT_OK(VctrBuilder::Init(schema, options, error)); |
35 | | - switch (schema_view_.type) { |
36 | | - case NANOARROW_TYPE_DICTIONARY: |
37 | | - StopCantConvert(); |
38 | | - default: |
39 | | - break; |
40 | | - } |
41 | | - |
42 | | - return NANOARROW_OK; |
43 | | - } |
44 | | - |
45 | | - ArrowErrorCode Reserve(R_xlen_t n, ArrowError* error) override { |
46 | | - NANOARROW_RETURN_NOT_OK(VctrBuilder::Reserve(n, error)); |
47 | | - SEXP value = PROTECT(Rf_allocVector(LGLSXP, n)); |
48 | | - SetValue(value); |
49 | | - UNPROTECT(1); |
50 | | - return NANOARROW_OK; |
51 | | - } |
52 | | - |
53 | | - ArrowErrorCode PushNext(const ArrowArray* array, ArrowError* error) override { |
54 | | - int64_t not_null_count; |
55 | | - if (array->null_count == -1 && array->buffers[0] == nullptr) { |
56 | | - not_null_count = array->length; |
57 | | - } else if (array->null_count == -1) { |
58 | | - not_null_count = |
59 | | - ArrowBitCountSet(reinterpret_cast<const uint8_t*>(array->buffers[0]), |
60 | | - array->offset, array->length); |
61 | | - } else { |
62 | | - not_null_count = array->length - array->null_count; |
63 | | - } |
64 | | - |
65 | | - if (not_null_count > 0 && array->length > 0) { |
66 | | - NANOARROW_RETURN_NOT_OK( |
67 | | - WarnLossyConvert("that were non-null set to NA", not_null_count)); |
68 | | - } |
69 | | - |
70 | | - int* value_ptr = LOGICAL(value_) + value_size_; |
71 | | - for (int64_t i = 0; i < array->length; i++) { |
72 | | - value_ptr[i] = NA_LOGICAL; |
73 | | - } |
74 | | - |
75 | | - return NANOARROW_OK; |
76 | | - } |
77 | | -}; |
78 | | - |
79 | | -class IntBuilder : public VctrBuilder { |
80 | | - public: |
81 | | - explicit IntBuilder(SEXP ptype_sexp) : VctrBuilder(VECTOR_TYPE_INT, ptype_sexp) {} |
82 | | - |
83 | | - ArrowErrorCode Init(const ArrowSchema* schema, VctrBuilderOptions options, |
84 | | - ArrowError* error) override { |
85 | | - NANOARROW_RETURN_NOT_OK(VctrBuilder::Init(schema, options, error)); |
86 | | - return NANOARROW_OK; |
87 | | - } |
88 | | - |
89 | | - ArrowErrorCode Reserve(R_xlen_t n, ArrowError* error) override { |
90 | | - NANOARROW_RETURN_NOT_OK(VctrBuilder::Reserve(n, error)); |
91 | | - SEXP value = PROTECT(Rf_allocVector(INTSXP, n)); |
92 | | - SetValue(value); |
93 | | - UNPROTECT(1); |
94 | | - return NANOARROW_OK; |
95 | | - } |
96 | | - |
97 | | - ArrowErrorCode PushNext(const ArrowArray* array, ArrowError* error) override { |
98 | | - NANOARROW_RETURN_NOT_OK(VctrBuilder::PushNext(array, error)); |
99 | | - |
100 | | - int* result = INTEGER(value_); |
101 | | - int64_t n_bad_values = 0; |
102 | | - |
103 | | - // True for all the types supported here |
104 | | - const uint8_t* is_valid = array_view_.buffer_views[0].data.as_uint8; |
105 | | - int64_t raw_src_offset = array_view_.offset; |
106 | | - R_xlen_t length = array->length; |
107 | | - |
108 | | - // Fill the buffer |
109 | | - switch (array_view_.storage_type) { |
110 | | - case NANOARROW_TYPE_NA: |
111 | | - for (R_xlen_t i = 0; i < length; i++) { |
112 | | - result[value_size_ + i] = NA_INTEGER; |
113 | | - } |
114 | | - break; |
115 | | - case NANOARROW_TYPE_INT32: |
116 | | - memcpy(result + value_size_, |
117 | | - array_view_.buffer_views[1].data.as_int32 + raw_src_offset, |
118 | | - length * sizeof(int32_t)); |
119 | | - |
120 | | - // Set any nulls to NA_INTEGER |
121 | | - if (is_valid != NULL && array_view_.null_count != 0) { |
122 | | - for (R_xlen_t i = 0; i < length; i++) { |
123 | | - if (!ArrowBitGet(is_valid, raw_src_offset + i)) { |
124 | | - result[value_size_ + i] = NA_INTEGER; |
125 | | - } |
126 | | - } |
127 | | - } |
128 | | - break; |
129 | | - case NANOARROW_TYPE_BOOL: |
130 | | - ArrowBitsUnpackInt32(array_view_.buffer_views[1].data.as_uint8 + raw_src_offset, |
131 | | - raw_src_offset, length, result + value_size_); |
132 | | - |
133 | | - // Set any nulls to NA_LOGICAL |
134 | | - if (is_valid != NULL && array_view_.null_count != 0) { |
135 | | - for (R_xlen_t i = 0; i < length; i++) { |
136 | | - if (!ArrowBitGet(is_valid, raw_src_offset + i)) { |
137 | | - result[value_size_ + i] = NA_LOGICAL; |
138 | | - } |
139 | | - } |
140 | | - } |
141 | | - break; |
142 | | - case NANOARROW_TYPE_INT8: |
143 | | - case NANOARROW_TYPE_UINT8: |
144 | | - case NANOARROW_TYPE_INT16: |
145 | | - case NANOARROW_TYPE_UINT16: |
146 | | - // No need to bounds check for these types |
147 | | - for (R_xlen_t i = 0; i < length; i++) { |
148 | | - result[value_size_ + i] = (int32_t)ArrowArrayViewGetIntUnsafe(&array_view_, i); |
149 | | - } |
150 | | - |
151 | | - // Set any nulls to NA_INTEGER |
152 | | - if (is_valid != NULL && array_view_.null_count != 0) { |
153 | | - for (R_xlen_t i = 0; i < length; i++) { |
154 | | - if (!ArrowBitGet(is_valid, raw_src_offset + i)) { |
155 | | - result[value_size_ + i] = NA_INTEGER; |
156 | | - } |
157 | | - } |
158 | | - } |
159 | | - break; |
160 | | - case NANOARROW_TYPE_UINT32: |
161 | | - case NANOARROW_TYPE_INT64: |
162 | | - case NANOARROW_TYPE_UINT64: |
163 | | - case NANOARROW_TYPE_FLOAT: |
164 | | - case NANOARROW_TYPE_DOUBLE: |
165 | | - // Loop + bounds check. Because we don't know what memory might be |
166 | | - // in a null slot, we have to check nulls if there are any. |
167 | | - if (is_valid != NULL && array_view_.null_count != 0) { |
168 | | - for (R_xlen_t i = 0; i < length; i++) { |
169 | | - if (ArrowBitGet(is_valid, raw_src_offset + i)) { |
170 | | - int64_t value = ArrowArrayViewGetIntUnsafe(&array_view_, i); |
171 | | - if (value > INT_MAX || value <= NA_INTEGER) { |
172 | | - result[value_size_ + i] = NA_INTEGER; |
173 | | - n_bad_values++; |
174 | | - } else { |
175 | | - result[value_size_ + i] = (int32_t)value; |
176 | | - } |
177 | | - } else { |
178 | | - result[value_size_ + i] = NA_INTEGER; |
179 | | - } |
180 | | - } |
181 | | - } else { |
182 | | - for (R_xlen_t i = 0; i < length; i++) { |
183 | | - int64_t value = ArrowArrayViewGetIntUnsafe(&array_view_, i); |
184 | | - if (value > INT_MAX || value <= NA_INTEGER) { |
185 | | - result[value_size_ + i] = NA_INTEGER; |
186 | | - n_bad_values++; |
187 | | - } else { |
188 | | - result[value_size_ + i] = (int32_t)value; |
189 | | - } |
190 | | - } |
191 | | - } |
192 | | - break; |
193 | | - |
194 | | - default: |
195 | | - return EINVAL; |
196 | | - } |
197 | | - |
198 | | - if (n_bad_values > 0) { |
199 | | - warn_lossy_conversion(n_bad_values, "outside integer range set to NA"); |
200 | | - } |
201 | | - |
202 | | - return NANOARROW_OK; |
203 | | - } |
204 | | - |
205 | | - SEXP GetPtype() override { return Rf_allocVector(INTSXP, 0); } |
206 | | -}; |
207 | | - |
208 | 27 | class DblBuilder : public VctrBuilder { |
209 | 28 | public: |
210 | 29 | explicit DblBuilder(SEXP ptype_sexp) : VctrBuilder(VECTOR_TYPE_DBL, ptype_sexp) {} |
|
0 commit comments