Skip to content

Commit aabd3db

Browse files
authored
apacheGH-38422: [MATLAB] Add NumNulls property to arrow.array.Array class (apache#47116)
### Rationale for this change

It would be nice if there was a `NumNulls` property on the `arrow.array.Array` base class. Currently, the only way to figure out the number of nulls is to count the number of `false` values in the `Valid` array:

```matlab
>> a = arrow.array([1 2 NaN 4 5 6 NaN 8 9 10 NaN]);
>> invalidValues = ~a.Valid;
>> numNulls = nnz(invalidValues)

numNulls =

     3
```

It would be nice if `NumNulls` was already a property on the array class. As @ kou mentioned, we can use `arrow::Array::null_count()` to get the number of nulls.

### What changes are included in this PR?

Added `NumNulls` as a property of the `arrow.array.Array` abstract class. `NumNulls` is a scalar `int64` value that returns the number of null elements in the array.

**Example Usage**

```matlab
>> a = arrow.array([1 2 NaN 3 4 NaN 5 6 NaN])

a =

  Float64Array with 9 elements and 3 null values:

    1 | 2 | null | ... | 5 | 6 | null

>> a.NumNulls

ans =

  int64

   3
```

### Are these changes tested?

Yes. Added test cases verifying the `NumNulls` property to these MATLAB test classes: `hNumericArray`, `tBooleanArray`, `tTimestampArray`, `tTime32Array`, `tTime64Array`, `tDate32Array`, `tDate64Array`, `tListArray`, `tStringArray`, and `tStructArray`.

### Are there any user-facing changes?

Yes. Users can now use the `NumNulls` property to query the number of null elements in an array.

### Future Changes

1. Add `NumNulls` as a property of `arrow.array.ChunkedArray`.

* GitHub Issue: apache#38422

Authored-by: Sarah Gilmore <sgilmore@mathworks.com>
Signed-off-by: Sarah Gilmore <sgilmore@mathworks.com>
1 parent ce0fa3d commit aabd3db

File tree

13 files changed

+220
-0
lines changed

13 files changed

+220
-0
lines changed

matlab/src/cpp/arrow/matlab/array/proxy/array.cc

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ Array::Array(std::shared_ptr<arrow::Array> array) : array{std::move(array)} {
3737
// Register Proxy methods.
3838
REGISTER_METHOD(Array, toString);
3939
REGISTER_METHOD(Array, getNumElements);
40+
REGISTER_METHOD(Array, getNumNulls);
4041
REGISTER_METHOD(Array, getValid);
4142
REGISTER_METHOD(Array, getType);
4243
REGISTER_METHOD(Array, isEqual);
@@ -89,6 +90,12 @@ void Array::getNumElements(libmexclass::proxy::method::Context& context) {
8990
context.outputs[0] = length_mda;
9091
}
9192

93+
// Proxy method: writes the wrapped array's null count (array->null_count())
// to context.outputs[0] as a scalar created by matlab::data::ArrayFactory.
void Array::getNumNulls(libmexclass::proxy::method::Context& context) {
94+
::matlab::data::ArrayFactory factory;
95+
auto num_nulls_mda = factory.createScalar(array->null_count());
96+
context.outputs[0] = num_nulls_mda;
97+
}
98+
9299
void Array::getValid(libmexclass::proxy::method::Context& context) {
93100
auto array_length = static_cast<size_t>(array->length());
94101

matlab/src/cpp/arrow/matlab/array/proxy/array.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ class Array : public libmexclass::proxy::Proxy {
3737

3838
void getNumElements(libmexclass::proxy::method::Context& context);
3939

40+
void getNumNulls(libmexclass::proxy::method::Context& context);
41+
4042
void getValid(libmexclass::proxy::method::Context& context);
4143

4244
void getType(libmexclass::proxy::method::Context& context);

matlab/src/matlab/+arrow/+array/Array.m

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323

2424
properties(Dependent, SetAccess=private, GetAccess=public)
2525
NumElements
26+
NumNulls % Number of null elements
2627
Valid % Validity bitmap
2728
Type(1, 1) arrow.type.Type
2829
end
@@ -39,6 +40,10 @@
3940
numElements = obj.Proxy.getNumElements();
4041
end
4142

43+
% Getter for the dependent NumNulls property; forwards to the proxy's
% getNumNulls method and returns its result unchanged.
function numNulls = get.NumNulls(obj)
44+
numNulls = obj.Proxy.getNumNulls();
45+
end
46+
4247
function validElements = get.Valid(obj)
4348
validElements = obj.Proxy.getValid();
4449
end

matlab/test/arrow/array/hNumericArray.m

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,5 +208,28 @@ function TestIsEqualFalse(tc)
208208
% Test supplying more than two arrays to isequal
209209
tc.verifyFalse(isequal(array1, array1, array3, array4, array5));
210210
end
211+
212+
function TestNumNulls(tc)
213+
% Verify the NumNulls property returns the null count as a scalar int64.
214+
215+
% array1 is constructed without a Valid argument, so it has 0 null values.
216+
data1 = tc.MatlabArrayFcn(1:10);
217+
array1 = tc.ArrowArrayConstructorFcn(data1);
218+
tc.verifyEqual(array1.NumNulls, int64(0));
219+
220+
% array2 reuses the 10-element data1 with Valid=[1 4], so only elements 1 and 4 are valid and the other 8 are null.
221+
array2 = tc.ArrowArrayConstructorFcn(data1, Valid=[1 4]);
222+
tc.verifyEqual(array2.NumNulls, int64(8));
223+
end
224+
225+
function TestNumNullsNoSetter(tc)
226+
% Verify the NumNulls property is read-only: assigning to it via setfield must raise "MATLAB:class:SetProhibited".
227+
228+
data = tc.MatlabArrayFcn(1:10);
229+
array = tc.ArrowArrayConstructorFcn(data);
230+
fcn = @() setfield(array, "NumNulls", 1);
231+
tc.verifyError(fcn, "MATLAB:class:SetProhibited");
232+
end
233+
211234
end
212235
end

matlab/test/arrow/array/tBooleanArray.m

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,5 +216,27 @@ function TestIsEqualFalse(tc)
216216
% Test supplying more than two arrays to isequal
217217
tc.verifyFalse(isequal(array1, array1, array3, array4, array5));
218218
end
219+
220+
function TestNumNulls(tc)
221+
% Verify the NumNulls property returns the null count as a scalar int64.
222+
223+
% array1 is constructed without a Valid argument, so it has 0 null values.
224+
data1 = tc.MatlabArrayFcn([true false true false]);
225+
array1 = tc.ArrowArrayConstructorFcn(data1);
226+
tc.verifyEqual(array1.NumNulls, int64(0));
227+
228+
% array2 reuses the 4-element data1 with Valid=3, so only element 3 is valid and the other 3 are null.
229+
array2 = tc.ArrowArrayConstructorFcn(data1, Valid=3);
230+
tc.verifyEqual(array2.NumNulls, int64(3));
231+
end
232+
233+
function TestNumNullsNoSetter(tc)
234+
% Verify the NumNulls property is read-only: assigning to it via setfield must raise "MATLAB:class:SetProhibited".
235+
236+
data = tc.MatlabArrayFcn([true false true false]);
237+
array = tc.ArrowArrayConstructorFcn(data, Valid=[2 3]);
238+
fcn = @() setfield(array, "NumNulls", 1);
239+
tc.verifyError(fcn, "MATLAB:class:SetProhibited");
240+
end
219241
end
220242
end

matlab/test/arrow/array/tDate32Array.m

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,28 @@ function TestNumElements(testCase)
5353
array = testCase.ArrowArrayConstructorFcn(dates);
5454
testCase.verifyEqual(array.NumElements, int64(10));
5555
end
56+
57+
function TestNumNulls(testCase)
58+
% Verify the NumNulls property returns the null count as a scalar int64.
59+
60+
% array1 is constructed without a Valid argument, so it has 0 null values.
61+
dates = datetime(2023, 1, 1) + days(1:5)';
62+
array1 = testCase.ArrowArrayConstructorFcn(dates);
63+
testCase.verifyEqual(array1.NumNulls, int64(0));
64+
65+
% array2 reuses the 5-element dates with Valid=[1 2], so only elements 1 and 2 are valid and the other 3 are null.
66+
array2 = testCase.ArrowArrayConstructorFcn(dates, Valid=[1 2]);
67+
testCase.verifyEqual(array2.NumNulls, int64(3));
68+
end
69+
70+
function TestNumNullsNoSetter(testCase)
71+
% Verify the NumNulls property is read-only: assigning to it via setfield must raise "MATLAB:class:SetProhibited".
72+
73+
data = datetime(2023, 1, 1) + days(1:5)';
74+
array = testCase.ArrowArrayConstructorFcn(data, Valid=[2 3]);
75+
fcn = @() setfield(array, "NumNulls", 1);
76+
testCase.verifyError(fcn, "MATLAB:class:SetProhibited");
77+
end
5678

5779
function TestToMATLAB(testCase)
5880
% Verify toMATLAB() round-trips the original datetime array.

matlab/test/arrow/array/tDate64Array.m

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,28 @@ function TestNumElements(testCase)
5454
testCase.verifyEqual(array.NumElements, int64(10));
5555
end
5656

57+
function TestNumNulls(testCase)
58+
% Verify the NumNulls property returns the null count as a scalar int64.
59+
60+
% array1 is constructed without a Valid argument, so it has 0 null values.
61+
dates = datetime(2023, 1, 1) + days(1:5)';
62+
array1 = testCase.ArrowArrayConstructorFcn(dates);
63+
testCase.verifyEqual(array1.NumNulls, int64(0));
64+
65+
% array2 reuses the 5-element dates with Valid=[1 2], so only elements 1 and 2 are valid and the other 3 are null.
66+
array2 = testCase.ArrowArrayConstructorFcn(dates, Valid=[1 2]);
67+
testCase.verifyEqual(array2.NumNulls, int64(3));
68+
end
69+
70+
function TestNumNullsNoSetter(testCase)
71+
% Verify the NumNulls property is read-only: assigning to it via setfield must raise "MATLAB:class:SetProhibited".
72+
73+
data = datetime(2023, 1, 1) + days(1:5)';
74+
array = testCase.ArrowArrayConstructorFcn(data, Valid=[2 3]);
75+
fcn = @() setfield(array, "NumNulls", 1);
76+
testCase.verifyError(fcn, "MATLAB:class:SetProhibited");
77+
end
78+
5779
function TestToMATLAB(testCase)
5880
% Verify toMATLAB() round-trips the original datetime array.
5981
dates = testCase.UnixEpoch + days(1:10);

matlab/test/arrow/array/tListArray.m

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
%% Empty (zero-element) list (List<Float64>)
3333
Type = arrow.list(arrow.float64());
3434
NumElements = int64(0);
35+
NumNulls = int64(0);
3536
Valid = logical.empty(0, 1);
3637
Offsets = arrow.array(int32(0));
3738
Values = arrow.array([]);
@@ -44,6 +45,7 @@
4445
Properties=struct(...
4546
Type=Type, ...
4647
NumElements=NumElements, ...
48+
NumNulls=NumNulls, ...
4749
Valid=Valid, ...
4850
Offsets=Offsets, ...
4951
Values=Values ...
@@ -53,6 +55,7 @@
5355
%% List with NULLs (List<String>)
5456
Type = arrow.list(arrow.string());
5557
NumElements = int64(4);
58+
NumNulls = int64(2);
5659
Valid = [true, false, true, false];
5760
Offsets = arrow.array(int32([0, 1, 4, 6, 7]));
5861
Values = arrow.array(["A", missing, "C", "D", "E", missing, "G"]);
@@ -65,6 +68,7 @@
6568
Properties=struct(...
6669
Type=Type, ...
6770
NumElements=NumElements, ...
71+
NumNulls=NumNulls, ...
6872
Valid=Valid, ...
6973
Offsets=Offsets, ...
7074
Values=Values ...
@@ -74,6 +78,7 @@
7478
%% Single-level list (List<Float64>)
7579
Type = arrow.list(arrow.float64());
7680
NumElements = int64(3);
81+
NumNulls = int64(0);
7782
Valid = true(1, NumElements);
7883
Offsets = arrow.array(int32([0, 2, 5, 9]));
7984
Values = arrow.array([1, 2, 3, 4, 5, 6, 7, 8, 9]);
@@ -86,6 +91,7 @@
8691
Properties=struct(...
8792
Type=Type, ...
8893
NumElements=NumElements, ...
94+
NumNulls=NumNulls, ...
8995
Valid=Valid, ...
9096
Offsets=Offsets, ...
9197
Values=Values ...
@@ -95,6 +101,7 @@
95101
%% Multi-level list (List<List<Float64>>)
96102
Type = arrow.list(arrow.list(arrow.float64()));
97103
NumElements = int64(2);
104+
NumNulls = int64(0);
98105
Valid = true(1, NumElements);
99106
Offsets = arrow.array(int32([0, 1, 3]));
100107
Values = TestArrowArray.SingleLevelList.ArrowArray;
@@ -107,6 +114,7 @@
107114
Properties=struct(...
108115
Type=Type, ...
109116
NumElements=NumElements, ...
117+
NumNulls=NumNulls, ...
110118
Valid=Valid, ...
111119
Offsets=Offsets, ...
112120
Values=Values ...

matlab/test/arrow/array/tStringArray.m

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -279,5 +279,27 @@ function TestIsEqualFalse(tc)
279279
% Test supplying more than two arrays to isequal
280280
tc.verifyFalse(isequal(array1, array1, array3, array4, array5));
281281
end
282+
283+
function TestNumNulls(testCase)
283+
% Verify the NumNulls property returns the null count as a scalar int64.
284+
285+
% array1 is constructed without a Valid argument, so it has 0 null values.
286+
data1 = ["A"; "B"; "C"; "D"; "E"; "F"];
287+
array1 = testCase.ArrowArrayConstructorFcn(data1);
288+
testCase.verifyEqual(array1.NumNulls, int64(0));
289+
290+
% array2 reuses the 6-element data1 with Valid=[1 2 3 4], so elements 1-4 are valid and the remaining 2 are null.
291+
array2 = testCase.ArrowArrayConstructorFcn(data1, Valid=[1 2 3 4]);
292+
testCase.verifyEqual(array2.NumNulls, int64(2));
293+
end
295+
296+
function TestNumNullsNoSetter(testCase)
296+
% Verify the NumNulls property is read-only: assigning to it via setfield must raise "MATLAB:class:SetProhibited".
297+
298+
data = ["A"; "B"; "C"; missing; "D"; "E"; "F"];
299+
array = testCase.ArrowArrayConstructorFcn(data);
300+
fcn = @() setfield(array, "NumNulls", 1);
301+
testCase.verifyError(fcn, "MATLAB:class:SetProhibited");
302+
end
282304
end
283305
end

matlab/test/arrow/array/tStructArray.m

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,26 @@ function NumElementsNoSetter(tc)
142142
tc.verifyError(fcn, "MATLAB:class:SetProhibited");
143143
end
144144

145+
function NumNulls(tc)
146+
% Verify the NumNulls property: int64(0) when fromArrays is called without Valid, and int64(2) when Valid=[1 3 5] is supplied.
147+
import arrow.array.StructArray
148+
149+
array1 = StructArray.fromArrays(tc.Float64Array, tc.StringArray);
150+
tc.verifyEqual(array1.NumNulls, int64(0));
151+
152+
array2 = StructArray.fromArrays(tc.Float64Array, tc.StringArray, Valid=[1 3 5]);
153+
tc.verifyEqual(array2.NumNulls, int64(2));
154+
end
155+
156+
function NumNullsNoSetter(tc)
157+
% Verify the NumNulls property is read-only: assigning to it via setfield must raise "MATLAB:class:SetProhibited".
158+
import arrow.array.StructArray
159+
160+
array = StructArray.fromArrays(tc.Float64Array, tc.StringArray);
161+
fcn = @() setfield(array, "NumNulls", 1);
162+
tc.verifyError(fcn, "MATLAB:class:SetProhibited");
163+
end
164+
145165
function Type(tc)
146166
% Verify the Type property is set to the expected value.
147167
import arrow.array.StructArray

0 commit comments

Comments
 (0)