Skip to content

Commit 5b3af43

Browse files
tabulate: fix categorical and string edge cases
1 parent e0929d2 commit 5b3af43

File tree

1 file changed

+63
-31
lines changed

1 file changed

+63
-31
lines changed

inst/tabulate.m

Lines changed: 63 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -51,9 +51,14 @@
5151

5252
function tbl = tabulate (x)
5353

54+
## Check if categorical type is available
55+
has_categorical = (exist ('iscategorical', 'builtin') == 5) ...
56+
|| (exist ('iscategorical', 'file') == 2);
57+
5458
## Check input for being numeric, string, categorical, cell array or logical
5559
if (! (isnumeric (x) && (isvector (x) || isempty (x))) && ! (iscellstr (x)
56-
&& isvector (x)) && ! ischar (x) && ! iscategorical (x)
60+
&& isvector (x)) && ! ischar (x) ...
61+
&& ! (has_categorical && iscategorical (x))
5762
&& ! isa (x, "string") && ! islogical (x))
5863
error (strcat ("tabulate: X must be either a numeric vector, a", ...
5964
" vector cell array of strings, a character matrix,", ...
@@ -65,7 +70,7 @@
6570
x = x(:);
6671
endif
6772

68-
if (iscategorical (x))
73+
if (has_categorical && iscategorical (x))
6974
## For categorical, we report ALL categories, even if count is 0
7075
vals = categories (x);
7176
nc = length (vals);
@@ -84,7 +89,12 @@
8489
endif
8590

8691
total = sum (counts);
87-
percents = 100 * counts ./ total;
92+
if (total == 0)
93+
percents = zeros (size (counts));
94+
else
95+
percents = 100 * counts ./ total;
96+
endif
97+
8898

8999
## Output format: Cell array
90100
out = cell (length (vals), 3);
@@ -96,40 +106,21 @@
96106
## Handle string arrays
97107
x(ismissing (x)) = [];
98108

99-
## Convert to cellstr and use grp2idx which is robust
100-
[idx, vals] = grp2idx (cellstr (x));
101-
102-
if (isempty (idx))
103-
counts = [];
104-
percents = [];
109+
if (isempty (x))
110+
out = cell (0, 3);
105111
else
106-
counts = accumarray (idx, 1);
107-
total = sum (counts);
108-
percents = 100 * counts ./ total;
109-
endif
110-
111-
## Output format: Cell array
112-
vals_cell = vals;
113-
out = cell (length (vals_cell), 3);
114-
out(:,1) = vals_cell;
115-
out(:,2) = num2cell (counts);
116-
out(:,3) = num2cell (percents);
112+
## Convert to cellstr and use grp2idx which is robust
113+
[idx, vals] = grp2idx (cellstr (x));
117114

118-
if (isempty (idx))
119-
counts = [];
120-
percents = [];
121-
else
122115
counts = accumarray (idx, 1);
123116
total = sum (counts);
124117
percents = 100 * counts ./ total;
125-
endif
126118

127-
## Output format: Cell array
128-
vals_cell = vals;
129-
out = cell (length (vals_cell), 3);
130-
out(:,1) = vals_cell;
131-
out(:,2) = num2cell (counts);
132-
out(:,3) = num2cell (percents);
119+
out = cell (length (vals), 3);
120+
out(:,1) = vals;
121+
out(:,2) = num2cell (counts);
122+
out(:,3) = num2cell (percents);
123+
endif
133124

134125
elseif (islogical (x))
135126
## Handle logical arrays
@@ -407,3 +398,44 @@
407398
%!error<tabulate: X must be either a numeric vector> tabulate ({1, 2, 3, 4})
408399
%!error<tabulate: X must be either a numeric vector> ...
409400
%! tabulate ({"a", "b"; "a", "c"})
401+
402+
%!test
403+
%! ## Categorical: all values undefined → zero counts and zero percents
404+
%! if (! exist ('categorical', 'file'))
405+
%! return;
406+
%! endif
407+
%! x = categorical ({'a','b','c'});
408+
%! x(:) = categorical (missing);
409+
%! tbl = tabulate (x);
410+
%! assert (iscell (tbl));
411+
%! assert ([tbl{:,2}]', [0; 0; 0]);
412+
%! assert ([tbl{:,3}]', [0; 0; 0]);
413+
414+
%!test
415+
%! ## Categorical with defined categories but no data
416+
%! if (! exist ('categorical', 'file'))
417+
%! return;
418+
%! endif
419+
%! x = categorical ({}, {'low','med','high'});
420+
%! tbl = tabulate (x);
421+
%! assert (iscell (tbl));
422+
%! assert ([tbl{:,2}]', [0; 0; 0]);
423+
%! assert ([tbl{:,3}]', [0; 0; 0]);
424+
425+
%!test
426+
%! ## String array: all values missing → empty table
427+
%! if (! exist ('string', 'file'))
428+
%! return;
429+
%! endif
430+
%! x = string ({'a','b'});
431+
%! x(:) = missing;
432+
%! tbl = tabulate (x);
433+
%! assert (iscell (tbl));
434+
%! assert (isempty (tbl));
435+
436+
%!test
437+
%! if (! exist ('categorical', 'file')), return; endif
438+
%! x = categorical ({'a'; 'b'; 'a'});
439+
%! tbl = tabulate (x);
440+
%! assert ([tbl{:,2}]', [2; 1]);
441+

0 commit comments

Comments
 (0)