From b156de97a36013719beee02fc6c912f8a7a1ba3d Mon Sep 17 00:00:00 2001 From: Ivan Hristov <35896427+IvanHristov98@users.noreply.github.com> Date: Fri, 4 Jul 2025 12:24:03 +0300 Subject: [PATCH] Implement UNIQUE function (#5) * Implement UNIQUE function * Reduce memory footprint --------- Co-authored-by: Ivan Hristov --- calc.go | 146 +++++++++++++++++++++++++++++++++++++++++++++++++++ calc_test.go | 81 ++++++++++++++++++++++++++++ 2 files changed, 227 insertions(+) diff --git a/calc.go b/calc.go index c36e500942..e69260c27c 100644 --- a/calc.go +++ b/calc.go @@ -785,6 +785,7 @@ type formulaFuncs struct { // TYPE // UNICHAR // UNICODE +// UNIQUE // UPPER // VALUE // VALUETOTEXT @@ -14439,6 +14440,151 @@ func (fn *formulaFuncs) UNICODE(argsList *list.List) formulaArg { return fn.code("UNICODE", argsList) } +// UNIQUE function returns a list of unique values in a list or range. +// For syntax refer to +// https://support.microsoft.com/en-us/office/unique-function-c5ab87fd-30a3-4ce9-9d1a-40204fb85e1e. +func (fn *formulaFuncs) UNIQUE(argsList *list.List) formulaArg { + args, errArg := getFormulaUniqueArgs(argsList) + if errArg != nil { + return *errArg + } + + if args.byColumn { + args.cellRange, args.cols, args.rows = transposeFormulaArgsList(args.cellRange, args.cols, args.rows) + } + + counts := map[string]int{} + + for i := 0; i < len(args.cellRange); i += args.cols { + key := concatValues(args.cellRange[i : i+args.cols]) + + if _, ok := counts[key]; !ok { + counts[key] = 0 + } + counts[key]++ + } + + uniqueAxes := [][]formulaArg{} + + for i := 0; i < len(args.cellRange); i += args.cols { + key := concatValues(args.cellRange[i : i+args.cols]) + + if (args.exactlyOnce && counts[key] == 1) || (!args.exactlyOnce && counts[key] >= 1) { + uniqueAxes = append(uniqueAxes, args.cellRange[i:i+args.cols]) + } + delete(counts, key) + } + + if args.byColumn { + uniqueAxes = transposeFormulaArgsMatrix(uniqueAxes) + } + + return newMatrixFormulaArg(uniqueAxes) +} + +func transposeFormulaArgsMatrix(args [][]formulaArg) [][]formulaArg { + if len(args) == 0 { + return args + } + + transposedArgs := make([][]formulaArg, len(args[0])) + + for i := 0; i < len(args[0]); i++ { + transposedArgs[i] = make([]formulaArg, len(args)) + } + + for i := 0; i < len(args); i++ { + for j := 0; j < len(args[i]); j++ { + transposedArgs[j][i] = args[i][j] + } + } + + return transposedArgs +} + +func transposeFormulaArgsList(args []formulaArg, cols, rows int) ([]formulaArg, int, int) { + transposedArgs := make([]formulaArg, len(args)) + + for i := 0; i < rows; i++ { + for j := 0; j < cols; j++ { + transposedArgs[j*rows+i] = args[i*cols+j] + } + } + return transposedArgs, rows, cols +} + +func concatValues(args []formulaArg) string { + val := "" + for _, arg := range args { + // Call to Value is cheap. + val += arg.Value() + } + return val +} + +type uniqueArgs struct { + cellRange []formulaArg + cols int + rows int + byColumn bool + exactlyOnce bool +} + +func getFormulaUniqueArgs(argsList *list.List) (uniqueArgs, *formulaArg) { + res := uniqueArgs{} + + argsLen := argsList.Len() + if argsLen == 0 { + errArg := newErrorFormulaArg(formulaErrorVALUE, "UNIQUE requires at least 1 argument") + return res, &errArg + } + + if argsLen > 3 { + msg := fmt.Sprintf("UNIQUE takes at most 3 arguments, received %d arguments", argsLen) + errArg := newErrorFormulaArg(formulaErrorVALUE, msg) + + return res, &errArg + } + + firstArg := argsList.Front() + res.cellRange = firstArg.Value.(formulaArg).ToList() + if len(res.cellRange) == 0 { + errArg := newErrorFormulaArg(formulaErrorVALUE, "missing first argument to UNIQUE") + return res, &errArg + } + if res.cellRange[0].Type == ArgError { + return res, &res.cellRange[0] + } + + rmin, rmax := calcColsRowsMinMax(false, argsList) + cmin, cmax := calcColsRowsMinMax(true, argsList) + res.cols, res.rows = cmax-cmin+1, rmax-rmin+1 + + secondArg := firstArg.Next() + if secondArg == nil { + return res, nil + } + + argByColumn := secondArg.Value.(formulaArg).ToBool() + if argByColumn.Type == ArgError { + return res, &argByColumn + } + res.byColumn = (argByColumn.Value() == "TRUE") + + thirdArg := secondArg.Next() + if thirdArg == nil { + return res, nil + } + + argExactlyOnce := thirdArg.Value.(formulaArg).ToBool() + if argExactlyOnce.Type == ArgError { + return res, &argExactlyOnce + } + res.exactlyOnce = (argExactlyOnce.Value() == "TRUE") + + return res, nil +} + // UPPER converts all characters in a supplied text string to upper case. The // syntax of the function is: // diff --git a/calc_test.go b/calc_test.go index 4aef37094b..b820f17535 100644 --- a/calc_test.go +++ b/calc_test.go @@ -1914,6 +1914,11 @@ func TestCalcCellValue(t *testing.T) { "UNICODE(\"alpha\")": "97", "UNICODE(\"?\")": "63", "UNICODE(\"3\")": "51", + // UNIQUE + "TEXTJOIN(\",\", TRUE, UNIQUE(D2:D9))": "Jan,Feb", + "TEXTJOIN(\",\", TRUE, UNIQUE(D2:D9, FALSE, FALSE))": "Jan,Feb", + "TEXTJOIN(\",\", TRUE, UNIQUE(E2:E9, FALSE, FALSE))": "North 1,North 2,South 1,South 2", + "TEXTJOIN(\",\", TRUE, UNIQUE(D2:D9, FALSE, TRUE))": "", // UPPER "UPPER(\"test\")": "TEST", "UPPER(\"TEST\")": "TEST", @@ -5132,6 +5137,82 @@ func TestCalcCOVAR(t *testing.T) { } } +func TestCalcUniqueExactlyOnce(t *testing.T) { + cellData := [][]interface{}{ + {"Customer name"}, + {"Fife, Grant"}, + {"Pruitt, Barbara"}, + {"Horn, Frances"}, + {"Barrett, Alicia"}, + {"Barrett, Alicia"}, + {"Larson, Lynn"}, + {"Pruitt, Barbara"}, + {"Snook, Anthony"}, + {"Snook, Anthony"}, + {"Horn, Frances"}, + {"Brown, Charity"}, + } + f := prepareCalcData(cellData) + + formulaList := map[string]string{ + "TEXTJOIN(\":\", TRUE, UNIQUE(A2:A12))": "Fife, Grant:Pruitt, Barbara:Horn, Frances:Barrett, Alicia:Larson, Lynn:Snook, Anthony:Brown, Charity", + "TEXTJOIN(\":\", TRUE, UNIQUE(A2:A12,FALSE,TRUE))": "Fife, Grant:Larson, Lynn:Brown, Charity", + "TEXTJOIN(\":\", TRUE, UNIQUE(A2:A12,FALSE,FALSE))": "Fife, Grant:Pruitt, Barbara:Horn, Frances:Barrett, Alicia:Larson, Lynn:Snook, Anthony:Brown, Charity", + } + for formula, expected := range formulaList { + assert.NoError(t, f.SetCellFormula("Sheet1", "C1", formula)) + result, err := f.CalcCellValue("Sheet1", "C1") + assert.NoError(t, err, formula) + assert.Equal(t, expected, result, formula) + } +} + +func TestCalcUniqueMultiColumn(t *testing.T) { + cellData := [][]interface{}{ + {"Player name", "Gender", "Nickname"}, + {"Tom", "M", "Tom"}, + {"Fred", "M", "Fred"}, + {"Amy", "F", "Amy"}, + {"John", "M", "John"}, + {"Malicia", "F", "Malicia"}, + {"Fred", "M", "Fred"}, + } + f := prepareCalcData(cellData) + + formulaList := map[string]string{ + "TEXTJOIN(\":\", TRUE, UNIQUE(A2:C7))": "Tom:M:Tom:Fred:M:Fred:Amy:F:Amy:John:M:John:Malicia:F:Malicia", + "TEXTJOIN(\":\", TRUE, UNIQUE(A2:C7,TRUE))": "Tom:M:Fred:M:Amy:F:John:M:Malicia:F:Fred:M", + "TEXTJOIN(\":\", TRUE, UNIQUE(A2:C7,TRUE, TRUE))": "M:M:F:M:F:M", + } + for formula, expected := range formulaList { + assert.NoError(t, f.SetCellFormula("Sheet1", "C1", formula)) + result, err := f.CalcCellValue("Sheet1", "C1") + assert.NoError(t, err, formula) + assert.Equal(t, expected, result, formula) + } +} + +func TestCalcUniqueErrors(t *testing.T) { + cellData := [][]interface{}{ + {"Player name", "Gender", "Nickname"}, + {"Tom", "M", "Tom"}, + {"Fred", "M", "Fred"}, + } + f := prepareCalcData(cellData) + formulaList := map[string]string{ + "TEXTJOIN(\":\", TRUE, UNIQUE())": "#VALUE!", + "TEXTJOIN(\":\", TRUE, UNIQUE(1, 2, 3, 4))": "#VALUE!", + "TEXTJOIN(\":\", TRUE, UNIQUE(A2:A3, \"Hello\"))": "#VALUE!", + "TEXTJOIN(\":\", TRUE, UNIQUE(A2:A3, TRUE, \"Hello\"))": "#VALUE!", + } + for formula, expected := range formulaList { + assert.NoError(t, f.SetCellFormula("Sheet1", "C1", formula)) + result, err := f.CalcCellValue("Sheet1", "C1") + assert.Error(t, err, formula) + assert.Equal(t, expected, result, formula) + } +} + func TestCalcDatabase(t *testing.T) { cellData := [][]interface{}{ {"Tree", "Height", "Age", "Yield", "Profit", "Height"},