Skip to content

Commit 063c439

Browse files
authored
Merge pull request ClickHouse#80162 from Blargian/array_functions_part_4
Docs: Array functions source code documentation - part 4
2 parents 3cb24f0 + 43a314c commit 063c439

9 files changed

+371
-20
lines changed

src/Functions/array/arrayDifference.cpp

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,28 @@ using FunctionArrayDifference = FunctionArrayMapped<ArrayDifferenceImpl, NameArr
166166

167167
REGISTER_FUNCTION(ArrayDifference)
168168
{
169+
FunctionDocumentation::Description description = R"(
170+
Calculates an array of differences between adjacent array elements.
171+
The first element of the result array will be 0, the second `arr[1] - arr[0]`, the third `arr[2] - arr[1]`, etc.
172+
The type of elements in the result array is determined by the type inference rules for subtraction (e.g. `UInt8` - `UInt8` = `Int16`).
173+
)";
174+
FunctionDocumentation::Syntax syntax = "arrayDifference(arr)";
175+
FunctionDocumentation::Arguments argument = {
176+
{"arr", "Array for which to calculate differences between adjacent elements. [`Array(T)`](/sql-reference/data-types/array)."},
177+
};
178+
FunctionDocumentation::ReturnedValue returned_value = "Returns an array of differences between adjacent array elements. [`UInt*`](/sql-reference/data-types/int-uint#integer-ranges), [`Int*`](/sql-reference/data-types/int-uint#integer-ranges), [`Float*`](/sql-reference/data-types/float).";
179+
FunctionDocumentation::Examples examples = {
180+
{"Usage example", "SELECT arrayDifference([1, 2, 3, 4]);", "[0,1,1,1]"},
181+
{"Example of overflow due to result type Int64", "SELECT arrayDifference([0, 10000000000000000000]);", R"(
182+
┌─arrayDifference([0, 10000000000000000000])─┐
183+
│ [0,-8446744073709551616] │
184+
└────────────────────────────────────────────┘
185+
)"}
186+
};
187+
FunctionDocumentation::IntroducedIn introduced_in = {1, 1};
188+
FunctionDocumentation::Category category = FunctionDocumentation::Category::Array;
189+
FunctionDocumentation documentation = {description, syntax, argument, returned_value, examples, introduced_in, category};
190+
169191
factory.registerFunction<FunctionArrayDifference>(documentation);
170192
}
171193

src/Functions/array/arrayDistinct.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -289,7 +289,18 @@ void FunctionArrayDistinct::executeHashed(
289289

290290
REGISTER_FUNCTION(ArrayDistinct)
291291
{
292-
factory.registerFunction<FunctionArrayDistinct>();
292+
FunctionDocumentation::Description description = "Returns an array containing only the distinct elements of an array.";
293+
FunctionDocumentation::Syntax syntax = "arrayDistinct(arr)";
294+
FunctionDocumentation::Arguments argument = {
295+
{"arr", "Array for which to extract distinct elements. [`Array(T)`](/sql-reference/data-types/array)."},
296+
};
297+
FunctionDocumentation::ReturnedValue returned_value = "Returns an array containing the distinct elements. [`Array(T)`](/sql-reference/data-types/array).";
298+
FunctionDocumentation::Examples examples = {{"Usage example", "SELECT arrayDistinct([1, 2, 2, 3, 1]);", "[1,2,3]"}};
299+
FunctionDocumentation::IntroducedIn introduced_in = {1, 1};
300+
FunctionDocumentation::Category category = FunctionDocumentation::Category::Array;
301+
FunctionDocumentation documentation = {description, syntax, argument, returned_value, examples, introduced_in, category};
302+
303+
factory.registerFunction<FunctionArrayDistinct>(documentation);
293304
}
294305

295306
}

src/Functions/array/arrayEnumerateDense.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,17 @@ class FunctionArrayEnumerateDense : public FunctionArrayEnumerateExtended<Functi
1616

1717
REGISTER_FUNCTION(ArrayEnumerateDense)
1818
{
19+
FunctionDocumentation::Description description = "Returns an array of the same size as the source array, indicating where each element first appears in the source array.";
20+
FunctionDocumentation::Syntax syntax = "arrayEnumerateDense(arr)";
21+
FunctionDocumentation::Arguments arguments = {
22+
{"arr", "The array to enumerate. [`Array(T)`](/sql-reference/data-types/array)."}
23+
};
24+
FunctionDocumentation::ReturnedValue returned_value = "Returns an array of the same size as `arr`, indicating where each element first appears in the source array. [`Array(T)`](/sql-reference/data-types/array).";
25+
FunctionDocumentation::Examples examples = {{"Usage example", "SELECT arrayEnumerateDense([10, 20, 10, 30])", "[1,2,1,3]"}};
26+
FunctionDocumentation::IntroducedIn introduced_in = {18, 12};
27+
FunctionDocumentation::Category category = FunctionDocumentation::Category::Array;
28+
FunctionDocumentation documentation = {description, syntax, arguments, returned_value, examples, introduced_in, category};
29+
1930
factory.registerFunction<FunctionArrayEnumerateDense>(documentation);
2031
}
2132

src/Functions/array/arrayEnumerateDenseRanked.cpp

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,45 @@ class FunctionArrayEnumerateDenseRanked : public FunctionArrayEnumerateRankedExt
1616

1717
REGISTER_FUNCTION(ArrayEnumerateDenseRanked)
1818
{
19+
FunctionDocumentation::Description description = "Returns an array the same size as the source array, indicating where each element first appears in the source array. It allows for enumeration of a multidimensional array with the ability to specify how deep to look inside the array.";
20+
FunctionDocumentation::Syntax syntax = "arrayEnumerateDenseRanked(clear_depth, arr, max_array_depth)";
21+
FunctionDocumentation::Arguments arguments = {
22+
{"clear_depth", "Enumerate elements at the specified level separately. Positive [Integer](../data-types/int-uint.md) less than or equal to `max_array_depth`."},
23+
{"arr", "N-dimensional array to enumerate. [`Array(T)`](/sql-reference/data-types/array)."},
24+
{"max_array_depth", "The maximum effective depth. Positive [(U)Int*](../data-types/int-uint.md) less than or equal to the depth of `arr`."},
25+
};
26+
FunctionDocumentation::ReturnedValue returned_value = "Returns an array denoting where each element first appears in the source array. [Array](/sql-reference/data-types/array).";
27+
FunctionDocumentation::Examples examples = {
28+
{"Basic usage", R"(
29+
With `clear_depth=1` and `max_array_depth=1`, the result is identical to what [arrayEnumerateDense](#arrayenumeratedense) would give.
30+
31+
```sql
32+
SELECT arrayEnumerateDenseRanked(1,[10, 20, 10, 30],1);
33+
```
34+
)", "[1,2,1,3]"},
35+
{"Usage with a multidimensional array", R"(
36+
In this example, `arrayEnumerateDenseRanked` is used to obtain an array indicating, for each element of the multidimensional array, what its position is among elements of the same value.
37+
For the first row of the passed array, `[10,10,30,20]`, the corresponding first row of the result is `[1,1,2,3]`, indicating that `10` is the first number encountered in position 1 and 2, `30` the second number encountered in position 3 and `20` is the third number encountered in position 4.
38+
For the second row, `[40, 50, 10, 30]`, the corresponding second row of the result is `[4,5,1,2]`, indicating that `40` and `50` are the fourth and fifth numbers encountered in position 1 and 2 of that row, that another `10` (the first encountered number) is in position 3 and `30` (the second number encountered) is in the last position.
39+
40+
```sql
41+
SELECT arrayEnumerateDenseRanked(1,[[10,10,30,20],[40,50,10,30]],2);
42+
```
43+
)", "[[1,1,2,3],[4,5,1,2]]"
44+
},
45+
{"Example with increased clear_depth", R"(
46+
Changing `clear_depth=2` results in the enumeration occurring separately for each row anew.
47+
48+
```sql
49+
SELECT arrayEnumerateDenseRanked(2,[[10,10,30,20],[40,50,10,30]],2);
50+
```
51+
)", "[[1,1,2,3],[1,2,3,4]]"
52+
}
53+
};
54+
FunctionDocumentation::IntroducedIn introduced_in = {20, 1};
55+
FunctionDocumentation::Category category = FunctionDocumentation::Category::Array;
56+
FunctionDocumentation documentation = {description, syntax, arguments, returned_value, examples, introduced_in, category};
57+
1958
factory.registerFunction<FunctionArrayEnumerateDenseRanked>(documentation);
2059
}
2160

src/Functions/array/arrayIntersect.cpp

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -766,9 +766,69 @@ using ArraySymmetricDifference = FunctionArrayIntersect<ArrayModeSymmetricDiffer
766766

767767
REGISTER_FUNCTION(ArrayIntersect)
768768
{
769-
factory.registerFunction<ArrayIntersect>();
770-
factory.registerFunction<ArrayUnion>();
771-
factory.registerFunction<ArraySymmetricDifference>();
769+
FunctionDocumentation::Description intersect_description = "Takes multiple arrays and returns an array with elements which are present in all source arrays. The result contains only unique values.";
770+
FunctionDocumentation::Syntax intersect_syntax = "arrayIntersect(arr1, arr2, ..., arrN)";
771+
FunctionDocumentation::Arguments intersect_argument = {{"arrN", "N arrays from which to make the new array. [`Array(T)`](/sql-reference/data-types/array)."}};
772+
FunctionDocumentation::ReturnedValue intersect_returned_value = "Returns an array with distinct elements that are present in all N arrays. [`Array(T)`](/sql-reference/data-types/array).";
773+
FunctionDocumentation::Examples intersect_example = {{"Usage example",
774+
R"(SELECT
775+
arrayIntersect([1, 2], [1, 3], [2, 3]) AS empty_intersection,
776+
arrayIntersect([1, 2], [1, 3], [1, 4]) AS non_empty_intersection
777+
)", R"(
778+
┌─empty_intersection─┬─non_empty_intersection─┐
779+
│ [] │ [1] │
780+
└────────────────────┴────────────────────────┘
781+
)"}};
782+
FunctionDocumentation::IntroducedIn intersect_introduced_in = {1, 1};
783+
FunctionDocumentation::Category intersect_category = FunctionDocumentation::Category::Array;
784+
FunctionDocumentation intersect_documentation = {intersect_description, intersect_syntax, intersect_argument, intersect_returned_value, intersect_example, intersect_introduced_in, intersect_category};
785+
786+
factory.registerFunction<ArrayIntersect>(intersect_documentation);
787+
788+
FunctionDocumentation::Description union_description = "Takes multiple arrays and returns an array which contains all elements that are present in at least one of the source arrays. The result contains only unique values.";
789+
FunctionDocumentation::Syntax union_syntax = "arrayUnion(arr1, arr2, ..., arrN)";
790+
FunctionDocumentation::Arguments union_argument = {{"arrN", "N arrays from which to make the new array. [`Array(T)`](/sql-reference/data-types/array)."}};
791+
FunctionDocumentation::ReturnedValue union_returned_value = "Returns an array with distinct elements from the source arrays. [`Array(T)`](/sql-reference/data-types/array).";
792+
FunctionDocumentation::Examples union_example = {{"Usage example",
793+
R"(SELECT
794+
arrayUnion([-2, 1], [10, 1], [-2], []) as num_example,
795+
arrayUnion(['hi'], [], ['hello', 'hi']) as str_example,
796+
arrayUnion([1, 3, NULL], [2, 3, NULL]) as null_example
797+
)",R"(
798+
┌─num_example─┬─str_example────┬─null_example─┐
799+
│ [10,-2,1] │ ['hello','hi'] │ [3,2,1,NULL] │
800+
└─────────────┴────────────────┴──────────────┘
801+
)"}};
802+
FunctionDocumentation::IntroducedIn union_introduced_in = {24, 10};
803+
FunctionDocumentation::Category union_category = FunctionDocumentation::Category::Array;
804+
FunctionDocumentation union_documentation = {union_description, union_syntax, union_argument, union_returned_value, union_example, union_introduced_in, union_category};
805+
806+
factory.registerFunction<ArrayUnion>(union_documentation);
807+
808+
FunctionDocumentation::Description symdiff_description = R"(Takes multiple arrays and returns an array with elements that are not present in all source arrays. The result contains only unique values.
809+
810+
:::note
811+
The symmetric difference of _more than two sets_ is [mathematically defined](https://en.wikipedia.org/wiki/Symmetric_difference#n-ary_symmetric_difference)
812+
as the set of all input elements which occur in an odd number of input sets.
813+
In contrast, function `arraySymmetricDifference` simply returns the set of input elements which do not occur in all input sets.
814+
:::
815+
)";
816+
FunctionDocumentation::Syntax symdiff_syntax = "arraySymmetricDifference(arr1, arr2, ..., arrN)";
817+
FunctionDocumentation::Arguments symdiff_argument = {{"arrN", "N arrays from which to make the new array. [`Array(T)`](/sql-reference/data-types/array)."}};
818+
FunctionDocumentation::ReturnedValue symdiff_returned_value = "Returns an array of distinct elements not present in all source arrays. [`Array(T)`](/sql-reference/data-types/array).";
819+
FunctionDocumentation::Examples symdiff_example = {{"Usage example", R"(SELECT
820+
arraySymmetricDifference([1, 2], [1, 2], [1, 2]) AS empty_symmetric_difference,
821+
arraySymmetricDifference([1, 2], [1, 2], [1, 3]) AS non_empty_symmetric_difference;
822+
)", R"(
823+
┌─empty_symmetric_difference─┬─non_empty_symmetric_difference─┐
824+
│ [] │ [3] │
825+
└────────────────────────────┴────────────────────────────────┘
826+
)"}};
827+
FunctionDocumentation::IntroducedIn symdiff_introduced_in = {25, 4};
828+
FunctionDocumentation::Category symdiff_category = FunctionDocumentation::Category::Array;
829+
FunctionDocumentation symdiff_documentation = {symdiff_description, symdiff_syntax, symdiff_argument, symdiff_returned_value, symdiff_example, symdiff_introduced_in, symdiff_category};
830+
831+
factory.registerFunction<ArraySymmetricDifference>(symdiff_documentation);
772832
}
773833

774834
}

src/Functions/array/arrayJoin.cpp

Lines changed: 183 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,189 @@ class FunctionArrayJoin : public IFunction
7676

7777
REGISTER_FUNCTION(ArrayJoin)
7878
{
79-
factory.registerFunction<FunctionArrayJoin>();
79+
FunctionDocumentation::Description description = R"(
80+
The `arrayJoin` function takes a row that contains an array and unfolds it, generating multiple rows – one for each element in the array.
81+
This is in contrast to Regular Functions in ClickHouse which map input values to output values within the same row,
82+
and Aggregate Functions which take a group of rows and "compress" or "reduce" them into a single summary row
83+
(or a single value within a summary row if used with `GROUP BY`).
84+
85+
All the values in the columns are simply copied, except the values in the column where this function is applied;
86+
these are replaced with the corresponding array value.
87+
)";
88+
FunctionDocumentation::Syntax syntax = "arrayJoin(arr)";
89+
FunctionDocumentation::Arguments arguments = {
90+
{"arr", "An array to unfold. [`Array(T)`](/sql-reference/data-types/array)."}
91+
};
92+
FunctionDocumentation::ReturnedValue returned_value = "Returns a set of rows unfolded from `arr`.";
93+
FunctionDocumentation::Examples examples = {
94+
{"Basic usage", R"(SELECT arrayJoin([1, 2, 3] AS src) AS dst, 'Hello', src)", R"(
95+
┌─dst─┬─\'Hello\'─┬─src─────┐
96+
│ 1 │ Hello │ [1,2,3] │
97+
│ 2 │ Hello │ [1,2,3] │
98+
│ 3 │ Hello │ [1,2,3] │
99+
└─────┴───────────┴─────────┘
100+
)"},
101+
{"arrayJoin affects all sections of the query", R"(
102+
The `arrayJoin` function affects all sections of the query, including the `WHERE` section. Notice the result 2, even though the subquery returned 1 row.
103+
104+
```sql
105+
SELECT sum(1) AS impressions
106+
FROM
107+
(
108+
SELECT ['Istanbul', 'Berlin', 'Bobruisk'] AS cities
109+
)
110+
WHERE arrayJoin(cities) IN ['Istanbul', 'Berlin'];
111+
```
112+
)", R"(
113+
┌─impressions─┐
114+
│ 2 │
115+
└─────────────┘
116+
)"},
117+
{"Using multiple arrayJoin functions", R"(
118+
A query can use multiple `arrayJoin` functions. In this case, the transformation is performed multiple times and the rows are multiplied.
119+
120+
```sql
121+
SELECT
122+
sum(1) AS impressions,
123+
arrayJoin(cities) AS city,
124+
arrayJoin(browsers) AS browser
125+
FROM
126+
(
127+
SELECT
128+
['Istanbul', 'Berlin', 'Bobruisk'] AS cities,
129+
['Firefox', 'Chrome', 'Chrome'] AS browsers
130+
)
131+
GROUP BY
132+
2,
133+
3
134+
```
135+
)", R"(
136+
┌─impressions─┬─city─────┬─browser─┐
137+
│ 2 │ Istanbul │ Chrome │
138+
│ 1 │ Istanbul │ Firefox │
139+
│ 2 │ Berlin │ Chrome │
140+
│ 1 │ Berlin │ Firefox │
141+
│ 2 │ Bobruisk │ Chrome │
142+
│ 1 │ Bobruisk │ Firefox │
143+
└─────────────┴──────────┴─────────┘
144+
)"
145+
},
146+
{"Unexpected results due to optimizations", R"(
147+
Using multiple `arrayJoin` with the same expression may not produce the expected result due to optimizations.
148+
For these cases, consider modifying the repeated array expression with extra operations that do not affect the join result,
149+
e.g. `arrayJoin(arraySort(arr))`, `arrayJoin(arrayConcat(arr, []))`
150+
151+
```sql
152+
SELECT
153+
arrayJoin(dice) as first_throw,
154+
/* arrayJoin(dice) as second_throw */ -- is technically correct, but will annihilate result set
155+
arrayJoin(arrayConcat(dice, [])) as second_throw -- intentionally changed expression to force re-evaluation
156+
FROM (
157+
SELECT [1, 2, 3, 4, 5, 6] as dice
158+
);
159+
```
160+
)", R"(
161+
┌─first_throw─┬─second_throw─┐
162+
│ 1 │ 1 │
163+
│ 1 │ 2 │
164+
│ 1 │ 3 │
165+
│ 1 │ 4 │
166+
│ 1 │ 5 │
167+
│ 1 │ 6 │
168+
│ 2 │ 1 │
169+
│ 2 │ 2 │
170+
│ 2 │ 3 │
171+
│ 2 │ 4 │
172+
│ 2 │ 5 │
173+
│ 2 │ 6 │
174+
│ 3 │ 1 │
175+
│ 3 │ 2 │
176+
│ 3 │ 3 │
177+
│ 3 │ 4 │
178+
│ 3 │ 5 │
179+
│ 3 │ 6 │
180+
│ 4 │ 1 │
181+
│ 4 │ 2 │
182+
│ 4 │ 3 │
183+
│ 4 │ 4 │
184+
│ 4 │ 5 │
185+
│ 4 │ 6 │
186+
│ 5 │ 1 │
187+
│ 5 │ 2 │
188+
│ 5 │ 3 │
189+
│ 5 │ 4 │
190+
│ 5 │ 5 │
191+
│ 5 │ 6 │
192+
│ 6 │ 1 │
193+
│ 6 │ 2 │
194+
│ 6 │ 3 │
195+
│ 6 │ 4 │
196+
│ 6 │ 5 │
197+
│ 6 │ 6 │
198+
└─────────────┴──────────────┘
199+
)"
200+
},
201+
{"Using the ARRAY JOIN syntax", R"(
202+
Note the [`ARRAY JOIN`](../statements/select/array-join.md) syntax in the `SELECT` query below, which provides broader possibilities.
203+
`ARRAY JOIN` allows you to convert multiple arrays with the same number of elements at a time.
204+
205+
```sql
206+
SELECT
207+
sum(1) AS impressions,
208+
city,
209+
browser
210+
FROM
211+
(
212+
SELECT
213+
['Istanbul', 'Berlin', 'Bobruisk'] AS cities,
214+
['Firefox', 'Chrome', 'Chrome'] AS browsers
215+
)
216+
ARRAY JOIN
217+
cities AS city,
218+
browsers AS browser
219+
GROUP BY
220+
2,
221+
3
222+
```
223+
)", R"(
224+
┌─impressions─┬─city─────┬─browser─┐
225+
│ 1 │ Istanbul │ Firefox │
226+
│ 1 │ Berlin │ Chrome │
227+
│ 1 │ Bobruisk │ Chrome │
228+
└─────────────┴──────────┴─────────┘
229+
)"
230+
},
231+
{"Using Tuple", R"(
232+
You can also use [Tuple](../data-types/tuple.md):
233+
234+
```sql
235+
SELECT
236+
sum(1) AS impressions,
237+
(arrayJoin(arrayZip(cities, browsers)) AS t).1 AS city,
238+
t.2 AS browser
239+
FROM
240+
(
241+
SELECT
242+
['Istanbul', 'Berlin', 'Bobruisk'] AS cities,
243+
['Firefox', 'Chrome', 'Chrome'] AS browsers
244+
)
245+
GROUP BY
246+
2,
247+
3
248+
```
249+
)", R"(
250+
┌─impressions─┬─city─────┬─browser─┐
251+
│ 1 │ Istanbul │ Firefox │
252+
│ 1 │ Berlin │ Chrome │
253+
│ 1 │ Bobruisk │ Chrome │
254+
└─────────────┴──────────┴─────────┘
255+
)"
256+
}
257+
};
258+
FunctionDocumentation::IntroducedIn introduced_in = {1, 1};
259+
FunctionDocumentation::Category category = FunctionDocumentation::Category::Array;
260+
FunctionDocumentation documentation = {description, syntax, arguments, returned_value, examples, introduced_in, category};
261+
factory.registerFunction<FunctionArrayJoin>(documentation);
80262
}
81263

82264
}

0 commit comments

Comments
 (0)