Skip to content

Commit 9da84c9

Browse files
committed
Merge branch 'master' of github.com:ClickHouse/ClickHouse
2 parents fa5f1c8 + f60cdb2 commit 9da84c9

File tree

25 files changed

+524
-228
lines changed

25 files changed

+524
-228
lines changed

ci/jobs/scripts/check_style/aspell-ignore/en/aspell-dict.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ CapContains
152152
CapUnion
153153
CapnProto
154154
CapnProtoEnumComparingMode
155+
CardSecondary
155156
CatBoost
156157
CellAreaM
157158
CellAreaRads

ci/praktika/info.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,10 @@ def is_merge_queue_event(self):
113113
def is_push_event(self):
114114
return self.env.EVENT_TYPE == "push"
115115

116+
@property
117+
def is_dispatch_event(self):
118+
return self.env.EVENT_TYPE == "dispatch"
119+
116120
@property
117121
def instance_lifecycle(self):
118122
return self.env.INSTANCE_LIFE_CYCLE

ci/praktika/job.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ def parametrize(
9292
== len(timeout)
9393
== len(provides)
9494
== len(requires)
95-
), f"Parametrization lists must be of the same size [{len(parameter)}, {len(runs_on)}, {len(timeout)}, {len(provides)}, {len(requires)}]"
95+
), f"Parametrization lists for job [{self.name}] must be of the same size [{len(parameter)}, {len(runs_on)}, {len(timeout)}, {len(provides)}, {len(requires)}]"
9696

9797
res = []
9898
for parameter_, runs_on_, timeout_, provides_, requires_ in zip(

ci/praktika/runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ def generate_local_run_environment(workflow, job, pr=None, sha=None):
3636
BRANCH="branch_name",
3737
SHA=sha or Shell.get_output("git rev-parse HEAD"),
3838
PR_NUMBER=pr or -1,
39-
EVENT_TYPE="",
39+
EVENT_TYPE=workflow.event,
4040
JOB_OUTPUT_STREAM="",
4141
EVENT_FILE_PATH="",
4242
CHANGE_URL="",

ci/praktika/validator.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -56,7 +56,7 @@ def validate(cls):
5656
for job in workflow.jobs:
5757
cls.evaluate_check(
5858
isinstance(job, Job.Config),
59-
f"Invalid job type [{job}]",
59+
f"Invalid job type [{job}]: type [{type(job)}]",
6060
workflow.name,
6161
)
6262

@@ -158,7 +158,7 @@ def validate(cls):
158158
artifact.is_s3_artifact()
159159
), f"All artifacts must be of S3 type if enable_cache|enable_html=True, artifact [{artifact.name}], type [{artifact.type}], workflow [{workflow.name}]"
160160

161-
if workflow.dockers:
161+
if workflow.dockers and not workflow.disable_dockers_build:
162162
assert (
163163
Settings.DOCKERHUB_USERNAME
164164
), f"Settings.DOCKERHUB_USERNAME must be provided if workflow has dockers, workflow [{workflow.name}]"

ci/praktika/workflow.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,3 +107,4 @@ class InputConfig:
107107
description: str
108108
is_required: bool
109109
default_value: str
110+
options: Optional[List] = None

ci/praktika/yaml_generator.py

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,14 @@ class Templates:
112112
default: {DEFAULT_VALUE}\
113113
"""
114114

115+
TEMPLATE_OPTIONS_INPUT = """
116+
{NAME}:
117+
description: {DESCRIPTION}
118+
type: choice
119+
options: {OPTIONS}
120+
default: {DEFAULT_VALUE}\
121+
"""
122+
115123
TEMPLATE_SECRET_CONFIG = """\
116124
{SECRET_NAME}:
117125
required: true
@@ -378,12 +386,22 @@ def generate(self):
378386
# for dispatch workflows only
379387
dispatch_inputs = ""
380388
for input_item in self.workflow_config.dispatch_inputs:
381-
dispatch_inputs += YamlGenerator.Templates.TEMPLATE_INPUT.format(
382-
NAME=input_item.name,
383-
DESCRIPTION=input_item.description,
384-
IS_REQUIRED="true" if input_item.is_required else "false",
385-
DEFAULT_VALUE=input_item.default_value or "''",
386-
)
389+
if not input_item.options:
390+
dispatch_inputs += YamlGenerator.Templates.TEMPLATE_INPUT.format(
391+
NAME=input_item.name,
392+
DESCRIPTION=input_item.description,
393+
IS_REQUIRED="true" if input_item.is_required else "false",
394+
DEFAULT_VALUE=input_item.default_value or "''",
395+
)
396+
else:
397+
dispatch_inputs += (
398+
YamlGenerator.Templates.TEMPLATE_OPTIONS_INPUT.format(
399+
NAME=input_item.name,
400+
DESCRIPTION=input_item.description,
401+
OPTIONS=input_item.options,
402+
DEFAULT_VALUE=input_item.default_value or "''",
403+
)
404+
)
387405

388406
if self.workflow_config.event in (
389407
Workflow.Event.PULL_REQUEST,

docs/en/sql-reference/data-types/newjson.md

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,19 @@ slug: /sql-reference/data-types/newjson
88
title: 'JSON Data Type'
99
---
1010

11+
import {CardSecondary} from '@clickhouse/click-ui/bundled';
12+
13+
<CardSecondary
14+
badgeState="success"
15+
badgeText=""
16+
description="Check out our JSON best practice guide for examples, advanced features and considerations for using the JSON type."
17+
icon="book"
18+
infoText="Read more"
19+
infoUrl="/docs/best-practices/use-json-where-appropriate"
20+
title="Looking for a guide?"
21+
/>
22+
<br/>
23+
1124
The `JSON` type stores JavaScript Object Notation (JSON) documents in a single column.
1225

1326
If you want to use the `JSON` type, and for the examples on this page, please use:

docs/en/sql-reference/functions/ip-address-functions.md

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -248,21 +248,22 @@ SELECT IPv6CIDRToRange(toIPv6('2001:0db8:0000:85a3:0000:0000:ac1f:8001'), 32);
248248

249249
## toIPv4 {#toipv4}
250250

251-
Like [`IPv4StringToNum`](#IPv4StringToNum) but takes a string form of IPv4 address and returns value of [IPv4](../data-types/ipv4.md) type.
251+
Converts a string or a UInt32 form of IPv4 address to [IPv4](../data-types/ipv4.md) type.
252+
Similar to the [`IPv4StringToNum`](#IPv4StringToNum) and [`IPv4NumToString`](#IPv4NumToString) functions, but it supports both string and unsigned integer data types as input arguments.
252253

253254
**Syntax**
254255

255256
```sql
256-
toIPv4(string)
257+
toIPv4(x)
257258
```
258259

259260
**Arguments**
260261

261-
- `string` — IPv4 address. [String](../data-types/string.md).
262+
- `x` — IPv4 address. [`String`](../data-types/string.md), [`UInt8/16/32`](../data-types/int-uint.md).
262263

263264
**Returned value**
264265

265-
- `string` converted to the IPv4 address. [IPv4](../data-types/ipv4.md).
266+
- IPv4 address. [IPv4](../data-types/ipv4.md).
266267

267268
**Examples**
268269

@@ -298,6 +299,21 @@ Result:
298299
└───────────────────────────────────┴──────────────────────────┘
299300
```
300301

302+
303+
Query:
304+
305+
```sql
306+
SELECT toIPv4(2130706433);
307+
```
308+
309+
Result:
310+
311+
```text
312+
┌─toIPv4(2130706433)─┐
313+
│ 127.0.0.1 │
314+
└────────────────────┘
315+
```
316+
301317
## toIPv4OrDefault {#toipv4ordefault}
302318

303319
Same as `toIPv4`, but if the IPv4 address has an invalid format, it returns `0.0.0.0` (0 IPv4), or the provided IPv4 default.
@@ -412,7 +428,7 @@ Result:
412428
## toIPv6 {#toipv6}
413429

414430
Converts a string or a UInt128 form of IPv6 address to [IPv6](../data-types/ipv6.md) type. For strings, if the IPv6 address has an invalid format, returns an empty value.
415-
Similar to [IPv6StringToNum](#ipv6stringtonum) function, which converts IPv6 address to binary format.
431+
Similar to [IPv6StringToNum](#ipv6stringtonum) and [IPv6NumToString](#ipv6numtostringx) functions, which convert IPv6 addresses to and from binary format (i.e. `FixedString(16)`).
416432

417433
If the input string contains a valid IPv4 address, then the IPv6 equivalent of the IPv4 address is returned.
418434

@@ -425,7 +441,7 @@ toIPv6(UInt128)
425441

426442
**Argument**
427443

428-
- `string` or `UInt128` — IP address. [String](../data-types/string.md).
444+
- `x` — IP address. [`String`](../data-types/string.md) or [`UInt128`](../data-types/int-uint.md).
429445

430446
**Returned value**
431447

docs/en/sql-reference/functions/splitting-merging-functions.md

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ slug: /sql-reference/functions/splitting-merging-functions
66
title: 'Functions for Splitting Strings'
77
---
88

9+
import DeprecatedBadge from '@theme/badges/DeprecatedBadge';
10+
911
# Functions for Splitting Strings
1012

1113
## splitByChar {#splitbychar}
@@ -347,9 +349,14 @@ Result:
347349

348350
## ngrams {#ngrams}
349351

352+
<DeprecatedBadge/>
353+
354+
350355
Splits a UTF-8 string into n-grams of `ngramsize` symbols.
356+
This function is deprecated. Prefer to use [tokens](#tokens) with the `ngram` tokenizer.
357+
The function might be removed at some point in the future.
351358

352-
**Syntax**
359+
**Syntax**
353360

354361
```sql
355362
ngrams(string, ngramsize)
@@ -380,18 +387,23 @@ Result:
380387

381388
## tokens {#tokens}
382389

383-
Splits a string into tokens using non-alphanumeric ASCII characters as separators.
390+
Splits a string into tokens using the given tokenizer.
391+
The default tokenizer uses non-alphanumeric ASCII characters as separators.
384392

385393
**Arguments**
386394

387-
- `input_string` — Any set of bytes represented as the [String](../data-types/string.md) data type object.
395+
- `value` — The input string. [String](../data-types/string.md) or [FixedString](../data-types/fixedstring.md).
396+
- `tokenizer` — The tokenizer to use. Valid arguments are `default`, `ngram`, and `noop`. Optional; if not set explicitly, defaults to `default`. [const String](../data-types/string.md).
397+
- `ngrams` — Only relevant if argument `tokenizer` is `ngram`: An optional parameter which defines the length of the ngrams. If not set explicitly, defaults to `3`. [UInt8](../data-types/int-uint.md).
388398

389399
**Returned value**
390400

391401
- The resulting array of tokens from the input string. [Array](../data-types/array.md).
392402

393403
**Example**
394404

405+
Using the default settings:
406+
395407
```sql
396408
SELECT tokens('test1,;\\ test2,;\\ test3,;\\ test4') AS tokens;
397409
```
@@ -403,3 +415,17 @@ Result:
403415
│ ['test1','test2','test3','test4'] │
404416
└───────────────────────────────────┘
405417
```
418+
419+
Using the ngram tokenizer with ngram length 3:
420+
421+
```sql
422+
SELECT tokens('abc def', 'ngram', 3) AS tokens;
423+
```
424+
425+
Result:
426+
427+
```text
428+
┌─tokens──────────────────────────┐
429+
│ ['abc','bc ','c d',' de','def'] │
430+
└─────────────────────────────────┘
431+
```

0 commit comments

Comments
 (0)