Skip to content

Commit c4e8013

Browse files
authored
Merge pull request #33324 from def-/pr-nightly-skip2
ci: Fix nightly and add a lint to prevent problems
2 parents d047331 + 1cccc56 commit c4e8013

File tree

14 files changed

+168
-158
lines changed

14 files changed

+168
-158
lines changed

ci/mkpipeline.py

Lines changed: 60 additions & 101 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ def main() -> int:
8282
so it is executed.""",
8383
)
8484

85+
parser.add_argument("--dry-run", action="store_true")
8586
parser.add_argument("--coverage", action="store_true")
8687
parser.add_argument(
8788
"--sanitizer",
@@ -193,6 +194,7 @@ def fetch_hashes() -> None:
193194
args.bazel_remote_cache,
194195
bazel_lto,
195196
)
197+
truncate_skip_length(pipeline)
196198
handle_sanitizer_skip(pipeline, args.sanitizer)
197199
increase_agents_timeouts(pipeline, args.sanitizer, args.coverage)
198200
prioritize_pipeline(pipeline, args.priority)
@@ -221,9 +223,10 @@ def fetch_hashes() -> None:
221223

222224
print("--- Uploading new pipeline:")
223225
print(yaml.dump(pipeline))
224-
spawn.runv(
225-
["buildkite-agent", "pipeline", "upload"], stdin=yaml.dump(pipeline).encode()
226-
)
226+
cmd = ["buildkite-agent", "pipeline", "upload"]
227+
if args.dry_run:
228+
cmd.append("--dry-run")
229+
spawn.runv(cmd, stdin=yaml.dump(pipeline).encode())
227230

228231
return 0
229232

@@ -264,56 +267,48 @@ def prioritize_pipeline(pipeline: Any, priority: int) -> None:
264267
if build_author == "Dependabot":
265268
priority -= 40
266269

267-
def visit(config: Any) -> None:
270+
for step in steps(pipeline):
271+
if "trigger" in step or "wait" in step or "group" in step:
272+
# Trigger and Wait steps do not allow priorities.
273+
continue
268274
# Increase priority for larger Hetzner-based tests so that they get
269275
# preferential treatment on the agents which also accept smaller jobs.
270276
agent_priority = 0
271-
if "agents" in config:
272-
agent = config["agents"].get("queue", None)
277+
if "agents" in step:
278+
agent = step["agents"].get("queue", None)
273279
if agent == "hetzner-aarch64-8cpu-16gb":
274280
agent_priority = 1
275281
if agent == "hetzner-aarch64-16cpu-32gb":
276282
agent_priority = 2
277-
config["priority"] = config.get("priority", 0) + priority + agent_priority
283+
step["priority"] = step.get("priority", 0) + priority + agent_priority
278284

279-
for config in pipeline["steps"]:
280-
if "trigger" in config or "wait" in config:
281-
# Trigger and Wait steps do not allow priorities.
282-
continue
283-
if "group" in config:
284-
for inner_config in config.get("steps", []):
285-
visit(inner_config)
286-
continue
287-
visit(config)
285+
286+
def truncate_skip_length(pipeline: Any) -> None:
287+
for step in steps(pipeline):
288+
if len(str(step.get("skip", ""))) > 70:
289+
step["skip"] = step["skip"][:70]
288290

289291

290292
def handle_sanitizer_skip(pipeline: Any, sanitizer: Sanitizer) -> None:
291293
if sanitizer != Sanitizer.none:
292294
pipeline.setdefault("env", {})["CI_SANITIZER"] = sanitizer.value
293295

294-
def visit(step: dict[str, Any]) -> None:
296+
for step in steps(pipeline):
295297
if step.get("sanitizer") == "skip":
296298
step["skip"] = True
297299

298300
else:
299301

300-
def visit(step: dict[str, Any]) -> None:
302+
for step in steps(pipeline):
301303
if step.get("sanitizer") == "only":
302304
step["skip"] = True
303305

304-
for step in pipeline["steps"]:
305-
visit(step)
306-
if "group" in step:
307-
for inner_step in step.get("steps", []):
308-
visit(inner_step)
309-
310306

311307
def increase_agents_timeouts(
312308
pipeline: Any, sanitizer: Sanitizer, coverage: bool
313309
) -> None:
314310
if sanitizer != Sanitizer.none or os.getenv("CI_SYSTEM_PARAMETERS", "") == "random":
315-
316-
def visit(step: dict[str, Any]) -> None:
311+
for step in steps(pipeline):
317312
# Most sanitizer runs, as well as random permutations of system
318313
# parameters, are slower and need more memory. The default system
319314
# parameters in CI are chosen to be efficient for execution, while
@@ -359,13 +354,6 @@ def visit(step: dict[str, Any]) -> None:
359354
agent = "hetzner-x86-64-dedi-48cpu-192gb"
360355
step["agents"] = {"queue": agent}
361356

362-
for step in pipeline["steps"]:
363-
visit(step)
364-
# Groups can't be nested, so handle them explicitly here instead of recursing
365-
if "group" in step:
366-
for inner_step in step.get("steps", []):
367-
visit(inner_step)
368-
369357
if coverage:
370358
pipeline["env"]["CI_COVERAGE_ENABLED"] = 1
371359

@@ -487,92 +475,81 @@ def switch_jobs_to_aws(pipeline: Any, priority: int) -> None:
487475

488476
print(f"Queues stuck in Hetzner, switching to AWS or another arch: {stuck}")
489477

490-
def visit(config: Any) -> None:
491-
if "agents" not in config:
492-
return
478+
for step in steps(pipeline):
479+
# Trigger and Wait steps don't have agents
480+
if "trigger" in step or "wait" in step or "group" in step:
481+
continue
493482

494-
agent = config["agents"].get("queue", None)
483+
if "agents" not in step:
484+
continue
485+
486+
agent = step["agents"].get("queue", None)
495487
if not agent in stuck:
496-
return
488+
continue
497489

498490
if agent == "hetzner-aarch64-2cpu-4gb":
499491
if "hetzner-x86-64-2cpu-4gb" not in stuck:
500-
config["agents"]["queue"] = "hetzner-x86-64-2cpu-4gb"
501-
if config.get("depends_on") == "build-aarch64":
502-
config["depends_on"] = "build-x86_64"
492+
step["agents"]["queue"] = "hetzner-x86-64-2cpu-4gb"
493+
if step.get("depends_on") == "build-aarch64":
494+
step["depends_on"] = "build-x86_64"
503495
else:
504-
config["agents"]["queue"] = "linux-aarch64"
496+
step["agents"]["queue"] = "linux-aarch64"
505497
elif agent == "hetzner-aarch64-4cpu-8gb":
506498
if "hetzner-x86-64-4cpu-8gb" not in stuck:
507-
config["agents"]["queue"] = "hetzner-x86-64-4cpu-8gb"
508-
if config.get("depends_on") == "build-aarch64":
509-
config["depends_on"] = "build-x86_64"
499+
step["agents"]["queue"] = "hetzner-x86-64-4cpu-8gb"
500+
if step.get("depends_on") == "build-aarch64":
501+
step["depends_on"] = "build-x86_64"
510502
else:
511-
config["agents"]["queue"] = "linux-aarch64"
503+
step["agents"]["queue"] = "linux-aarch64"
512504
elif agent == "hetzner-aarch64-8cpu-16gb":
513505
if "hetzner-x86-64-8cpu-16gb" not in stuck:
514-
config["agents"]["queue"] = "hetzner-x86-64-8cpu-16gb"
515-
if config.get("depends_on") == "build-aarch64":
516-
config["depends_on"] = "build-x86_64"
506+
step["agents"]["queue"] = "hetzner-x86-64-8cpu-16gb"
507+
if step.get("depends_on") == "build-aarch64":
508+
step["depends_on"] = "build-x86_64"
517509
else:
518-
config["agents"]["queue"] = "linux-aarch64-medium"
510+
step["agents"]["queue"] = "linux-aarch64-medium"
519511

520512
elif agent == "hetzner-aarch64-16cpu-32gb":
521513
if "hetzner-x86-64-16cpu-32gb" not in stuck:
522-
config["agents"]["queue"] = "hetzner-x86-64-16cpu-32gb"
523-
if config.get("depends_on") == "build-aarch64":
524-
config["depends_on"] = "build-x86_64"
514+
step["agents"]["queue"] = "hetzner-x86-64-16cpu-32gb"
515+
if step.get("depends_on") == "build-aarch64":
516+
step["depends_on"] = "build-x86_64"
525517
else:
526-
config["agents"]["queue"] = "linux-aarch64-medium"
518+
step["agents"]["queue"] = "linux-aarch64-medium"
527519

528520
elif agent in ("hetzner-x86-64-4cpu-8gb", "hetzner-x86-64-2cpu-4gb"):
529-
config["agents"]["queue"] = "linux-x86_64"
521+
step["agents"]["queue"] = "linux-x86_64"
530522
elif agent in ("hetzner-x86-64-8cpu-16gb", "hetzner-x86-64-16cpu-32gb"):
531-
config["agents"]["queue"] = "linux-x86_64-medium"
523+
step["agents"]["queue"] = "linux-x86_64-medium"
532524
elif agent == "hetzner-x86-64-dedi-2cpu-8gb":
533-
config["agents"]["queue"] = "linux-x86_64"
525+
step["agents"]["queue"] = "linux-x86_64"
534526
elif agent == "hetzner-x86-64-dedi-4cpu-16gb":
535-
config["agents"]["queue"] = "linux-x86_64-medium"
527+
step["agents"]["queue"] = "linux-x86_64-medium"
536528
elif agent in (
537529
"hetzner-x86-64-dedi-8cpu-32gb",
538530
"hetzner-x86-64-dedi-16cpu-64gb",
539531
):
540-
config["agents"]["queue"] = "linux-x86_64-large"
532+
step["agents"]["queue"] = "linux-x86_64-large"
541533
elif agent in (
542534
"hetzner-x86-64-dedi-32cpu-128gb",
543535
"hetzner-x86-64-dedi-48cpu-192gb",
544536
):
545-
config["agents"]["queue"] = "builder-linux-x86_64"
546-
547-
for config in pipeline["steps"]:
548-
if "trigger" in config or "wait" in config:
549-
# Trigger and Wait steps don't have agents
550-
continue
551-
if "group" in config:
552-
for inner_config in config.get("steps", []):
553-
visit(inner_config)
554-
continue
555-
visit(config)
537+
step["agents"]["queue"] = "builder-linux-x86_64"
556538

557539

558540
def permit_rerunning_successful_steps(pipeline: Any) -> None:
559-
def visit(step: Any) -> None:
541+
for step in steps(pipeline):
542+
if "trigger" in step or "wait" in step or "group" in step or "block" in step:
543+
continue
560544
step.setdefault("retry", {}).setdefault("manual", {}).setdefault(
561545
"permit_on_passed", True
562546
)
563547

564-
for config in pipeline["steps"]:
565-
if "trigger" in config or "wait" in config or "block" in config:
566-
continue
567-
if "group" in config:
568-
for inner_config in config.get("steps", []):
569-
visit(inner_config)
570-
continue
571-
visit(config)
572-
573548

574549
def set_retry_on_agent_lost(pipeline: Any) -> None:
575-
def visit(step: Any) -> None:
550+
for step in steps(pipeline):
551+
if "trigger" in step or "wait" in step or "group" in step or "block" in step:
552+
continue
576553
step.setdefault("retry", {}).setdefault("automatic", []).extend(
577554
[
578555
{
@@ -591,15 +568,6 @@ def visit(step: Any) -> None:
591568
]
592569
)
593570

594-
for config in pipeline["steps"]:
595-
if "trigger" in config or "wait" in config or "block" in config:
596-
continue
597-
if "group" in config:
598-
for inner_config in config.get("steps", []):
599-
visit(inner_config)
600-
continue
601-
visit(config)
602-
603571

604572
def set_default_agents_queue(pipeline: Any) -> None:
605573
for step in steps(pipeline):
@@ -614,19 +582,10 @@ def set_default_agents_queue(pipeline: Any) -> None:
614582

615583

616584
def set_parallelism_name(pipeline: Any) -> None:
617-
def visit(step: Any) -> None:
585+
for step in steps(pipeline):
618586
if step.get("parallelism", 1) > 1:
619587
step["label"] += " %N"
620588

621-
for config in pipeline["steps"]:
622-
if "trigger" in config or "wait" in config or "block" in config:
623-
continue
624-
if "group" in config:
625-
for inner_config in config.get("steps", []):
626-
visit(inner_config)
627-
continue
628-
visit(config)
629-
630589

631590
def check_depends_on(pipeline: Any, pipeline_name: str) -> None:
632591
if pipeline_name not in ("test", "nightly", "release-qualification"):
@@ -639,7 +598,7 @@ def check_depends_on(pipeline: Any, pipeline_name: str) -> None:
639598
# has completed, without waiting for block or wait steps unless those
640599
# are also explicit dependencies.
641600
if step.get("id") in ("analyze", "deploy", "coverage-pr-analyze"):
642-
return
601+
continue
643602

644603
if (
645604
"depends_on" not in step

ci/nightly/pipeline.template.yml

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -425,8 +425,7 @@ steps:
425425
depends_on: build-aarch64
426426
timeout_in_minutes: 150
427427
parallelism: 8
428-
# disabled by default
429-
skip: true
428+
skip: "disabled by default"
430429
agents:
431430
queue: hetzner-aarch64-8cpu-16gb
432431
plugins:
@@ -1073,7 +1072,7 @@ steps:
10731072
# Uses .td-file based parallelism instead
10741073
args: [-m=long, test/cloudtest/test_upgrade.py, --no-test-parallelism]
10751074
sanitizer: skip
1076-
skip: "TODO(def-): Reenable in one version when labels are fixed in old version"
1075+
skip: "TODO(def-) Reenable in one version when labels are fixed in old version"
10771076

10781077
- group: "K8s node recovery cloudtest"
10791078
key: k8s-node-recovery
check-pipeline.sh (new file; directory path not captured in this extract)

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
#!/usr/bin/env bash
2+
3+
# Copyright Materialize, Inc. and contributors. All rights reserved.
4+
#
5+
# Use of this software is governed by the Business Source License
6+
# included in the LICENSE file at the root of this repository.
7+
#
8+
# As of the Change Date specified in that file, in accordance with
9+
# the Business Source License, use of this software will be governed
10+
# by the Apache License, Version 2.0.
11+
#
12+
# check-pipeline.sh: Sanity check for pipelines
13+
14+
set -euo pipefail
15+
16+
cd "$(dirname "$0")/../../../.."
17+
18+
. misc/shlib/shlib.bash
19+
20+
: "${CI:=0}"
21+
22+
if ! is_truthy "$CI"; then
23+
# Requires buildkite agent-access-token, which won't be available locally
24+
exit
25+
fi
26+
27+
unset CI_TEST_IDS
28+
unset CI_TEST_SELECTION
29+
unset CI_SANITIZER
30+
unset CI_COVERAGE_ENABLED
31+
unset CI_WAITING_FOR_BUILD
32+
33+
pids=()
34+
for pipeline in $(find ci -name "pipeline.template.yml" -not -path "ci/test/pipeline.template.yml" -exec dirname {} \; | cut -d/ -f2); do
35+
bin/pyactivate -m ci.mkpipeline "$pipeline" --dry-run &
36+
pids+=($!)
37+
done
38+
39+
for pid in "${pids[@]}"; do
40+
try wait "$pid"
41+
done
42+
43+
try_status_report

misc/python/materialize/checks/all_checks/sink.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -709,6 +709,9 @@ def validate(self) -> Testdrive:
709709
710710
# We check the contents of the sink topics by re-ingesting them.
711711
712+
# Still needs to sleep some before the topic exists
713+
$ sleep-is-probably-flaky-i-have-justified-my-need-with-a-comment duration="5s"
714+
712715
> CREATE SOURCE sink_view_comments1_src
713716
FROM KAFKA CONNECTION kafka_conn (TOPIC 'sink-sink-comments1')
714717
> CREATE TABLE sink_view_comments1 FROM SOURCE sink_view_comments1_src (REFERENCE "sink-sink-comments1")

src/persist-client/src/internal/state.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4372,6 +4372,7 @@ pub(crate) mod tests {
43724372
/// This golden will have to be updated each time we change State, but
43734373
/// that's a feature, not a bug.
43744374
#[mz_ore::test]
4375+
#[cfg_attr(miri, ignore)] // too slow
43754376
fn state_inspect_serde_json() {
43764377
const STATE_SERDE_JSON: &str = include_str!("state_serde.json");
43774378
let mut runner = proptest::test_runner::TestRunner::deterministic();

test/pubsub-disruption/mzcompose.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
136136
input=SCHEMA
137137
+ dedent(
138138
"""
139+
$ set-sql-timeout duration=120s
140+
139141
> UPDATE t1 SET f2 = 3;
140142
$ kafka-ingest format=avro key-format=avro topic=pubsub-disruption schema=${schema} key-schema=${keyschema} start-iteration=1 repeat=1000000
141143
{"f1": ${kafka-ingest.iteration}} {"f2": 3}
@@ -167,6 +169,7 @@ def workflow_default(c: Composition, parser: WorkflowArgumentParser) -> None:
167169
input=SCHEMA
168170
+ dedent(
169171
"""
172+
$ set-sql-timeout duration=120s
170173
> UPDATE t1 SET f2 = 4;
171174
$ kafka-ingest format=avro key-format=avro topic=pubsub-disruption schema=${schema} key-schema=${keyschema} start-iteration=1 repeat=1000000
172175
{"f1": ${kafka-ingest.iteration}} {"f2": 4}

test/sqllogictest/materialized_views.slt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1568,7 +1568,7 @@ c_schedule_5 manual NULL
15681568
c_schedule_hydration_time_estimate on-refresh 00:16:35
15691569

15701570
statement ok
1571-
SELECT mz_unsafe.mz_sleep(4);
1571+
SELECT mz_unsafe.mz_sleep(8);
15721572

15731573
query TTTTBT rowsort
15741574
SELECT DISTINCT

0 commit comments

Comments
 (0)