
Commit 4394106

chore: add xtask to download regression tests (#482)
EDIT: Removed the tests - this PR is just the `xtask` now :) can't tag you as a reviewer for some reason, but this is ready @sbdchd

Adds the Postgres regression test suite to stress-test the parser:

- an `xtask` that downloads and preprocesses the SQL files from the Postgres repo
- a test in the `squawk_parser` crate that runs all of them through the parser

In the preprocessing step, we remove everything that we do not want to support (yet), mainly psql meta-commands such as `\gset`. Right now, all tests fail. I peeked through some of the tests and it's mainly due to `:name` variables. I think it makes sense to lex them as `IDENT` tokens? Happy to add support for this.

Overall, I think this test suite can help stabilise the parser. My hope is that it becomes stable enough to replace `libpg_query` in the postgres language server.

Also: not sure where to put the tests, since currently they are all part of the crate itself. For this kind of test, I usually integrate them as [integration tests](https://doc.rust-lang.org/rust-by-example/testing/integration_testing.html). Let me know what you prefer! :) Also happy to not add them at all, or not do snapshot testing, or download the files and run the tests on-demand to not pollute the codebase! Maybe a separate xtask that can be executed on-demand in CI / locally makes sense too - like https://areweturboyet.com
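For context on the constructs mentioned above (an illustrative example, not taken from any particular regression file): a statement like `SELECT 42 AS answer \gset` asks psql to store the result column in a client-side variable, and a later statement such as `SELECT :answer + 1;` interpolates it via `:name`. The preprocessing below rewrites the former into a plain `;`-terminated statement, while the latter is what currently trips the lexer.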
1 parent cb38845 commit 4394106

File tree

5 files changed: +145 -0 lines changed


.gitignore

Lines changed: 2 additions & 0 deletions
@@ -7,3 +7,5 @@
 # IntelliJ IDE users
 .idea
 dist
+
+.DS_Store

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default.

crates/xtask/Cargo.toml

Lines changed: 1 addition & 0 deletions
@@ -13,6 +13,7 @@ clap.workspace = true
 enum-iterator.workspace = true
 reqwest = { version = "0.12.9", features = ["blocking", "json"] }
 serde.workspace = true
+serde_json.workspace = true
 convert_case.workspace = true
 camino.workspace = true
 

crates/xtask/src/download_regression_tests.rs

Lines changed: 137 additions & 0 deletions

@@ -0,0 +1,137 @@
+use anyhow::{bail, Result};
+use camino::Utf8PathBuf;
+use std::fs::{create_dir_all, remove_dir_all, File};
+use std::io::{BufRead, Cursor, Write};
+use std::process::Command;
+
+pub(crate) fn download_regression_tests() -> Result<()> {
+    let target_dir = Utf8PathBuf::from("crates/squawk_parser/tests/data/regression_suite");
+
+    if target_dir.exists() {
+        println!("Cleaning target directory: {:?}", target_dir);
+        remove_dir_all(&target_dir)?;
+    }
+
+    create_dir_all(&target_dir)?;
+
+    let urls = fetch_download_urls()?;
+    let total_files = urls.len();
+
+    for (index, url) in urls.iter().enumerate() {
+        let filename = url.split('/').last().unwrap();
+        let filepath = target_dir.join(filename);
+
+        println!(
+            "[{}/{}] Downloading {}... ",
+            index + 1,
+            total_files,
+            filename
+        );
+
+        let output = Command::new("curl").args(["-s", url]).output()?;
+
+        if !output.status.success() {
+            let error_msg = String::from_utf8_lossy(&output.stderr);
+            bail!(anyhow::anyhow!(
+                "Failed to download '{}': {}",
+                url,
+                error_msg
+            ));
+        }
+
+        let mut processed_content = Vec::new();
+
+        let cursor = Cursor::new(&output.stdout);
+
+        if let Err(e) = preprocess_sql(cursor, &mut processed_content) {
+            eprintln!("Error: Failed to process file: {}", e);
+            continue;
+        }
+
+        let mut dest = File::create(&filepath)?;
+        dest.write_all(&processed_content)?;
+    }
+
+    Ok(())
+}
+
+fn fetch_download_urls() -> Result<Vec<String>> {
+    // Fetch list of SQL file URLs
+    println!("Fetching SQL file URLs...");
+    let output = Command::new("gh")
+        .args([
+            "api",
+            "-H",
+            "Accept: application/vnd.github+json",
+            "/repos/postgres/postgres/contents/src/test/regress/sql",
+        ])
+        .output()?;
+
+    if !output.status.success() {
+        bail!(anyhow::anyhow!(
+            "Failed to fetch SQL files: {}",
+            String::from_utf8_lossy(&output.stderr)
+        ));
+    }
+
+    let json_str = String::from_utf8(output.stdout)?;
+    let files: Vec<serde_json::Value> = serde_json::from_str(&json_str)?;
+
+    // Extract download URLs for SQL files
+    let urls: Vec<String> = files
+        .into_iter()
+        .filter(|file| {
+            file["name"]
+                .as_str()
+                .map(|name| name.ends_with(".sql"))
+                .unwrap_or(false)
+        })
+        .filter_map(|file| file["download_url"].as_str().map(String::from))
+        .collect();
+
+    if urls.is_empty() {
+        bail!(anyhow::anyhow!("No SQL files found"));
+    }
+
+    Ok(urls)
+}
+
+fn preprocess_sql<R: BufRead, W: Write>(source: R, mut dest: W) -> Result<()> {
+    let mut skipping_copy_block = false;
+
+    for line in source.lines() {
+        let mut line = line?;
+
+        // Detect the start of the COPY block
+        if line.starts_with("COPY ") && line.to_lowercase().contains("from stdin") {
+            skipping_copy_block = true;
+            continue;
+        }
+
+        // Detect the end of the COPY block
+        if skipping_copy_block && (line.starts_with("\\.") || line.is_empty()) {
+            skipping_copy_block = false;
+            continue;
+        }
+
+        // Skip lines if inside a COPY block
+        if skipping_copy_block {
+            continue;
+        }
+
+        if line.starts_with("\\") {
+            // Skip psql meta-commands (for now)
+            continue;
+        }
+
+        // replace "\gset" with ";"
+        if line.contains("\\gset") {
+            line = line.replace("\\gset", ";");
+        }
+
+        // Write the cleaned line
+        writeln!(dest, "{}", line)?;
+    }
+
+    Ok(())
+}
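To pin down the preprocessing contract, here is a minimal test sketch (hypothetical, not part of this commit; the module and test names are made up). Since `preprocess_sql` is private, it would have to live in the same file:

#[cfg(test)]
mod tests {
    use super::preprocess_sql;

    #[test]
    fn strips_copy_blocks_and_meta_commands() {
        // Mixes a \gset suffix, a COPY ... FROM stdin block, and a \d meta-command.
        let input = "SELECT 42 AS answer \\gset\nCOPY tbl FROM stdin;\n1\t2\n\\.\n\\d tbl\nSELECT 2;\n";
        let mut output = Vec::new();
        preprocess_sql(input.as_bytes(), &mut output).unwrap();
        // The COPY payload and the \d line are dropped; \gset becomes ";".
        assert_eq!(
            String::from_utf8(output).unwrap(),
            "SELECT 42 AS answer ;\nSELECT 2;\n"
        );
    }
}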

crates/xtask/src/main.rs

Lines changed: 4 additions & 0 deletions

@@ -5,6 +5,7 @@ use generate_keywords::generate_keywords;
 use new_rule::new_lint;
 use sync_kwlist::sync_kwlist;
 
+mod download_regression_tests;
 mod generate_keywords;
 mod new_rule;
 mod path_util;
@@ -18,6 +19,8 @@ enum TaskName {
     SyncKwlist,
     #[command(long_about = "Create a new linter rule")]
     NewRule(NewRuleArgs),
+    #[command(long_about = "Download and process regression tests from Postgres")]
+    DownloadRegressionTests,
 }
 
 #[derive(Args, Debug)]
@@ -40,5 +43,6 @@ fn main() -> Result<()> {
         TaskName::GenerateKeywords => generate_keywords(),
         TaskName::SyncKwlist => sync_kwlist(),
         TaskName::NewRule(args) => new_lint(args),
+        TaskName::DownloadRegressionTests => download_regression_tests::download_regression_tests(),
     }
 }
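With this wiring, the task should be invocable as `cargo run -p xtask -- download-regression-tests` (assuming the package is named `xtask` and clap's default kebab-case rendering of the `DownloadRegressionTests` variant).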
