Skip to content

Commit 70584ec

Browse files
authored
infra: download regression tests speed up (#756)
5s instead of 40s
1 parent cd4f162 commit 70584ec

File tree

3 files changed

+470
-59
lines changed

3 files changed

+470
-59
lines changed

.gitignore

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,4 +9,3 @@
99
dist/
1010

1111
.DS_Store
12-
temp.sql

crates/xtask/src/download_regression_tests.rs

Lines changed: 50 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -121,33 +121,63 @@ fn download_regression_suite() -> Result<Utf8PathBuf> {
121121

122122
create_dir_all(&target_dir)?;
123123

124-
let urls = fetch_download_urls()?;
125-
let total_files = urls.len();
124+
let clone_dir = Utf8PathBuf::try_from(std::env::temp_dir())
125+
.map_err(|_| anyhow::anyhow!("temp dir path is not valid UTF-8"))?
126+
.join("postgres_sparse_clone");
126127

127-
for (index, url) in urls.iter().enumerate() {
128-
let filename = url.split('/').next_back().unwrap();
129-
if filename.contains("psql") {
130-
continue;
131-
}
132-
let filepath = target_dir.join(filename);
128+
if clone_dir.exists() {
129+
remove_dir_all(&clone_dir)?;
130+
}
133131

134-
println!(
135-
"[{}/{}] Downloading {}... ",
136-
index + 1,
137-
total_files,
138-
filename
139-
);
132+
println!("Cloning postgres repository with sparse checkout...");
140133

141-
let output = Command::new("curl").args(["-s", url]).output()?;
134+
let status = Command::new("git")
135+
.args([
136+
"clone",
137+
"--filter=blob:none",
138+
"--depth=1",
139+
"--sparse",
140+
"https://github.com/postgres/postgres.git",
141+
])
142+
.arg(clone_dir.as_str())
143+
.status()?;
142144

143-
if !output.status.success() {
144-
let error_msg = String::from_utf8_lossy(&output.stderr);
145-
bail!("Failed to download '{}': {}", url, error_msg);
146-
}
145+
if !status.success() {
146+
bail!("Failed to clone postgres repository");
147+
}
148+
149+
println!("Setting up sparse checkout for src/test/regress/sql...");
147150

148-
File::create(&filepath)?.write_all(&output.stdout)?;
151+
let status = Command::new("git")
152+
.args(["sparse-checkout", "set", "src/test/regress/sql"])
153+
.current_dir(&clone_dir)
154+
.status()?;
155+
156+
if !status.success() {
157+
bail!("Failed to set sparse checkout");
149158
}
150159

160+
println!("Copying SQL files...");
161+
let source_dir = clone_dir.join("src/test/regress/sql");
162+
163+
let mut file_count = 0;
164+
for entry in std::fs::read_dir(&source_dir)? {
165+
let entry = entry?;
166+
let path = Utf8PathBuf::try_from(entry.path())?;
167+
if path.extension() == Some("sql") {
168+
let filename = path.file_name().unwrap();
169+
if !filename.contains("psql") {
170+
std::fs::copy(&path, target_dir.join(filename))?;
171+
file_count += 1;
172+
}
173+
}
174+
}
175+
176+
println!("Copied {file_count} SQL files");
177+
178+
println!("Cleaning up clone directory...");
179+
remove_dir_all(&clone_dir)?;
180+
151181
Ok(target_dir)
152182
}
153183

@@ -195,44 +225,6 @@ fn transform_regression_suite(input_dir: &Utf8PathBuf) -> Result<()> {
195225
Ok(())
196226
}
197227

198-
fn fetch_download_urls() -> Result<Vec<String>> {
199-
println!("Fetching SQL file URLs...");
200-
let output = Command::new("gh")
201-
.args([
202-
"api",
203-
"-H",
204-
"Accept: application/vnd.github+json",
205-
"/repos/postgres/postgres/contents/src/test/regress/sql",
206-
])
207-
.output()?;
208-
209-
if !output.status.success() {
210-
bail!(
211-
"Failed to fetch SQL files: {}",
212-
String::from_utf8_lossy(&output.stderr)
213-
);
214-
}
215-
216-
let json_str = String::from_utf8(output.stdout)?;
217-
let files: Vec<serde_json::Value> = serde_json::from_str(&json_str)?;
218-
219-
let urls: Vec<String> = files
220-
.into_iter()
221-
.filter(|file| {
222-
file["name"]
223-
.as_str()
224-
.is_some_and(|name| name.ends_with(".sql"))
225-
})
226-
.filter_map(|file| file["download_url"].as_str().map(String::from))
227-
.collect();
228-
229-
if urls.is_empty() {
230-
bail!("No SQL files found");
231-
}
232-
233-
Ok(urls)
234-
}
235-
236228
// The regression suite from Postgres has a mix of valid and invalid SQL. We
237229
// don't have a good way to determine which is which, so we munge the data to
238230
// comment out any problematic code.

0 commit comments

Comments
 (0)