Skip to content

Commit e57e357

Browse files
authored
Merge pull request #1882 from dolthub/daylon/import-regressions-files
Added files for import regression testing
2 parents 593cc85 + 3e25689 commit e57e357

File tree

106 files changed

+187087
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

106 files changed

+187087
-0
lines changed

server/config/parameters_list.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3718,6 +3718,17 @@ var postgresConfigParameters = map[string]sql.SystemVariable{
37183718
ResetVal: int8(0),
37193719
Scope: GetPgsqlScope(PsqlScopeSession),
37203720
},
3721+
"transaction_timeout": &Parameter{
3722+
Name: "transaction_timeout",
3723+
Default: int64(0), // Unit: "ms"
3724+
Category: "Client Connection Defaults / Statement Behavior",
3725+
ShortDesc: "Sets the maximum allowed duration of any transaction within a session (not a prepared transaction).",
3726+
Context: ParameterContextUser,
3727+
Type: types.NewSystemIntType("transaction_timeout", 0, math.MaxInt32, false),
3728+
Source: ParameterSourceDefault,
3729+
ResetVal: int64(0),
3730+
Scope: GetPgsqlScope(PsqlScopeSession),
3731+
},
37213732
"transform_null_equals": &Parameter{
37223733
Name: "transform_null_equals",
37233734
Default: int8(0),

testing/dumps/scraper/main.go

Lines changed: 245 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,245 @@
1+
// Copyright 2025 Dolthub, Inc.
2+
//
3+
// Licensed under the Apache License, Version 2.0 (the "License");
4+
// you may not use this file except in compliance with the License.
5+
// You may obtain a copy of the License at
6+
//
7+
// http://www.apache.org/licenses/LICENSE-2.0
8+
//
9+
// Unless required by applicable law or agreed to in writing, software
10+
// distributed under the License is distributed on an "AS IS" BASIS,
11+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
// See the License for the specific language governing permissions and
13+
// limitations under the License.
14+
15+
package main
16+
17+
import (
18+
"context"
19+
"encoding/json"
20+
"fmt"
21+
"io"
22+
"net/http"
23+
"net/url"
24+
"os"
25+
"path/filepath"
26+
"runtime"
27+
"strconv"
28+
"strings"
29+
"time"
30+
)
31+
32+
// Settings that control what the scraper searches for and how many files it keeps.
const (
33+
query = `extension:sql pg_dump` // Sent as the "q" parameter of every code search request
34+
downloadCount = 110 // The scraper stops once this many files have been saved
35+
)
36+
37+
// RepoName simply contains the name of the repository. It is decoded from the "repository" object
// of a search hit, and the scraper uses FullName both in warnings and (after sanitizing) as the
// base name of the saved file.
38+
type RepoName struct {
39+
FullName string `json:"full_name"` // presumably "owner/repo"; the slash is why the name is sanitized before use
40+
}
41+
42+
// Item is a SQL file (hopefully) containing a pg_dump. It is one element of the "items" array of a
// code search response and only identifies the file; the download location is resolved separately
// by requesting ContentsURL.
43+
type Item struct {
44+
Name string `json:"name"`
45+
Path string `json:"path"` // Path of the file, printed alongside the repository name in warnings
46+
HtmlURL string `json:"html_url"` // Written as the "-- Downloaded from:" header of the saved file
47+
ContentsURL string `json:"url"` // Decoded from the "url" key; requested by GetContent
48+
Repository RepoName `json:"repository"`
49+
}
50+
51+
// CodeSearchResult contains the result of a code search. The same structure is used to decode error
// responses, in which case only Message is expected to be populated.
52+
type CodeSearchResult struct {
53+
TotalCount int `json:"total_count"`
54+
IncompleteResults bool `json:"incomplete_results"`
55+
Items []Item `json:"items"` // The hits of the requested page
56+
Message string `json:"message"` // Only used when there's an error
57+
}
58+
59+
// ContentFile is all of the information about a SQL file, including how to retrieve it. It is
// decoded from the response of the URL stored in Item.ContentsURL.
60+
type ContentFile struct {
61+
Type string `json:"type"` // Entries whose type is not "file" are skipped by the scraper
62+
Name string `json:"name"`
63+
Path string `json:"path"` // Its extension becomes the extension of the saved file
64+
SHA string `json:"sha"`
65+
Size int64 `json:"size"` // Reported in the "saved" message; presumably bytes
66+
HTMLURL string `json:"html_url"`
67+
DownloadURL string `json:"download_url"` // Location of the raw file; entries without it are skipped
68+
}
69+
70+
func main() {
71+
ctx := context.Background()
72+
httpClient := &http.Client{Timeout: 30 * time.Second}
73+
token := os.Getenv("GITHUB_TOKEN")
74+
if len(token) == 0 {
75+
fmt.Println("Must provide a GITHUB_TOKEN as an environment variable")
76+
os.Exit(1)
77+
}
78+
79+
_, currentFileLocation, _, ok := runtime.Caller(0)
80+
if !ok {
81+
fmt.Println("Unable to find the folder where this file is located")
82+
os.Exit(1)
83+
}
84+
dumpsFolder := filepath.Clean(filepath.Join(filepath.Dir(currentFileLocation), "../sql"))
85+
86+
var saved int
87+
page := 1
88+
89+
OuterLoop:
90+
for {
91+
remaining := downloadCount - saved
92+
items, err := SearchCode(ctx, httpClient, token, page, min(50, remaining))
93+
if err != nil {
94+
fmt.Println(err)
95+
os.Exit(1)
96+
}
97+
if len(items) == 0 {
98+
break
99+
}
100+
101+
for _, item := range items {
102+
cf, err := GetContent(ctx, httpClient, token, item.ContentsURL)
103+
if err != nil {
104+
fmt.Printf("warn: %s/%s: %v\n", item.Repository.FullName, item.Path, err)
105+
continue
106+
}
107+
if cf.Type != "file" || cf.DownloadURL == "" {
108+
continue
109+
}
110+
111+
dest := filepath.Join(dumpsFolder, SanitizePath(item.Repository.FullName)+filepath.Ext(cf.Path))
112+
if _, err = os.Stat(dest); err == nil {
113+
continue
114+
}
115+
if err = DownloadFile(ctx, httpClient, item, cf.DownloadURL, dest); err != nil {
116+
fmt.Printf("download error: %s -> %v\n", dest, err)
117+
continue
118+
}
119+
fmt.Printf("saved: %s (%d bytes)\n", dest, cf.Size)
120+
121+
saved++
122+
if saved >= downloadCount {
123+
break OuterLoop
124+
}
125+
time.Sleep(6500 * time.Millisecond) // We sleep to mitigate rate limits
126+
}
127+
page++
128+
}
129+
}
130+
131+
// SearchCode executes the query against the API, returning all items that were found.
132+
func SearchCode(ctx context.Context, hc *http.Client, token string, page int, perPage int) ([]Item, error) {
133+
params := url.Values{}
134+
params.Set("q", query)
135+
params.Set("page", strconv.Itoa(page))
136+
params.Set("per_page", strconv.Itoa(perPage))
137+
138+
req, _ := http.NewRequestWithContext(ctx, "GET", "https://api.github.com/search/code?"+params.Encode(), nil)
139+
SetHeaders(req, token)
140+
resp, err := hc.Do(req)
141+
if err != nil {
142+
return nil, err
143+
}
144+
defer resp.Body.Close()
145+
if HandleRate(resp) {
146+
return SearchCode(ctx, hc, token, page, perPage)
147+
}
148+
var sr CodeSearchResult
149+
if err = json.NewDecoder(resp.Body).Decode(&sr); err != nil {
150+
return nil, err
151+
}
152+
if resp.StatusCode != 200 {
153+
if sr.Message != "" {
154+
return nil, fmt.Errorf("search error: %s (HTTP %d)", sr.Message, resp.StatusCode)
155+
}
156+
return nil, fmt.Errorf("search error: HTTP %d", resp.StatusCode)
157+
}
158+
return sr.Items, nil
159+
}
160+
161+
// GetContent gets the ContentFile from the given URL.
162+
func GetContent(ctx context.Context, hc *http.Client, token string, contentsURL string) (*ContentFile, error) {
163+
req, _ := http.NewRequestWithContext(ctx, "GET", contentsURL, nil)
164+
SetHeaders(req, token)
165+
resp, err := hc.Do(req)
166+
if err != nil {
167+
return nil, err
168+
}
169+
defer resp.Body.Close()
170+
if HandleRate(resp) {
171+
return GetContent(ctx, hc, token, contentsURL)
172+
}
173+
if resp.StatusCode != 200 {
174+
b, _ := io.ReadAll(resp.Body)
175+
return nil, fmt.Errorf("contents error: HTTP %d: %s", resp.StatusCode, strings.TrimSpace(string(b)))
176+
}
177+
var cf ContentFile
178+
if err = json.NewDecoder(resp.Body).Decode(&cf); err != nil {
179+
return nil, err
180+
}
181+
return &cf, nil
182+
}
183+
184+
// DownloadFile downloads the given SQL file to the destination.
185+
func DownloadFile(ctx context.Context, hc *http.Client, item Item, rawURL string, dest string) error {
186+
req, _ := http.NewRequestWithContext(ctx, "GET", rawURL, nil)
187+
req.Header.Set("User-Agent", "gh-pg-dump-finder/1.0")
188+
resp, err := hc.Do(req)
189+
if err != nil {
190+
return err
191+
}
192+
defer resp.Body.Close()
193+
if resp.StatusCode != 200 {
194+
return fmt.Errorf("download HTTP %d", resp.StatusCode)
195+
}
196+
out, err := os.Create(dest)
197+
if err != nil {
198+
return err
199+
}
200+
defer out.Close()
201+
_, _ = io.WriteString(out, fmt.Sprintf("-- Downloaded from: %s\n", item.HtmlURL))
202+
_, err = io.Copy(out, resp.Body)
203+
return err
204+
}
205+
206+
// SetHeaders sets the appropriate headers for a GitHub REST API request.
//
// It requests the JSON media type recommended by GitHub, pins the REST API version so that responses
// keep their current shape, identifies the client through the User-Agent, and authenticates with
// token. An empty token leaves the Authorization header unset instead of sending an empty bearer
// credential.
func SetHeaders(req *http.Request, token string) {
	req.Header.Set("Accept", "application/vnd.github+json")
	req.Header.Set("X-GitHub-Api-Version", "2022-11-28")
	req.Header.Set("User-Agent", "gh-pg-dump-finder/1.0")
	if token != "" {
		req.Header.Set("Authorization", "Bearer "+token)
	}
}
212+
213+
// HandleRate handles potential rate limits. When resp is a rate-limit response that says how long to
// wait, it sleeps for that long and returns true so that the caller can send the request again. In
// every other case it returns false immediately.
//
// Both 403 and 429 are considered, since GitHub uses either status for rate limiting. The wait is
// taken from the Retry-After header when it holds a positive number of seconds. Otherwise the
// X-RateLimit-Reset header is used, but only when X-RateLimit-Remaining does not show remaining
// quota: the reset header accompanies ordinary responses too, so without that check a plain
// "forbidden" response would be retried over and over. Resets that are in the past or five minutes
// or more away are not waited for.
func HandleRate(resp *http.Response) bool {
	if resp == nil {
		return false
	}
	if resp.StatusCode != http.StatusForbidden && resp.StatusCode != http.StatusTooManyRequests {
		return false
	}
	wait, limited := rateLimitWait(resp.Header, time.Now())
	if !limited {
		return false
	}
	fmt.Printf("rate limited (%s), retrying\n", wait.String())
	time.Sleep(wait)
	return true
}

// rateLimitWait derives from the rate-limit headers in h how long to wait before retrying, measured
// from now. The boolean result is false when the headers do not describe a usable wait.
func rateLimitWait(h http.Header, now time.Time) (time.Duration, bool) {
	if ra := h.Get("Retry-After"); ra != "" {
		if secs, err := strconv.Atoi(ra); err == nil && secs > 0 {
			return time.Duration(secs) * time.Second, true
		}
	}

	// Remaining quota means the response was rejected for some other reason. A missing or
	// unparsable header is treated as unknown and falls through to the reset time.
	if remaining, err := strconv.Atoi(h.Get("X-RateLimit-Remaining")); err == nil && remaining > 0 {
		return 0, false
	}

	reset := h.Get("X-RateLimit-Reset")
	if reset == "" {
		return 0, false
	}
	ts, err := strconv.ParseInt(reset, 10, 64)
	if err != nil || ts <= 0 {
		return 0, false
	}
	// Five seconds of slack guard against clock differences with the server.
	wait := time.Unix(ts+5, 0).Sub(now)
	if wait <= 0 || wait >= 5*time.Minute {
		return 0, false
	}
	return wait, true
}
237+
238+
// SanitizePath removes potentially invalid file system characters. Every occurrence of one of
// < > : " \ | / ? * and every ASCII control character (including NUL) in s is replaced with an
// underscore; everything else is returned unchanged.
//
// All replaced characters are single-byte ASCII, so s is processed byte by byte in one pass. Bytes
// that belong to multi-byte UTF-8 sequences are never below 0x80 and are therefore copied as-is.
func SanitizePath(s string) string {
	const illegal = `<>:"\|/?*`

	var b strings.Builder
	b.Grow(len(s))
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c < 0x20 || strings.IndexByte(illegal, c) >= 0 {
			b.WriteByte('_')
			continue
		}
		b.WriteByte(c)
	}
	return b.String()
}

0 commit comments

Comments
 (0)