Skip to content

Commit 0e0a622

Browse files
committed
fix(go): debug stuck ingestion job
1 parent 8e2e298 commit 0e0a622

File tree

8 files changed

+90
-315
lines changed

8 files changed

+90
-315
lines changed

.github/workflows/go_test.yaml

Lines changed: 1 addition & 278 deletions
Original file line numberDiff line numberDiff line change
@@ -52,111 +52,6 @@ defaults:
5252
shell: bash
5353

5454
jobs:
55-
test:
56-
name: "Test/${{ matrix.platform }}_${{ matrix.arch }}"
57-
runs-on: ${{ matrix.runner }}
58-
strategy:
59-
fail-fast: true
60-
matrix:
61-
include:
62-
- { platform: linux, arch: amd64, runner: ubuntu-latest }
63-
- { platform: macos, arch: arm64, runner: macos-latest }
64-
- { platform: win, arch: amd64, runner: windows-latest }
65-
environment: BigQuery CI
66-
permissions:
67-
contents: read
68-
id-token: write
69-
steps:
70-
- name: free up disk space
71-
if: runner.os != 'Windows'
72-
run: |
73-
# Preinstalled tools use a lot of disk space, free up some space
74-
# https://github.com/actions/runner-images/issues/2840
75-
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
76-
77-
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
78-
with:
79-
fetch-depth: 0
80-
persist-credentials: false
81-
82-
- uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6.1.0
83-
with:
84-
cache-dependency-path: go/go.sum
85-
check-latest: true
86-
go-version-file: go/go.mod
87-
88-
- uses: prefix-dev/setup-pixi@82d477f15f3a381dbcc8adc1206ce643fe110fb7 # v0.9.3
89-
with:
90-
pixi-version: v0.50.2
91-
run-install: false
92-
93-
- name: Google Cloud Login
94-
uses: google-github-actions/auth@7c6bc770dae815cd3e89ee6cdf493a5fab2cc093 # v3.0.0
95-
with:
96-
service_account: ${{ secrets.gcloud_service_account }}
97-
workload_identity_provider: ${{ secrets.gcloud_workload_identity_provider }}
98-
99-
- name: Build
100-
working-directory: go
101-
run: |
102-
go build ./...
103-
104-
- name: Start Test Dependencies
105-
# Can't use Docker on macOS AArch64 runners, and Windows containers
106-
# work but often the container doesn't support Windows
107-
if: runner.os == 'Linux'
108-
working-directory: go
109-
run: |
110-
if [[ -f compose.yaml ]]; then
111-
if ! docker compose up --detach --wait test-service; then
112-
echo "Service failed to start"
113-
echo "Logs:"
114-
docker compose logs test-service
115-
exit 1
116-
fi
117-
fi
118-
119-
- name: Test
120-
if: runner.os == 'Linux'
121-
working-directory: go
122-
run: |
123-
set -a
124-
if [[ -f .env ]]; then
125-
source .env
126-
fi
127-
if [[ -f .env.ci ]]; then
128-
source .env.ci
129-
fi
130-
set +a
131-
132-
if [[ -n "${{ secrets.environment }}" ]]; then
133-
echo "Loading secret environment variables"
134-
eval "${{ secrets.environment }}"
135-
fi
136-
137-
if [[ -f ci/scripts/pre-test.sh ]]; then
138-
echo "Loading pre-test"
139-
./ci/scripts/pre-test.sh
140-
fi
141-
142-
go test -tags assert -v ./...
143-
144-
if [[ -f ci/scripts/post-test.sh ]]; then
145-
./ci/scripts/post-test.sh
146-
fi
147-
148-
- name: go mod tidy
149-
if: runner.os == 'Linux'
150-
working-directory: go
151-
run: |
152-
go mod tidy --diff
153-
154-
- name: Test
155-
if: runner.os != 'Linux'
156-
working-directory: go
157-
run: |
158-
go test -tags assert -v ./...
159-
16055
validate:
16156
name: "Validate/${{ matrix.platform }}_${{ matrix.arch }}"
16257
runs-on: ${{ matrix.runner }}
@@ -252,6 +147,7 @@ jobs:
252147
fi
253148
254149
docker ps
150+
export ADBC_DRIVER_BIGQUERY_LOG_LEVEL=debug
255151
pixi run validate
256152
257153
if [[ -f ci/scripts/post-test.sh ]]; then
@@ -268,176 +164,3 @@ jobs:
268164
name: docs
269165
path: "go/generated/bigquery.md"
270166
retention-days: 2
271-
272-
build:
273-
name: "Build bigquery/${{ matrix.platform }}_${{ matrix.arch }}"
274-
needs: test
275-
runs-on: ${{ matrix.runner }}
276-
strategy:
277-
fail-fast: true
278-
matrix:
279-
include:
280-
- { platform: linux, arch: amd64, runner: ubuntu-latest }
281-
- { platform: linux, arch: arm64, runner: ubuntu-24.04-arm }
282-
- { platform: macos, arch: arm64, runner: macos-latest }
283-
- { platform: windows, arch: amd64, runner: windows-latest }
284-
permissions:
285-
contents: read
286-
packages: read
287-
288-
steps:
289-
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
290-
with:
291-
fetch-depth: 0
292-
persist-credentials: false
293-
294-
- uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6.1.0
295-
with:
296-
cache-dependency-path: go/go.sum
297-
check-latest: true
298-
go-version-file: go/go.mod
299-
300-
- uses: prefix-dev/setup-pixi@82d477f15f3a381dbcc8adc1206ce643fe110fb7 # v0.9.3
301-
with:
302-
pixi-version: v0.50.2
303-
run-install: false
304-
305-
- name: Install dev tools
306-
working-directory: go
307-
run: |
308-
pixi install
309-
310-
- name: Log in to ghcr.io
311-
if: runner.os == 'Linux'
312-
run: |
313-
echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u ${{ github.actor }} --password-stdin
314-
315-
- name: Build Library
316-
working-directory: go
317-
run: |
318-
if [[ -f ci/scripts/pre-build.sh ]]; then
319-
./ci/scripts/pre-build.sh release ${{ matrix.platform }} ${{ matrix.arch }}
320-
fi
321-
set -a
322-
if [[ -f .env.release ]]; then
323-
source .env.release
324-
fi
325-
set +a
326-
pixi run adbc-make check CI=true VERBOSE=true DRIVER=bigquery
327-
328-
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
329-
with:
330-
name: drivers-${{ matrix.platform }}-${{ matrix.arch }}
331-
path: "go/build/libadbc_driver_bigquery.*"
332-
retention-days: 2
333-
334-
package:
335-
name: "Generate Packages"
336-
runs-on: ubuntu-latest
337-
needs: build
338-
permissions:
339-
contents: read
340-
341-
steps:
342-
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
343-
with:
344-
fetch-depth: 0
345-
persist-credentials: false
346-
347-
- uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6.1.0
348-
with:
349-
check-latest: true
350-
go-version: "stable"
351-
352-
- uses: prefix-dev/setup-pixi@82d477f15f3a381dbcc8adc1206ce643fe110fb7 # v0.9.3
353-
with:
354-
pixi-version: v0.50.2
355-
run-install: false
356-
357-
- uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
358-
with:
359-
pattern: "drivers-*"
360-
path: "~/drivers"
361-
362-
- name: Install tools
363-
working-directory: go
364-
run: |
365-
# XXX: can't install go-licenses under go 1.25
366-
# https://github.com/google/go-licenses/issues/312
367-
git clone --depth=1 https://github.com/google/go-licenses
368-
git config --global --add 'url.https://github.com/.insteadOf' ssh://git@github.com/
369-
pushd go-licenses
370-
go get -u
371-
# Silent break: https://github.com/spf13/cobra/pull/2303
372-
# Manually edit the go.mod to get around this for now
373-
sed -i 's|github.com/spf13/pflag v1.0.8|github.com/spf13/pflag v1.0.7|g' go.mod
374-
go mod tidy
375-
go install .
376-
popd
377-
378-
- name: Generate packages
379-
working-directory: go
380-
run: |
381-
pixi install
382-
383-
pixi run adbc-gen-package \
384-
--name bigquery \
385-
--root $(pwd) \
386-
--manifest-template manifest.toml \
387-
${{ (inputs.release && '--release') || '' }}\
388-
-o ~/packages \
389-
~/drivers/drivers-*-*/
390-
391-
ls ~/packages
392-
393-
- uses: actions/upload-artifact@b7c566a772e6b6bfb58ed0dc250532a479d7789f # v6.0.0
394-
with:
395-
name: all-packages
396-
path: ~/packages
397-
retention-days: 7
398-
399-
release:
400-
name: "Release (Dry Run)"
401-
runs-on: ubuntu-latest
402-
needs:
403-
- package
404-
- validate
405-
permissions:
406-
contents: read
407-
408-
steps:
409-
- uses: actions/checkout@8e8c483db84b4bee98b60c0593521ed34d9990e8 # v6.0.1
410-
with:
411-
fetch-depth: 0
412-
persist-credentials: false
413-
414-
- uses: actions/setup-go@4dc6199c7b1a012772edbd06daecab0f50c9053c # v6.1.0
415-
with:
416-
check-latest: true
417-
go-version: "stable"
418-
419-
- uses: prefix-dev/setup-pixi@82d477f15f3a381dbcc8adc1206ce643fe110fb7 # v0.9.3
420-
with:
421-
pixi-version: v0.50.2
422-
run-install: false
423-
424-
- uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
425-
with:
426-
name: "all-packages"
427-
path: "~/packages"
428-
429-
- uses: actions/download-artifact@37930b1c2abaa49bbe596cd826c3c89aef350131 # v7.0.0
430-
with:
431-
name: "docs"
432-
path: "~/packages"
433-
434-
- name: Release (dry-run)
435-
env:
436-
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
437-
working-directory: go
438-
run: |
439-
git tag go/v1000.0.0
440-
tag=go/v1000.0.0
441-
442-
pixi run release --dry-run $(pwd) $tag
443-
echo gh release upload $tag $(find ~/packages -name '*.tar.gz') $(find ~/packages -name 'manifest.yaml') $(find ~/packages -name '*.md')

go/bulk_ingest.go

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ import (
2121
"io"
2222
"log/slog"
2323
"os"
24+
// "runtime/pprof"
2425
"strings"
26+
// "time"
2527

2628
"cloud.google.com/go/bigquery"
2729
"github.com/adbc-drivers/driverbase-go/driverbase"
@@ -30,6 +32,15 @@ import (
3032
"github.com/google/uuid"
3133
)
3234

35+
// func init() {
36+
// go func() {
37+
// for {
38+
// time.Sleep(time.Minute)
39+
// _ = pprof.Lookup("goroutine").WriteTo(os.Stdout, 2)
40+
// }
41+
// }()
42+
// }
43+
3344
type bigqueryBulkIngestSink struct {
3445
f *os.File
3546
path string
@@ -86,6 +97,7 @@ func (bi *bigqueryBulkIngestImpl) Close() {
8697

8798
func (bi *bigqueryBulkIngestImpl) Copy(ctx context.Context, chunk driverbase.BulkIngestPendingCopy) error {
8899
pendingFile := chunk.(*bigqueryBulkIngestSink)
100+
bi.logger.DebugContext(ctx, "prepare COPY", "file", pendingFile.path)
89101

90102
source := bigquery.NewReaderSource(pendingFile.f)
91103
source.ParquetOptions = &bigquery.ParquetOptions{
@@ -115,7 +127,9 @@ func (bi *bigqueryBulkIngestImpl) Copy(ctx context.Context, chunk driverbase.Bul
115127
if err != nil {
116128
return errToAdbcErr(adbc.StatusIO, err, "run loader")
117129
}
118-
status, err := safeWaitForJob(ctx, job)
130+
bi.logger.DebugContext(ctx, "created COPY job", "file", pendingFile.path)
131+
status, err := safeWaitForJob(ctx, bi.logger, job)
132+
bi.logger.DebugContext(ctx, "finished COPY job", "file", pendingFile.path, "error", err)
119133
if err != nil {
120134
return err
121135
}
@@ -149,12 +163,15 @@ func (bi *bigqueryBulkIngestImpl) CreateTable(ctx context.Context, schema *arrow
149163
if err != nil {
150164
return err
151165
}
152-
js, err := safeWaitForJob(ctx, job)
166+
js, err := safeWaitForJob(ctx, bi.logger, job)
153167
if err != nil {
168+
bi.logger.Debug("failed to create table", "table", bi.options.TableName, "stmt", stmt, "error", err)
154169
return err
155170
} else if err = js.Err(); err != nil {
171+
bi.logger.Debug("failed to create table", "table", bi.options.TableName, "stmt", stmt, "error", err)
156172
return errToAdbcErr(adbc.StatusInternal, err, "create table")
157173
} else if !js.Done() {
174+
bi.logger.Debug("failed to create table", "table", bi.options.TableName, "stmt", stmt, "error", "did not complete")
158175
return adbc.Error{
159176
Code: adbc.StatusInternal,
160177
Msg: "[bq] CREATE TABLE query did not complete",

go/connection.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -326,7 +326,7 @@ func (c *connectionImpl) exec(ctx context.Context, stmt string, config func(*big
326326
if err != nil {
327327
return nil, err
328328
}
329-
status, err := safeWaitForJob(ctx, job)
329+
status, err := safeWaitForJob(ctx, c.Logger, job)
330330
if err != nil {
331331
return nil, err
332332
} else if err := status.Err(); err != nil {

go/pkg/driver.go

Lines changed: 6 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)