Skip to content

Commit 3b76f56

Browse files
authored
Merge pull request #10791 from dolthub/zachmu/adaptive
enable adaptive encoding with an env var
2 parents e07daa7 + f445758 commit 3b76f56

File tree

15 files changed

+477
-13
lines changed

15 files changed

+477
-13
lines changed
Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
name: Test Bats Unix (Adaptive Encoding)
2+
3+
on:
4+
pull_request:
5+
branches: [ main ]
6+
paths:
7+
- 'go/**'
8+
- 'integration-tests/**'
9+
- '.github/workflows/ci-bats-unix-adaptive.yaml'
10+
11+
concurrency:
12+
group: ci-bats-unix-adaptive-${{ github.event.pull_request.number || github.ref }}
13+
cancel-in-progress: true
14+
15+
jobs:
16+
test:
17+
name: Bats tests (adaptive encoding)
18+
defaults:
19+
run:
20+
shell: bash
21+
runs-on: ubuntu-22.04
22+
env:
23+
DOLT_USE_ADAPTIVE_ENCODING: "true"
24+
use_credentials: ${{ secrets.AWS_SECRET_ACCESS_KEY != '' && secrets.AWS_ACCESS_KEY_ID != '' }}
25+
steps:
26+
- name: Free disk space
27+
run: |
28+
NAME="DISK-CLEANUP"
29+
echo "[${NAME}] Starting background cleanup..."
30+
[ -d /usr/share/dotnet ] && sudo rm -rf /usr/share/dotnet &
31+
[ -d /usr/local/lib/android ] && sudo rm -rf /usr/local/lib/android &
32+
[ -d /opt/ghc ] && sudo rm -rf /opt/ghc &
33+
[ -d /usr/local/share/boost ] && sudo rm -rf /usr/local/share/boost &
34+
35+
- name: Conditionally Set ENV VARS for AWS tests
36+
run: |
37+
if [[ $use_credentials == true ]]; then
38+
echo "AWS_SDK_LOAD_CONFIG=1" >> $GITHUB_ENV
39+
echo "AWS_REGION=us-west-2" >> $GITHUB_ENV
40+
echo "DOLT_BATS_AWS_TABLE=dolt-ci-bats-manifests-us-west-2" >> $GITHUB_ENV
41+
echo "DOLT_BATS_AWS_BUCKET=dolt-ci-bats-chunks-us-west-2" >> $GITHUB_ENV
42+
echo "DOLT_BATS_AWS_EXISTING_REPO=aws_remote_bats_tests__dolt__" >> $GITHUB_ENV
43+
fi
44+
- name: Configure filter tags for lambda bats
45+
if: ${{ env.use_credentials == 'true' }}
46+
run: |
47+
echo "BATS_FILTER=--filter-tags no_lambda" >> $GITHUB_ENV
48+
- name: Configure AWS Credentials
49+
if: ${{ env.use_credentials == 'true' }}
50+
uses: aws-actions/configure-aws-credentials@v4
51+
with:
52+
aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
53+
aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
54+
aws-region: us-west-2
55+
role-to-assume: ${{ secrets.AWS_ROLE_TO_ASSUME }}
56+
role-duration-seconds: 10800 # 3 hours D:
57+
- uses: actions/checkout@v6
58+
- name: Setup Go 1.x
59+
uses: actions/setup-go@v5
60+
with:
61+
go-version-file: go/go.mod
62+
id: go
63+
- name: Setup Python 3.x
64+
uses: actions/setup-python@v5
65+
with:
66+
python-version: "3.10"
67+
- uses: actions/setup-node@v4
68+
with:
69+
node-version: ^16
70+
- name: Create CI Bin
71+
run: |
72+
mkdir -p ./.ci_bin
73+
echo "$(pwd)/.ci_bin" >> $GITHUB_PATH
74+
- name: Install Bats
75+
run: |
76+
npm i bats
77+
echo "$(pwd)/node_modules/.bin" >> $GITHUB_PATH
78+
working-directory: ./.ci_bin
79+
- name: Install Python Deps
80+
run: |
81+
pip install mysql-connector-python
82+
pip install pandas
83+
pip install pyarrow
84+
- name: Install Dolt
85+
working-directory: ./go
86+
run: |
87+
go build -mod=readonly -o ../.ci_bin/dolt ./cmd/dolt/.
88+
89+
go build -mod=readonly -o ../.ci_bin/remotesrv ./utils/remotesrv/.
90+
go build -mod=readonly -o ../.ci_bin/noms ./store/cmd/noms/.
91+
- name: Setup Dolt Config
92+
run: |
93+
dolt config --global --add user.name 'Dolthub Actions'
94+
dolt config --global --add user.email 'actions@dolthub.com'
95+
- name: Install expect
96+
run: sudo apt-get install -y expect
97+
- name: Install pcre2grep
98+
run: sudo apt-get install -y pcre2-utils
99+
- name: Install Maven
100+
run: sudo apt-get install -y maven
101+
- name: Install Hadoop
102+
working-directory: ./.ci_bin
103+
run: |
104+
curl -LO https://downloads.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
105+
tar xvf hadoop-3.3.6.tar.gz
106+
echo "$(pwd)/hadoop-3.3.6/bin" >> $GITHUB_PATH
107+
- name: Install parquet-cli
108+
id: parquet_cli
109+
working-directory: ./.ci_bin
110+
run: |
111+
curl -OL https://github.com/apache/parquet-mr/archive/refs/tags/apache-parquet-1.12.3.tar.gz
112+
tar zxvf apache-parquet-1.12.3.tar.gz
113+
cd parquet-java-apache-parquet-1.12.3/parquet-cli
114+
mvn clean install -DskipTests
115+
runtime_jar="$(pwd)"/target/parquet-cli-1.12.3-runtime.jar
116+
echo "runtime_jar=$runtime_jar" >> $GITHUB_OUTPUT
117+
- name: Check expect
118+
run: expect -v
119+
- name: Test all Unix
120+
env:
121+
SQL_ENGINE: "local-engine"
122+
PARQUET_RUNTIME_JAR: ${{ steps.parquet_cli.outputs.runtime_jar }}
123+
BATS_TEST_RETRIES: "3"
124+
run: |
125+
bats --print-output-on-failure --tap $BATS_FILTER .
126+
working-directory: ./integration-tests/bats
127+
- name: Test all Unix, SQL_ENGINE=remote-engine
128+
if: ${{ env.use_credentials == 'true' }}
129+
env:
130+
SQL_ENGINE: "remote-engine"
131+
PARQUET_RUNTIME_JAR: ${{ steps.parquet_cli.outputs.runtime_jar }}
132+
BATS_TEST_RETRIES: "3"
133+
run: |
134+
bats --print-output-on-failure --tap $BATS_FILTER .
135+
working-directory: ./integration-tests/bats

.github/workflows/ci-binlog-tests.yaml

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,9 @@ jobs:
2222
fail-fast: false
2323
matrix:
2424
os: [ubuntu-22.04]
25+
# TODO: binlog doesn't yet handle adaptive encoding
26+
# adaptive_encoding: ["false", "true"]
27+
adaptive_encoding: ["false"]
2528
steps:
2629
- uses: actions/checkout@v6
2730
- name: Set up Go 1.x
@@ -36,12 +39,20 @@ jobs:
3639
go test -vet=off -timeout 60m ./libraries/doltcore/sqle/binlogreplication/...
3740
env:
3841
MATRIX_OS: ${{ matrix.os }}
42+
DOLT_USE_ADAPTIVE_ENCODING: ${{ matrix.adaptive_encoding }}
3943
binlog-race-test:
4044
name: Binlog tests - race
4145
defaults:
4246
run:
4347
shell: bash
44-
runs-on: ubuntu-22.04
48+
runs-on: ${{ matrix.os }}
49+
strategy:
50+
fail-fast: false
51+
matrix:
52+
os: [ubuntu-22.04]
53+
# TODO: binlog doesn't yet handle adaptive encoding
54+
# adaptive_encoding: ["false", "true"]
55+
adaptive_encoding: ["false"]
4556
steps:
4657
- uses: actions/checkout@v6
4758
- name: Set up Go 1.x
@@ -55,4 +66,5 @@ jobs:
5566
# Test binlog packages with race detector
5667
go test -vet=off -timeout 60m -race ./libraries/doltcore/sqle/binlogreplication/...
5768
env:
58-
MATRIX_OS: ubuntu-22.04
69+
MATRIX_OS: ${{ matrix.os }}
70+
DOLT_USE_ADAPTIVE_ENCODING: ${{ matrix.adaptive_encoding }}

.github/workflows/ci-compatibility-tests.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ jobs:
1919
fail-fast: true
2020
matrix:
2121
os: [ ubuntu-22.04 ]
22+
adaptive_encoding: [ "false", "true" ]
2223
steps:
2324
- uses: actions/checkout@v6
2425
- name: Setup Go 1.x
@@ -49,3 +50,5 @@ jobs:
4950
- name: Test all
5051
run: ./runner.sh
5152
working-directory: ./integration-tests/compatibility
53+
env:
54+
DOLT_USE_ADAPTIVE_ENCODING: ${{ matrix.adaptive_encoding }}

.github/workflows/ci-go-race-tests.yaml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ jobs:
1919
matrix:
2020
os: [ ubuntu-22.04 ]
2121
dolt_fmt: [ "__DOLT__" ]
22+
adaptive_encoding: [ "false", "true" ]
2223
steps:
2324
- uses: actions/checkout@v6
2425
- name: Set up Go 1.x
@@ -32,7 +33,10 @@ jobs:
3233
DOLT_SKIP_PREPARED_ENGINETESTS=1 go test -vet=off -v -race -timeout 30m github.com/dolthub/dolt/go/libraries/doltcore/sqle/enginetest
3334
env:
3435
DOLT_DEFAULT_BIN_FORMAT: ${{ matrix.dolt_fmt }}
36+
DOLT_USE_ADAPTIVE_ENCODING: ${{ matrix.adaptive_encoding }}
3537
- name: Test concurrentmap
3638
working-directory: ./go
3739
run: |
3840
go test -vet=off -v -race -timeout 1m github.com/dolthub/dolt/go/libraries/utils/concurrentmap
41+
env:
42+
DOLT_USE_ADAPTIVE_ENCODING: ${{ matrix.adaptive_encoding }}
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
name: Test Go (Adaptive Encoding)
2+
3+
on:
4+
pull_request:
5+
branches: [ main ]
6+
paths:
7+
- 'go/**'
8+
- '.github/workflows/ci-go-tests-adaptive.yaml'
9+
workflow_dispatch:
10+
11+
concurrency:
12+
group: ci-go-tests-adaptive-${{ github.event.pull_request.number || github.ref }}
13+
cancel-in-progress: true
14+
15+
jobs:
16+
test:
17+
name: Go tests (adaptive encoding)
18+
defaults:
19+
run:
20+
shell: bash
21+
runs-on: ubuntu-22.04
22+
env:
23+
DOLT_USE_ADAPTIVE_ENCODING: "true"
24+
steps:
25+
- uses: actions/checkout@v6
26+
- name: Set up Go 1.x
27+
uses: actions/setup-go@v5
28+
with:
29+
go-version-file: go/go.mod
30+
id: go
31+
- name: Test All
32+
working-directory: ./go
33+
run: |
34+
files=$(go list ./...)
35+
SAVEIFS=$IFS
36+
IFS=$'\n'
37+
file_arr=($files)
38+
IFS=$SAVEIFS
39+
40+
for (( i=0; i<${#file_arr[@]}; i++ ))
41+
do
42+
# Skip binlog tests as they run in a separate CI job
43+
if [[ "${file_arr[$i]}" == *binlogreplication* ]]; then
44+
echo "Skipping binlog package: ${file_arr[$i]} (runs in separate CI)"
45+
continue
46+
fi
47+
48+
echo "Testing Package: ${file_arr[$i]}"
49+
if [[ "${file_arr[$i]}" != *enginetest* ]]; then
50+
go test -vet=off -timeout 45m -race "${file_arr[$i]}"
51+
else
52+
echo "skipping enginetests for -race"
53+
fi
54+
succeeded=$(echo "$?")
55+
if [ "$succeeded" -ne 0 ]; then
56+
echo "Testing failed in package ${file_arr[$i]}"
57+
exit 1;
58+
fi
59+
done
60+
noracetest:
61+
name: Go tests - no race (adaptive encoding)
62+
defaults:
63+
run:
64+
shell: bash
65+
runs-on: ubuntu-22.04
66+
env:
67+
DOLT_USE_ADAPTIVE_ENCODING: "true"
68+
steps:
69+
- uses: actions/checkout@v6
70+
- name: Set up Go 1.x
71+
uses: actions/setup-go@v5
72+
with:
73+
go-version-file: go/go.mod
74+
id: go
75+
- name: Test All
76+
working-directory: ./go
77+
run: |
78+
go test -vet=off -timeout 30m ./libraries/doltcore/sqle/integration_test
79+
env:
80+
DOLT_TEST_RUN_NON_RACE_TESTS: "true"

.github/workflows/ci-sql-server-integration-tests.yaml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ jobs:
2222
fail-fast: true
2323
matrix:
2424
os: [ubuntu-22.04]
25+
adaptive_encoding: ["false", "true"]
2526
steps:
2627
- uses: actions/checkout@v6
2728
- name: Setup Go 1.x
@@ -41,3 +42,5 @@ jobs:
4142
export DOLT_BIN_PATH="$(pwd)/../../.ci_bin/dolt"
4243
go test .
4344
working-directory: ./integration-tests/go-sql-server-driver
45+
env:
46+
DOLT_USE_ADAPTIVE_ENCODING: ${{ matrix.adaptive_encoding }}

go/gen/fb/serial/schema.go

Lines changed: 16 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/libraries/doltcore/schema/encoding/serialization.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,15 @@ func serializeSchemaColumns(b *fb.Builder, sch schema.Schema) fb.UOffsetT {
269269
if onUpdateVal != "" {
270270
serial.ColumnAddOnUpdateValue(b, ou)
271271
}
272+
273+
// Only write the adaptive encoding field if the column uses adaptive encoding. This will force older clients that
274+
// don't know about this field to update in order to read it. Older versions of Dolt ignored the serialized
275+
// |encoding| field and inferred the encoding based on column type, which means they would try to interpret an
276+
// adaptive encoded field as a literal value.
277+
if usesAdaptiveEncoding(col) {
278+
serial.ColumnAddUsesAdaptiveEncoding(b, true)
279+
}
280+
272281
serial.ColumnAddHidden(b, false)
273282
offs[i] = serial.ColumnEnd(b)
274283
}
@@ -281,6 +290,16 @@ func serializeSchemaColumns(b *fb.Builder, sch schema.Schema) fb.UOffsetT {
281290
return b.EndVector(len(offs))
282291
}
283292

293+
func usesAdaptiveEncoding(col schema.Column) bool {
294+
switch col.TypeInfo.Encoding() {
295+
// val.ExtendedAdaptiveEnc is absent from this list because the extended types have their own serialization / deserialization logic
296+
case val.BytesAdaptiveEnc, val.StringAdaptiveEnc:
297+
return true
298+
default:
299+
return false
300+
}
301+
}
302+
284303
func serializeHiddenKeylessColumns(b *fb.Builder) (id, card fb.UOffsetT) {
285304
// cardinality column
286305
no := b.CreateString(keylessCardCol)

go/libraries/doltcore/schema/typeinfo/typeinfo.go

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ package typeinfo
1616

1717
import (
1818
"fmt"
19+
"os"
1920

2021
"github.com/dolthub/go-mysql-server/sql"
2122
gmstypes "github.com/dolthub/go-mysql-server/sql/types"
@@ -30,6 +31,12 @@ import (
3031
// columns. Extended types will always use adaptive encoding for TEXT and BLOB types regardless of this value.
3132
var UseAdaptiveEncoding = false
3233

34+
func init() {
35+
if envVal, ok := os.LookupEnv("DOLT_USE_ADAPTIVE_ENCODING"); ok && envVal == "true" {
36+
UseAdaptiveEncoding = true
37+
}
38+
}
39+
3340
// TypeInfo is an interface used for encoding type information.
3441
type TypeInfo interface {
3542
// Equals returns whether the given TypeInfo is equivalent to this TypeInfo.

go/serial/schema.fbs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,10 @@ table Column {
6868

6969
// sql on update value
7070
on_update_value:string;
71+
72+
// a marker value only written when the column uses adaptive encoding, which will force older clients to upgrade
73+
// before reading this data.
74+
uses_adaptive_encoding:bool;
7175
}
7276

7377
table Index {

0 commit comments

Comments
 (0)