Skip to content

Commit 6890ee8

Browse files
jadewang-db (Jade Wang), claude, and lidavidm (David Li)
authored
fix(go): resolve intermittent EOF errors fetching large results (#192) (#194)
## 🥞 Stacked PR Use this [link](https://github.com/adbc-drivers/databricks/pull/194/files) to review incremental changes. - [**stack/githud_192**](#194) [[Files changed](https://github.com/adbc-drivers/databricks/pull/194/files)] --------- Fix intermittent "arrow/ipc: could not read continuation indicator: EOF" errors when fetching large result sets (30M+ rows) from Databricks. Root cause: The code called SchemaBytes() before any data fetch. In databricks-sql-go, when query results are large (no "direct results" in the response), the schema is populated lazily during the first Next() call. Calling SchemaBytes() before Next() returned empty bytes, causing the Arrow IPC reader to fail with EOF. The fix changes the initialization order to: 1. First call loadNextReader() which triggers data fetch 2. Get schema from the loaded IPC reader (which always has schema) 3. Fall back to SchemaBytes() only for empty result sets Fixes: #192 Co-Authored-By: Claude (databricks-claude-opus-4-5) <noreply@anthropic.com> --------- Co-authored-by: Jade Wang <jade.wang+data@databricks.com> Co-authored-by: Claude (databricks-claude-opus-4-5) <noreply@anthropic.com> Co-authored-by: David Li <li.davidm96@gmail.com>
1 parent 7fb7f83 commit 6890ee8

File tree

3 files changed

+91
-38
lines changed

3 files changed

+91
-38
lines changed

go/database.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"database/sql"
2828
"errors"
2929
"fmt"
30+
"net"
3031
"strconv"
3132
"strings"
3233
"time"
@@ -148,6 +149,10 @@ func (d *databaseImpl) resolveConnectionOptions() ([]dbsql.ConnOption, error) {
148149
}
149150

150151
// TLS/SSL handling
152+
// Configure a custom transport with proper timeout settings when custom
153+
// TLS config is needed. These settings match the defaults from
154+
// databricks-sql-go's PooledTransport to ensure reliable connections
155+
// for large result set downloads.
151156
if d.sslCertPool != nil || d.sslInsecure {
152157
tlsConfig := &tls.Config{
153158
MinVersion: tls.VersionTLS12,
@@ -162,7 +167,19 @@ func (d *databaseImpl) resolveConnectionOptions() ([]dbsql.ConnOption, error) {
162167
}
163168

164169
transport := &http.Transport{
165-
TLSClientConfig: tlsConfig,
170+
Proxy: http.ProxyFromEnvironment,
171+
DialContext: (&net.Dialer{
172+
Timeout: 30 * time.Second,
173+
KeepAlive: 30 * time.Second,
174+
}).DialContext,
175+
TLSClientConfig: tlsConfig,
176+
ForceAttemptHTTP2: true,
177+
MaxIdleConns: 100,
178+
IdleConnTimeout: 180 * time.Second,
179+
TLSHandshakeTimeout: 10 * time.Second,
180+
ExpectContinueTimeout: 1 * time.Second,
181+
MaxIdleConnsPerHost: 10,
182+
MaxConnsPerHost: 100,
166183
}
167184
opts = append(opts, dbsql.WithTransport(transport))
168185
}

go/driver_test.go

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ import (
3333
"github.com/adbc-drivers/driverbase-go/validation"
3434
"github.com/apache/arrow-adbc/go/adbc"
3535
"github.com/apache/arrow-go/v18/arrow"
36+
"github.com/apache/arrow-go/v18/arrow/array"
3637
"github.com/apache/arrow-go/v18/arrow/memory"
3738
"github.com/stretchr/testify/assert"
3839
"github.com/stretchr/testify/require"
@@ -867,6 +868,7 @@ func (suite *DatabricksTests) TestDecimalTypes() {
867868
}
868869

869870
func (suite *DatabricksTests) TestMultiBatch() {
871+
// Regression test for issue reported directly to us
870872
query := fmt.Sprintf("CREATE OR REPLACE TABLE `%s`.`%s`.`test_multi_batch` (founder STRING, born STRING)", suite.Quirks.catalogName, suite.Quirks.schemaName)
871873
suite.Require().NoError(suite.stmt.SetSqlQuery(query))
872874
_, err := suite.stmt.ExecuteUpdate(suite.ctx)
@@ -877,15 +879,16 @@ func (suite *DatabricksTests) TestMultiBatch() {
877879
born string
878880
}
879881

880-
for _, r := range []row{
882+
rows := []row{
881883
{"Ali Ghodsi", "Iran"},
882884
{"Ion Stoica", "Romania"},
883885
{"Matei Zaharia", "Romania"},
884886
{"Patrick Wendell", "USA"},
885887
{"Reynold Xin", "China"},
886888
{"Andy Konwinski", "USA"},
887889
{"Arsalan Tavakoli-Shiraji", "Iran"},
888-
} {
890+
}
891+
for _, r := range rows {
889892
query = fmt.Sprintf("INSERT INTO `%s`.`%s`.`test_multi_batch` VALUES ('%s', '%s')", suite.Quirks.catalogName, suite.Quirks.schemaName, r.founder, r.born)
890893
suite.Require().NoError(suite.stmt.SetSqlQuery(query))
891894
_, err = suite.stmt.ExecuteUpdate(suite.ctx)
@@ -898,14 +901,30 @@ func (suite *DatabricksTests) TestMultiBatch() {
898901
suite.Require().NoError(err)
899902
defer rdr.Release()
900903

901-
// Databricks appears to put each into its own batch
902-
suite.True(rdr.Next())
903-
suite.True(rdr.Next())
904-
suite.True(rdr.Next())
905-
suite.True(rdr.Next())
906-
suite.True(rdr.Next())
907-
suite.True(rdr.Next())
908-
suite.True(rdr.Next())
909-
suite.False(rdr.Next())
904+
// This used to only return one row.
905+
rowCount := 0
906+
seen := map[string]string{}
907+
for rdr.Next() {
908+
batch := rdr.RecordBatch()
909+
rowCount += int(batch.NumRows())
910+
911+
founder := batch.Column(0).(*array.String)
912+
born := batch.Column(1).(*array.String)
913+
914+
for i := range int(batch.NumRows()) {
915+
seen[founder.Value(i)] = born.Value(i)
916+
}
917+
}
910918
suite.NoError(rdr.Err())
919+
suite.Equal(len(rows), rowCount)
920+
921+
suite.EqualValues(map[string]string{
922+
"Ali Ghodsi": "Iran",
923+
"Ion Stoica": "Romania",
924+
"Matei Zaharia": "Romania",
925+
"Patrick Wendell": "USA",
926+
"Reynold Xin": "China",
927+
"Andy Konwinski": "USA",
928+
"Arsalan Tavakoli-Shiraji": "Iran",
929+
}, seen)
911930
}

go/ipc_reader_adapter.go

Lines changed: 43 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -69,47 +69,64 @@ func newIPCReaderAdapter(ctx context.Context, rows driver.Rows) (array.RecordRea
6969
}
7070
}
7171

72-
schema_bytes, err := ipcIterator.SchemaBytes()
73-
if err != nil {
74-
return nil, adbc.Error{
75-
Code: adbc.StatusInternal,
76-
Msg: fmt.Sprintf("failed to get schema bytes: %v", err),
77-
}
72+
adapter := &ipcReaderAdapter{
73+
rows: rows,
74+
refCount: 1,
75+
ipcIterator: ipcIterator,
7876
}
7977

80-
// Read schema from bytes
81-
reader, err := ipc.NewReader(bytes.NewReader(schema_bytes))
82-
if err != nil {
78+
// Load the first IPC stream to get the schema.
79+
// Note: SchemaBytes() may return empty bytes if no direct results were
80+
// returned with the query response. The schema is populated lazily
81+
// during the first data fetch in databricks-sql-go. By loading the
82+
// first reader, we ensure the schema is available.
83+
err = adapter.loadNextReader()
84+
if err != nil && err != io.EOF {
8385
return nil, adbc.Error{
8486
Code: adbc.StatusInternal,
85-
Msg: fmt.Sprintf("failed to get schema reader: %v", err),
87+
Msg: fmt.Sprintf("failed to initialize IPC reader: %v", err),
8688
}
8789
}
88-
defer reader.Release()
8990

90-
schema := reader.Schema()
91-
if schema == nil {
92-
return nil, adbc.Error{
93-
Code: adbc.StatusInternal,
94-
Msg: "schema is nil",
91+
// Get schema from the first reader, or fall back to SchemaBytes() if
92+
// the result set is empty (no readers available)
93+
if adapter.currentReader != nil {
94+
adapter.schema = adapter.currentReader.Schema()
95+
} else {
96+
// Empty result set - try to get schema from SchemaBytes()
97+
schema_bytes, err := ipcIterator.SchemaBytes()
98+
if err != nil {
99+
return nil, adbc.Error{
100+
Code: adbc.StatusInternal,
101+
Msg: fmt.Sprintf("failed to get schema bytes: %v", err),
102+
}
95103
}
96-
}
97104

98-
adapter := &ipcReaderAdapter{
99-
rows: rows,
100-
refCount: 1,
101-
ipcIterator: ipcIterator,
102-
schema: schema,
105+
if len(schema_bytes) == 0 {
106+
return nil, adbc.Error{
107+
Code: adbc.StatusInternal,
108+
Msg: "schema bytes are empty and no data available",
109+
}
110+
}
111+
112+
reader, err := ipc.NewReader(bytes.NewReader(schema_bytes))
113+
if err != nil {
114+
return nil, adbc.Error{
115+
Code: adbc.StatusInternal,
116+
Msg: fmt.Sprintf("failed to read schema: %v", err),
117+
}
118+
}
119+
adapter.schema = reader.Schema()
120+
reader.Release()
103121
}
104122

105-
// Initialize the first reader
106-
err = adapter.loadNextReader()
107-
if err != nil && err != io.EOF {
123+
if adapter.schema == nil {
108124
return nil, adbc.Error{
109125
Code: adbc.StatusInternal,
110-
Msg: fmt.Sprintf("failed to initialize IPC reader: %v", err),
126+
Msg: "schema is nil",
111127
}
112128
}
129+
113130
return adapter, nil
114131
}
115132

0 commit comments

Comments (0)