Skip to content

Commit 3870d0c

Browse files
author
Jesse
authored
Fix: FetchResults truncates rows returned by the server (#18)
Apache Hive thrift always sends HasMoreResults==false. This change updates the Next() method to continue fetching pages of results until an empty page is received. Also updates the changelog and version number. Signed-off-by: Jesse Whitehouse <[email protected]>
1 parent f620360 commit 3870d0c

File tree

4 files changed

+55
-7
lines changed

4 files changed

+55
-7
lines changed

CHANGELOG.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,11 @@
22

33
## 0.1.x (Unreleased)
44

5+
## 0.1.4 (2022-07-30)
6+
7+
- Fix: Could not fetch rowsets greater than the value of `maxRows` (#18)
8+
- Updated default user agent
9+
- Updated README and CONTRIBUTING
510

611
## 0.1.3 (2022-06-16)
712

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ The DSN format is:
4242
databricks://:[your token]@[Workspace hostname][Endpoint HTTP Path]
4343
```
4444

45-
You can set HTTP Timeout value by appending a `timeout` query parameter (in milliseconds) and you can set max rows to retrieve by setting the `maxRows` query parameter:
45+
You can set the HTTP timeout by appending a `timeout` query parameter (in milliseconds), and you can set the maximum number of rows retrieved per network request by setting the `maxRows` query parameter:
4646

4747
```
4848
databricks://:[your token]@[Workspace hostname][Endpoint HTTP Path]?timeout=1000&maxRows=1000

connection_test.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
package dbsql
22

33
import (
4+
"context"
45
"database/sql"
56
"fmt"
67
"os"
@@ -61,6 +62,38 @@ func TestSelect(t *testing.T) {
6162
}
6263
}
6364

65+
func TestExhaustCursor(t *testing.T) {
66+
// GIVEN: Session MaxRows < expected result length
67+
// WHEN: Caller fetches all results
68+
// THEN: The expected result length is pulled by successive calls to rows.Next()
69+
70+
// MaxRows is defined in databricks.go as 10_000
71+
72+
db := open(t)
73+
defer db.Close()
74+
75+
// Pull more results than the known MaxRows value
76+
ctx := context.Background()
77+
rows, _ := db.QueryContext(ctx, "SELECT id FROM RANGE(100000)")
78+
79+
rowIds := make([]int, 0)
80+
81+
// Fetch all results from the `rows` iterator
82+
var rowId int
83+
for rows.Next() {
84+
rows.Scan(&rowId)
85+
rowIds = append(rowIds, rowId)
86+
}
87+
88+
expectedLength := 100_000
89+
actualLength := len(rowIds)
90+
91+
if expectedLength != actualLength {
92+
t.Errorf("Result length mismatch. Expected %d, Actual: %d", expectedLength, actualLength)
93+
94+
}
95+
}
96+
6497
func open(t *testing.T) *sql.DB {
6598
dsn := os.Getenv("DATABRICKS_DSN")
6699
if dsn == "" {

hive/result_set.go

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,19 +22,26 @@ type ResultSet struct {
2222

2323
// Next ...
2424
func (rs *ResultSet) Next(dest []driver.Value) error {
25-
if rs.idx >= rs.length {
26-
if !rs.more {
27-
return io.EOF
28-
}
2925

26+
var thisPageExhausted bool = rs.idx >= length(rs.result)
27+
28+
if thisPageExhausted {
29+
30+
// Fetch another page
3031
resp, err := rs.fetchfn()
32+
3133
if err != nil {
3234
return err
3335
}
36+
37+
// Replace previous page of results with new page of results
3438
rs.result = resp.Results
35-
rs.more = resp.GetHasMoreRows()
36-
rs.idx = 0
39+
40+
// Replace previous page length with new page length
3741
rs.length = length(rs.result)
42+
43+
// Reset index to the start of the new page
44+
rs.idx = 0
3845
}
3946

4047
if rs.length == 0 {
@@ -43,11 +50,14 @@ func (rs *ResultSet) Next(dest []driver.Value) error {
4350

4451
for i := range dest {
4552
val, err := value(rs.result.Columns[i], rs.schema.Columns[i], rs.idx)
53+
4654
if err != nil {
4755
return err
4856
}
57+
4958
dest[i] = val
5059
}
60+
5161
rs.idx++
5262
return nil
5363
}

0 commit comments

Comments
 (0)