Skip to content

Commit eef0cbd

Browse files
committed
⚡️ Improve ClickHouse performance
Anyquery now requests from ClickHouse only the columns that the user's query actually needs. This speeds up queries because ClickHouse, being a column-oriented database, no longer has to read every column.
1 parent d3f0cbd commit eef0cbd

File tree

1 file changed

+50
-40
lines changed

1 file changed

+50
-40
lines changed

module/clickhouse.go

Lines changed: 50 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ type ClickHouseCursor struct {
9393
currentRow []interface{}
9494
rowsReturned int64
9595
limit int64
96+
query SQLQueryToExecute
9697
}
9798

9899
func (m *ClickHouseModule) Create(c *sqlite3.SQLiteConn, args []string) (sqlite3.VTab, error) {
@@ -310,11 +311,44 @@ func (t *ClickHouseTable) Open() (sqlite3.VTabCursor, error) {
310311
if err != nil {
311312
return nil, fmt.Errorf("error getting a new connection: %v", err)
312313
}
314+
315+
values := make([]interface{}, len(t.schema))
316+
for i := range values {
317+
values[i] = new(interface{})
318+
switch t.schema[i].Type {
319+
case "INTEGER":
320+
if t.schema[i].RemoteType[0] == 'u' {
321+
// For unsigned integers, we use NullUint64
322+
values[i] = new(NullUint64) // ClickHouse does not have unsigned integers, so we use NullInt64
323+
} else {
324+
values[i] = new(sql.NullInt64)
325+
}
326+
case "REAL":
327+
values[i] = new(sql.NullFloat64)
328+
case "TEXT":
329+
if t.schema[i].RemoteType == "ipv4" || t.schema[i].RemoteType == "ipv6" {
330+
// For IPv4 and IPv6, we use a net.IP type
331+
values[i] = new(net.IP)
332+
} else {
333+
values[i] = new(sql.NullString)
334+
}
335+
case "BLOB":
336+
values[i] = new([]byte)
337+
case "DATE":
338+
values[i] = new(timeMySQL)
339+
case "DATETIME":
340+
values[i] = new(timeMySQL)
341+
default:
342+
values[i] = new(interface{})
343+
}
344+
}
345+
313346
return &ClickHouseCursor{
314347
connection: conn,
315348
tableName: t.tableName,
316349
schema: t.schema,
317350
limit: -1,
351+
currentRow: values,
318352
}, nil
319353
}
320354

@@ -341,7 +375,7 @@ func (t *ClickHouseTable) Destroy() error {
341375
// To find the method, we will ask the database to explain the query and return the best method
342376
func (t *ClickHouseTable) BestIndex(cst []sqlite3.InfoConstraint, ob []sqlite3.InfoOrderBy, info sqlite3.IndexInformation) (*sqlite3.IndexResult, error) {
343377
// Create the SQL query
344-
queryBuilder, limitCstIndex, offsetCstIndex, used := constructSQLQuery(cst, ob, t.schema, t.tableName)
378+
queryBuilder, limitCstIndex, offsetCstIndex, used := efficientConstructSQLQuery(cst, ob, t.schema, t.tableName, info.ColUsed)
345379
queryBuilder.SetFlavor(sqlbuilder.ClickHouse)
346380
rawQuery, args := queryBuilder.Build()
347381
rawQuery += sqlQuerySuffix
@@ -391,6 +425,7 @@ func (t *ClickHouseTable) BestIndex(cst []sqlite3.InfoConstraint, ob []sqlite3.I
391425
Args: args,
392426
LimitIndex: limitCstIndex,
393427
OffsetIndex: offsetCstIndex,
428+
ColumnsUsed: info.ColUsed,
394429
}
395430

396431
// Serialize the query as a JSON object
@@ -474,7 +509,6 @@ func (t *ClickHouseCursor) resetCursor() error {
474509
t.limit = -1
475510
t.rowsReturned = 0
476511
t.exhausted = false
477-
t.currentRow = nil
478512

479513
return nil
480514
}
@@ -493,6 +527,9 @@ func (t *ClickHouseCursor) Filter(idxNum int, idxStr string, vals []interface{})
493527
return fmt.Errorf("error unmarshalling the query: %v", err)
494528
}
495529

530+
// Set the query for the cursor
531+
t.query = query
532+
496533
// Get the LIMIT AND OFFSET values
497534
// and remove them from the query so that we can pass these arguments to the query
498535
limit := int64(-1)
@@ -539,49 +576,22 @@ func (t *ClickHouseCursor) Next() error {
539576
}
540577
if hasMoreRows {
541578
var err error
542-
// Init an array of the same size as the number of columns
543-
values := make([]interface{}, len(t.schema))
544-
for i := range values {
545-
values[i] = new(interface{})
546-
switch t.schema[i].Type {
547-
case "INTEGER":
548-
if t.schema[i].RemoteType[0] == 'u' {
549-
// For unsigned integers, we use NullUint64
550-
values[i] = new(NullUint64) // ClickHouse does not have unsigned integers, so we use NullInt64
551-
} else {
552-
values[i] = new(sql.NullInt64)
553-
}
554-
case "REAL":
555-
values[i] = new(sql.NullFloat64)
556-
case "TEXT":
557-
if t.schema[i].RemoteType == "ipv4" || t.schema[i].RemoteType == "ipv6" {
558-
// For IPv4 and IPv6, we use a net.IP type
559-
values[i] = new(net.IP)
560-
} else {
561-
values[i] = new(sql.NullString)
562-
}
563-
case "BLOB":
564-
values[i] = new([]byte)
565-
case "DATE":
566-
values[i] = new(timeMySQL)
567-
case "DATETIME":
568-
values[i] = new(timeMySQL)
569-
default:
570-
values[i] = new(interface{})
579+
580+
dest := make([]interface{}, 0, len(t.schema))
581+
for i := range t.schema {
582+
// ColumnsUsed is a bitmask that indicates which columns are used in the query
583+
// If the last bit is set, it means that the rest of the columns are used
584+
if t.query.ColumnsUsed&(1<<i) == 0 && i < 62 {
585+
continue
571586
}
587+
588+
dest = append(dest, &t.currentRow[i])
572589
}
573-
err = t.rows.Scan(values...)
590+
591+
err = t.rows.Scan(dest...)
574592
if err != nil {
575593
return fmt.Errorf("error scanning the row: %v", err)
576594
}
577-
t.currentRow = make([]interface{}, len(values))
578-
for i, v := range values {
579-
if v == nil {
580-
t.currentRow[i] = nil
581-
continue
582-
}
583-
t.currentRow[i] = v
584-
}
585595

586596
} else {
587597
t.currentRow = nil

0 commit comments

Comments
 (0)