Skip to content

Commit f38166f

Browse files
hughdbrown, claude, and wesm
authored
refactor(query): extract shared helpers to eliminate duckdb/sqlite duplication (#137)
```
# Summary

- Extract 7 duplicate data-mapping methods from duckdb.go and sqlite.go into a new shared.go file with a tablePrefix parameter ("" for SQLite, "sqlite_db." for DuckDB's sqlite_scan fallback)
- Reduce duplicate code from ~191 lines across 7 regions to ~80 lines across 2 regions (residual duplication is in ListMessages row scanning and GetGmailIDsByFilter query construction, which differ structurally between engines)
- Remove unused imports (bytes, compress/zlib, io, mime) from both engine files

# Extracted helpers

┌───────────────────────────┬────────────────────────────────────────────────────────┐
│ Function                  │ Purpose                                                │
├───────────────────────────┼────────────────────────────────────────────────────────┤
│ fetchLabelsForMessageList │ Batch label fetching for message list views            │
├───────────────────────────┼────────────────────────────────────────────────────────┤
│ fetchMessageLabelsDetail  │ Label fetching for single message detail               │
├───────────────────────────┼────────────────────────────────────────────────────────┤
│ fetchParticipantsShared   │ Participant fetching for message detail                │
├───────────────────────────┼────────────────────────────────────────────────────────┤
│ fetchAttachmentsShared    │ Attachment fetching for message detail                 │
├───────────────────────────┼────────────────────────────────────────────────────────┤
│ extractBodyFromRawShared  │ MIME body extraction from compressed raw data          │
├───────────────────────────┼────────────────────────────────────────────────────────┤
│ getMessageByQueryShared   │ Full message detail retrieval (orchestrates the above) │
├───────────────────────────┼────────────────────────────────────────────────────────┤
│ collectGmailIDs           │ Row scanning for Gmail ID queries                      │
└───────────────────────────┴────────────────────────────────────────────────────────┘

# Test plan

- go build ./... compiles cleanly
- go vet ./... passes
- make test — all tests pass
- samesame --match 20 internal/query/*.go confirms duplicate regions eliminated (7 → 2)

🤖 Generated with Claude Code
```

---------

Co-authored-by: Claude Opus 4.6 <noreply@anthropic.com>
Co-authored-by: Wes McKinney <wesmckinn+git@gmail.com>
1 parent 7d0732e commit f38166f

File tree

4 files changed

+289
-517
lines changed

4 files changed

+289
-517
lines changed

.githooks/post-commit

Lines changed: 0 additions & 8 deletions
This file was deleted.

internal/query/duckdb.go

Lines changed: 3 additions & 252 deletions
Original file line number | Diff line number | Diff line change
@@ -1,13 +1,10 @@
11
package query
22

33
import (
4-
"bytes"
5-
"compress/zlib"
64
"context"
75
"database/sql"
86
"encoding/json"
97
"fmt"
10-
"io"
118
"log"
129
"path/filepath"
1310
"runtime"
@@ -17,7 +14,6 @@ import (
1714
"time"
1815

1916
_ "github.com/marcboeker/go-duckdb"
20-
"github.com/wesm/msgvault/internal/mime"
2117
"github.com/wesm/msgvault/internal/search"
2218
)
2319

@@ -1161,41 +1157,7 @@ func (e *DuckDBEngine) fetchLabelsForMessages(ctx context.Context, messages []Me
11611157
return nil
11621158
}
11631159

1164-
// Build message ID list
1165-
ids := make([]interface{}, len(messages))
1166-
placeholders := make([]string, len(messages))
1167-
idToIndex := make(map[int64]int)
1168-
for i, msg := range messages {
1169-
ids[i] = msg.ID
1170-
placeholders[i] = "?"
1171-
idToIndex[msg.ID] = i
1172-
}
1173-
1174-
query := fmt.Sprintf(`
1175-
SELECT ml.message_id, l.name
1176-
FROM sqlite_db.message_labels ml
1177-
JOIN sqlite_db.labels l ON l.id = ml.label_id
1178-
WHERE ml.message_id IN (%s)
1179-
`, strings.Join(placeholders, ","))
1180-
1181-
rows, err := e.db.QueryContext(ctx, query, ids...)
1182-
if err != nil {
1183-
return err
1184-
}
1185-
defer rows.Close()
1186-
1187-
for rows.Next() {
1188-
var msgID int64
1189-
var labelName string
1190-
if err := rows.Scan(&msgID, &labelName); err != nil {
1191-
return err
1192-
}
1193-
if idx, ok := idToIndex[msgID]; ok {
1194-
messages[idx].Labels = append(messages[idx].Labels, labelName)
1195-
}
1196-
}
1197-
1198-
return rows.Err()
1160+
return fetchLabelsForMessageList(ctx, e.db, "sqlite_db.", messages)
11991161
}
12001162

12011163
// GetMessage retrieves a full message from SQLite.
@@ -1240,205 +1202,7 @@ func (e *DuckDBEngine) GetAttachment(ctx context.Context, id int64) (*Attachment
12401202
}
12411203

12421204
func (e *DuckDBEngine) getMessageByQuery(ctx context.Context, whereClause string, args ...interface{}) (*MessageDetail, error) {
1243-
query := fmt.Sprintf(`
1244-
SELECT
1245-
m.id,
1246-
m.source_message_id,
1247-
m.conversation_id,
1248-
COALESCE(c.source_conversation_id, ''),
1249-
COALESCE(m.subject, ''),
1250-
COALESCE(m.snippet, ''),
1251-
m.sent_at,
1252-
m.received_at,
1253-
COALESCE(m.size_estimate, 0),
1254-
m.has_attachments
1255-
FROM sqlite_db.messages m
1256-
LEFT JOIN sqlite_db.conversations c ON c.id = m.conversation_id
1257-
WHERE %s
1258-
`, whereClause)
1259-
1260-
var msg MessageDetail
1261-
var sentAt, receivedAt sql.NullTime
1262-
err := e.db.QueryRowContext(ctx, query, args...).Scan(
1263-
&msg.ID,
1264-
&msg.SourceMessageID,
1265-
&msg.ConversationID,
1266-
&msg.SourceConversationID,
1267-
&msg.Subject,
1268-
&msg.Snippet,
1269-
&sentAt,
1270-
&receivedAt,
1271-
&msg.SizeEstimate,
1272-
&msg.HasAttachments,
1273-
)
1274-
if err == sql.ErrNoRows {
1275-
return nil, nil
1276-
}
1277-
if err != nil {
1278-
return nil, fmt.Errorf("get message: %w", err)
1279-
}
1280-
1281-
if sentAt.Valid {
1282-
msg.SentAt = sentAt.Time
1283-
}
1284-
if receivedAt.Valid {
1285-
t := receivedAt.Time
1286-
msg.ReceivedAt = &t
1287-
}
1288-
1289-
// Fetch body from separate table (PK lookup)
1290-
var bodyText, bodyHTML sql.NullString
1291-
err = e.db.QueryRowContext(ctx, `
1292-
SELECT body_text, body_html FROM sqlite_db.message_bodies WHERE message_id = ?
1293-
`, msg.ID).Scan(&bodyText, &bodyHTML)
1294-
if err == nil {
1295-
if bodyText.Valid {
1296-
msg.BodyText = bodyText.String
1297-
}
1298-
if bodyHTML.Valid {
1299-
msg.BodyHTML = bodyHTML.String
1300-
}
1301-
} else if err != sql.ErrNoRows {
1302-
return nil, fmt.Errorf("get message body: %w", err)
1303-
}
1304-
1305-
// If body is empty, try to extract from raw MIME
1306-
if msg.BodyText == "" && msg.BodyHTML == "" {
1307-
if body, err := e.extractBodyFromRaw(ctx, msg.ID); err == nil && body != "" {
1308-
msg.BodyText = body
1309-
}
1310-
}
1311-
1312-
// Fetch participants
1313-
if err := e.fetchParticipants(ctx, &msg); err != nil {
1314-
return nil, fmt.Errorf("fetch participants: %w", err)
1315-
}
1316-
1317-
// Fetch labels
1318-
if err := e.fetchMessageLabels(ctx, &msg); err != nil {
1319-
return nil, fmt.Errorf("fetch labels: %w", err)
1320-
}
1321-
1322-
// Fetch attachments
1323-
if err := e.fetchAttachments(ctx, &msg); err != nil {
1324-
return nil, fmt.Errorf("fetch attachments: %w", err)
1325-
}
1326-
1327-
return &msg, nil
1328-
}
1329-
1330-
// extractBodyFromRaw extracts text body from compressed MIME data.
1331-
func (e *DuckDBEngine) extractBodyFromRaw(ctx context.Context, messageID int64) (string, error) {
1332-
var compressed []byte
1333-
var compression sql.NullString
1334-
1335-
err := e.db.QueryRowContext(ctx, `
1336-
SELECT raw_data, compression FROM sqlite_db.message_raw WHERE message_id = ?
1337-
`, messageID).Scan(&compressed, &compression)
1338-
if err != nil {
1339-
return "", err
1340-
}
1341-
1342-
var rawData []byte
1343-
if compression.Valid && compression.String == "zlib" {
1344-
r, err := zlib.NewReader(bytes.NewReader(compressed))
1345-
if err != nil {
1346-
return "", err
1347-
}
1348-
defer r.Close()
1349-
rawData, err = io.ReadAll(r)
1350-
if err != nil {
1351-
return "", err
1352-
}
1353-
} else {
1354-
rawData = compressed
1355-
}
1356-
1357-
// Parse MIME and extract text
1358-
parsed, err := mime.Parse(rawData)
1359-
if err != nil {
1360-
return "", err
1361-
}
1362-
1363-
return parsed.GetBodyText(), nil
1364-
}
1365-
1366-
func (e *DuckDBEngine) fetchParticipants(ctx context.Context, msg *MessageDetail) error {
1367-
rows, err := e.db.QueryContext(ctx, `
1368-
SELECT mr.recipient_type, p.email_address, COALESCE(mr.display_name, p.display_name, '')
1369-
FROM sqlite_db.message_recipients mr
1370-
JOIN sqlite_db.participants p ON p.id = mr.participant_id
1371-
WHERE mr.message_id = ?
1372-
`, msg.ID)
1373-
if err != nil {
1374-
return err
1375-
}
1376-
defer rows.Close()
1377-
1378-
for rows.Next() {
1379-
var recipType, email, name string
1380-
if err := rows.Scan(&recipType, &email, &name); err != nil {
1381-
return err
1382-
}
1383-
addr := Address{Email: email, Name: name}
1384-
switch recipType {
1385-
case "from":
1386-
msg.From = append(msg.From, addr)
1387-
case "to":
1388-
msg.To = append(msg.To, addr)
1389-
case "cc":
1390-
msg.Cc = append(msg.Cc, addr)
1391-
case "bcc":
1392-
msg.Bcc = append(msg.Bcc, addr)
1393-
}
1394-
}
1395-
1396-
return rows.Err()
1397-
}
1398-
1399-
func (e *DuckDBEngine) fetchMessageLabels(ctx context.Context, msg *MessageDetail) error {
1400-
rows, err := e.db.QueryContext(ctx, `
1401-
SELECT l.name
1402-
FROM sqlite_db.message_labels ml
1403-
JOIN sqlite_db.labels l ON l.id = ml.label_id
1404-
WHERE ml.message_id = ?
1405-
`, msg.ID)
1406-
if err != nil {
1407-
return err
1408-
}
1409-
defer rows.Close()
1410-
1411-
for rows.Next() {
1412-
var name string
1413-
if err := rows.Scan(&name); err != nil {
1414-
return err
1415-
}
1416-
msg.Labels = append(msg.Labels, name)
1417-
}
1418-
1419-
return rows.Err()
1420-
}
1421-
1422-
func (e *DuckDBEngine) fetchAttachments(ctx context.Context, msg *MessageDetail) error {
1423-
rows, err := e.db.QueryContext(ctx, `
1424-
SELECT id, COALESCE(filename, ''), COALESCE(mime_type, ''), COALESCE(size, 0), COALESCE(content_hash, '')
1425-
FROM sqlite_db.attachments
1426-
WHERE message_id = ?
1427-
`, msg.ID)
1428-
if err != nil {
1429-
return err
1430-
}
1431-
defer rows.Close()
1432-
1433-
for rows.Next() {
1434-
var att AttachmentInfo
1435-
if err := rows.Scan(&att.ID, &att.Filename, &att.MimeType, &att.Size, &att.ContentHash); err != nil {
1436-
return err
1437-
}
1438-
msg.Attachments = append(msg.Attachments, att)
1439-
}
1440-
1441-
return rows.Err()
1205+
return getMessageByQueryShared(ctx, e.db, "sqlite_db.", whereClause, args...)
14421206
}
14431207

14441208
// Search performs a Gmail-style search query.
@@ -1769,20 +1533,7 @@ func (e *DuckDBEngine) GetGmailIDsByFilter(ctx context.Context, filter MessageFi
17691533
}
17701534
defer rows.Close()
17711535

1772-
var ids []string
1773-
for rows.Next() {
1774-
var id string
1775-
if err := rows.Scan(&id); err != nil {
1776-
return nil, fmt.Errorf("scan gmail id: %w", err)
1777-
}
1778-
ids = append(ids, id)
1779-
}
1780-
1781-
if err := rows.Err(); err != nil {
1782-
return nil, fmt.Errorf("iterate gmail ids: %w", err)
1783-
}
1784-
1785-
return ids, nil
1536+
return collectGmailIDs(rows)
17861537
}
17871538

17881539
// HasParquetData checks if Parquet files exist and are usable.

0 commit comments

Comments
 (0)