Skip to content

Commit faa2346

Browse files
Update the implementation of statement extraction.
The original extraction procedure is unable to parse line comments when the '//' or '--' delimiters are not positioned at the start of the line, and it does not accommodate multiline comments.
1 parent 3cff67d commit faa2346

File tree

2 files changed

+244
-45
lines changed

2 files changed

+244
-45
lines changed

schema_converter.go

Lines changed: 91 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ import (
1919
"context"
2020
"flag"
2121
"fmt"
22+
"io"
2223
"log"
2324
"os"
2425
"strings"
@@ -85,41 +86,109 @@ func checkGCPCredentials() error {
8586
return nil
8687
}
8788

88-
// extractQueries parses a CQL file, splitting it into individual queries delimited by semicolons.
89+
// parseCqlFile reads a CQL file, extracts statements delimited by semicolons,
90+
// and ignores single-line (-- ... or // ...) and multi-line (/* ... */) comments.
91+
// Returns a slice of CQL statements or an error if file operations fail.
8992
//
90-
// TODO: Use the Antlr parser to extract the statements rather than using this function.
91-
func extractQueries(filePath string) ([]string, error) {
93+
// TODO: Consider refactoring this function into a struct that has a getNextStmt function.
94+
// The streaming approach can reduce memory usage and it is crucial for handling large files.
95+
func parseCqlFile(filePath string) ([]string, error) {
9296
file, err := os.Open(filePath)
9397
if err != nil {
9498
return nil, err
9599
}
96100
defer file.Close()
97101

98-
var queries []string
99-
var currentQuery strings.Builder
100-
scanner := bufio.NewScanner(file)
102+
var statements []string
103+
var currentStatement strings.Builder
104+
inMultilineComment := false
105+
inLineComment := false
106+
reader := bufio.NewReader(file)
101107

102-
for scanner.Scan() {
103-
line := strings.TrimSpace(scanner.Text())
108+
for {
109+
r, _, err := reader.ReadRune()
110+
if err == io.EOF {
111+
break
112+
}
113+
if err != nil {
114+
return nil, fmt.Errorf("error reading file: %w", err)
115+
}
116+
117+
currentChar := string(r)
118+
119+
if inMultilineComment {
120+
if currentChar == "*" {
121+
nextRune, _, peekErr := reader.ReadRune()
122+
if peekErr == nil && string(nextRune) == "/" {
123+
inMultilineComment = false
124+
} else if peekErr == nil {
125+
err = reader.UnreadRune()
126+
if err != nil {
127+
return nil, fmt.Errorf("error unreading rune: %w", err)
128+
}
129+
}
130+
}
131+
continue
132+
}
104133

105-
// Skip empty lines and comments
106-
if line == "" || strings.HasPrefix(line, "--") || strings.HasPrefix(line, "//") {
134+
if inLineComment {
135+
if currentChar == "\n" {
136+
inLineComment = false
137+
}
107138
continue
108139
}
109140

110-
currentQuery.WriteString(line + " ")
141+
if currentChar == "/" {
142+
nextRune, _, peekErr := reader.ReadRune()
143+
if peekErr == nil && string(nextRune) == "*" {
144+
inMultilineComment = true
145+
} else if peekErr == nil && string(nextRune) == "/" {
146+
inLineComment = true
147+
} else if peekErr == nil {
148+
currentStatement.WriteString(currentChar)
149+
err = reader.UnreadRune()
150+
if err != nil {
151+
return nil, fmt.Errorf("error unreading rune: %w", err)
152+
}
153+
} else {
154+
currentStatement.WriteString(currentChar)
155+
}
156+
continue
157+
}
111158

112-
// If the line ends with a semicolon, treat it as a full query
113-
if strings.HasSuffix(line, ";") {
114-
queries = append(queries, currentQuery.String())
115-
currentQuery.Reset()
159+
if currentChar == "-" {
160+
nextRune, _, peekErr := reader.ReadRune()
161+
if peekErr == nil && string(nextRune) == "-" {
162+
inLineComment = true
163+
} else if peekErr == nil {
164+
currentStatement.WriteString(currentChar)
165+
err = reader.UnreadRune()
166+
if err != nil {
167+
return nil, fmt.Errorf("error unreading rune: %w", err)
168+
}
169+
} else {
170+
currentStatement.WriteString(currentChar)
171+
}
172+
continue
173+
}
174+
175+
currentStatement.WriteString(currentChar)
176+
177+
if currentChar == ";" {
178+
statement := strings.TrimSpace(currentStatement.String())
179+
if statement != "" {
180+
statements = append(statements, statement)
181+
}
182+
currentStatement.Reset()
116183
}
117184
}
118185

119-
if err := scanner.Err(); err != nil {
120-
return nil, err
186+
// Handle any remaining statement if the file doesn't end with a semicolon
187+
remainingStatement := strings.TrimSpace(currentStatement.String())
188+
if remainingStatement != "" {
189+
statements = append(statements, remainingStatement)
121190
}
122-
return queries, nil
191+
return statements, nil
123192
}
124193

125194
func main() {
@@ -145,13 +214,13 @@ func main() {
145214
// Translates the Cassandra DDL stmts in the CQL file to corresponding Spanner Stmts.
146215
fmt.Printf("Starting Cassandra to Spanner conversion for '%s'\n\n\n", flags.cqlFile)
147216
var spannerCreateTableStmts []string
148-
queries, err := extractQueries(flags.cqlFile)
217+
stmts, err := parseCqlFile(flags.cqlFile)
149218
if err != nil {
150219
log.Fatalf("Failed to read the file: %v\n", err)
151220
}
152-
for _, query := range queries {
153-
fmt.Printf("Converting statement: '%s'\n", query)
154-
spannerCreateTableStmt, err := translator.ToSpannerCreateTableStmt(query, flags.databaseID)
221+
for _, stmt := range stmts {
222+
fmt.Printf("Converting statement: '%s'\n", stmt)
223+
spannerCreateTableStmt, err := translator.ToSpannerCreateTableStmt(stmt, flags.databaseID)
155224
if err != nil {
156225
log.Fatalf("%v\n", err)
157226
}

schema_converter_test.go

Lines changed: 153 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ package main
1818

1919
import (
2020
"os"
21-
"strings"
2221
"testing"
22+
23+
"github.com/stretchr/testify/assert"
2324
)
2425

2526
func TestCheckGCPCredentials(t *testing.T) {
@@ -40,30 +41,159 @@ func TestCheckGCPCredentials(t *testing.T) {
4041
})
4142
}
4243

43-
func TestExtractQueries(t *testing.T) {
44-
content := "CREATE TABLE test (id INT PRIMARY KEY);\n-- This is a comment\nCREATE TABLE another_test (id INT PRIMARY KEY);\n"
45-
filepath := "test.cql"
46-
47-
// Write the content to a temporary file
48-
if err := os.WriteFile(filepath, []byte(content), 0644); err != nil {
49-
t.Fatalf("Failed to write test file: %v", err)
44+
func TestStmtExtractor(t *testing.T) {
45+
testCases := []struct {
46+
name string
47+
content string
48+
expectedStmts []string
49+
expectError bool
50+
expectedErrorMsg string
51+
}{
52+
{
53+
name: "Basic",
54+
content: "CREATE TABLE test (id INT PRIMARY KEY);\n" +
55+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
56+
expectedStmts: []string{
57+
"CREATE TABLE test (id INT PRIMARY KEY);",
58+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
59+
},
60+
expectError: false,
61+
expectedErrorMsg: "",
62+
},
63+
{
64+
name: "No semicolon in the end of the last stmt",
65+
content: "CREATE TABLE test (id INT PRIMARY KEY);\n" +
66+
"CREATE TABLE another_test (id INT PRIMARY KEY)",
67+
expectedStmts: []string{
68+
"CREATE TABLE test (id INT PRIMARY KEY);",
69+
"CREATE TABLE another_test (id INT PRIMARY KEY)",
70+
},
71+
expectError: false,
72+
expectedErrorMsg: "",
73+
},
74+
{
75+
name: "Line comment at the beginning",
76+
content: "CREATE TABLE test (id INT PRIMARY KEY);\n" +
77+
"// CREATE TABLE test (id INT PRIMARY KEY);\n" +
78+
"-- CREATE TABLE test (id INT PRIMARY KEY);\n" +
79+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
80+
expectedStmts: []string{
81+
"CREATE TABLE test (id INT PRIMARY KEY);",
82+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
83+
},
84+
expectError: false,
85+
expectedErrorMsg: "",
86+
},
87+
{
88+
name: "Line comment at the end",
89+
content: "CREATE TABLE test (id INT PRIMARY KEY); // test 123\n" +
90+
"// CREATE TABLE test (id INT PRIMARY KEY);\n" +
91+
"-- CREATE TABLE test (id INT PRIMARY KEY);\n" +
92+
"CREATE TABLE another_test (id INT PRIMARY KEY); -- comment",
93+
expectedStmts: []string{
94+
"CREATE TABLE test (id INT PRIMARY KEY);",
95+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
96+
},
97+
expectError: false,
98+
expectedErrorMsg: "",
99+
},
100+
{
101+
name: "Block comment",
102+
content: "CREATE TABLE test (id INT PRIMARY KEY);\n" +
103+
"/* \n" +
104+
"CREATE TABLE test (id INT PRIMARY KEY);\n" +
105+
"CREATE TABLE test (id INT PRIMARY KEY);*/" +
106+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
107+
expectedStmts: []string{
108+
"CREATE TABLE test (id INT PRIMARY KEY);",
109+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
110+
},
111+
expectError: false,
112+
expectedErrorMsg: "",
113+
},
114+
{
115+
name: "Unterminated Block comment",
116+
content: "CREATE TABLE test (id INT PRIMARY KEY);\n" +
117+
"/* \n" +
118+
"CREATE TABLE test (id INT PRIMARY KEY);\n" +
119+
"CREATE TABLE test (id INT PRIMARY KEY);\n" +
120+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
121+
expectedStmts: []string{
122+
"CREATE TABLE test (id INT PRIMARY KEY);",
123+
},
124+
expectError: false,
125+
expectedErrorMsg: "",
126+
},
127+
{
128+
name: "Inline block comment",
129+
content: "CREATE TABLE test (id INT PRIMARY KEY);\n" +
130+
"/*CREATE TABLE test (id INT PRIMARY KEY);*/ CREATE TABLE test2 (id2 INT PRIMARY KEY);\n" +
131+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
132+
expectedStmts: []string{
133+
"CREATE TABLE test (id INT PRIMARY KEY);",
134+
"CREATE TABLE test2 (id2 INT PRIMARY KEY);",
135+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
136+
},
137+
expectError: false,
138+
expectedErrorMsg: "",
139+
},
140+
{
141+
name: "Two block comments in the same line",
142+
content: "CREATE TABLE test (id INT PRIMARY KEY);\n" +
143+
"/*CREATE TABLE test (id INT PRIMARY KEY);*/ CREATE TABLE test2 (id2 INT PRIMARY KEY); /* test */\n" +
144+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
145+
expectedStmts: []string{
146+
"CREATE TABLE test (id INT PRIMARY KEY);",
147+
"CREATE TABLE test2 (id2 INT PRIMARY KEY);",
148+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
149+
},
150+
expectError: false,
151+
expectedErrorMsg: "",
152+
},
153+
{
154+
name: "Double-slash in block comment",
155+
content: "CREATE TABLE test (id INT PRIMARY KEY);\n" +
156+
"/* test // */CREATE TABLE test2 (id INT PRIMARY KEY);\n" +
157+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
158+
expectedStmts: []string{
159+
"CREATE TABLE test (id INT PRIMARY KEY);",
160+
"CREATE TABLE test2 (id INT PRIMARY KEY);",
161+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
162+
},
163+
expectError: false,
164+
expectedErrorMsg: "",
165+
},
166+
{
167+
name: "Double-slash after block comment",
168+
content: "CREATE TABLE test (id INT PRIMARY KEY);\n" +
169+
"/* test */ //CREATE TABLE test2 (id INT PRIMARY KEY);\n" +
170+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
171+
expectedStmts: []string{
172+
"CREATE TABLE test (id INT PRIMARY KEY);",
173+
"CREATE TABLE another_test (id INT PRIMARY KEY);",
174+
},
175+
expectError: false,
176+
expectedErrorMsg: "",
177+
},
50178
}
51-
defer os.Remove(filepath)
52179

53-
expectedQueries := []string{
54-
"CREATE TABLE test (id INT PRIMARY KEY); ",
55-
"CREATE TABLE another_test (id INT PRIMARY KEY); ",
56-
}
180+
// TODO: Use os.CreateTemp to create the test file.
181+
filepath := "test.cql"
182+
for _, tc := range testCases {
183+
t.Run(tc.name, func(t *testing.T) {
184+
// Write the content to a temporary file
185+
if err := os.WriteFile(filepath, []byte(tc.content), 0644); err != nil {
186+
t.Fatalf("Failed to write test file: %v", err)
187+
}
188+
defer os.Remove(filepath)
57189

58-
t.Run("Extract Queries Successfully", func(t *testing.T) {
59-
queries, err := extractQueries(filepath)
60-
if err != nil {
61-
t.Fatalf("Expected no error, got %v", err)
62-
}
63-
for i, expected := range expectedQueries {
64-
if strings.TrimSpace(queries[i]) != strings.TrimSpace(expected) {
65-
t.Errorf("Expected %v, got %v", expected, queries[i])
190+
stmts, err := parseCqlFile(filepath)
191+
if tc.expectError {
192+
assert.Equal(t, tc.expectedErrorMsg, err.Error())
193+
return
66194
}
67-
}
68-
})
195+
assert.NoError(t, err)
196+
assert.Equal(t, tc.expectedStmts, stmts)
197+
})
198+
}
69199
}

0 commit comments

Comments
 (0)