98
98
// TODO: The "anonymize" parameter is unused for now.
99
99
func generateDDLs (
100
100
zipDir ,
101
- dbName string , anonymize bool ,
101
+ dbName , ddlFile string , anonymize bool ,
102
102
) (allSchemas map [string ]* TableSchema , createStmts map [string ]string , retErr error ) {
103
103
104
+ if ddlFile != "" {
105
+ // DDL file location is present. We will use this instead of the debug zip.
106
+ f , err := os .Open (ddlFile )
107
+ if err != nil {
108
+ return nil , nil , errors .Wrap (err , "failed to open DDL file" )
109
+ }
110
+ defer func () {
111
+ if cerr := f .Close (); cerr != nil && retErr == nil {
112
+ retErr = errors .Wrap (cerr , "failed to close input DDL file" )
113
+ }
114
+ }()
115
+ return generateDDLFromDDLFile (bufio .NewReader (f ), dbName , anonymize )
116
+ }
104
117
f , err := openCreateStatementsTSV (zipDir )
105
118
if err != nil {
106
119
return nil , nil , errors .Wrap (err , "failed to open TSV file" )
@@ -111,14 +124,158 @@ func generateDDLs(
111
124
}
112
125
}()
113
126
114
- return generateDDLFromReader (bufio .NewReader (f ), dbName , anonymize )
127
+ return generateDDLFromCSV (bufio .NewReader (f ), dbName , anonymize )
128
+ }
129
+
130
+ // generateDDLFromDDLFile reads DDL statements from a SQL dump file
131
+ // and returns a map of table names to their schemas and a map of
132
+ // short table names to their CREATE TABLE statements.
133
+ // The file can be generated by running the following:
134
+ //
135
+ // cockroach sql --url='postgresql://<url>/<db name>' --execute="SHOW CREATE ALL TABLES;" > ddl_file.sql
136
+ func generateDDLFromDDLFile (
137
+ reader * bufio.Reader , dbName string , anonymize bool ,
138
+ ) (map [string ]* TableSchema , map [string ]string , error ) {
139
+ // the results are stored in these Maps.
140
+ tableStatements := make (map [string ]string )
141
+ order := make ([]string , 0 )
142
+ seen := make (map [string ]struct {})
143
+
144
+ // Buffer accumulates the SQL statements
145
+ var currentStmt strings.Builder
146
+ // inStatement helps handling multi line statements
147
+ inStatement := false
148
+
149
+ // The file is read line by line
150
+ for {
151
+ line , err := reader .ReadString ('\n' )
152
+ if err != nil {
153
+ if err .Error () == "EOF" {
154
+ break
155
+ }
156
+ return nil , nil , errors .Wrap (err , "failed while reading SQL file" )
157
+ }
158
+
159
+ // Empty lines and comments are skipped
160
+ trimmedLine := strings .TrimSpace (line )
161
+ if ! inStatement {
162
+ // The generated statement has a quote at the start of the statement. This is trimmed.
163
+ trimmedLine = strings .TrimLeft (trimmedLine , "\" " )
164
+ }
165
+ if trimmedLine == "" || strings .HasPrefix (trimmedLine , "--" ) ||
166
+ strings .HasPrefix (trimmedLine , "create_statement" ) {
167
+ continue
168
+ }
169
+
170
+ // A new statement is expected to start with CREATE TABLE.
171
+ if strings .HasPrefix (strings .ToUpper (trimmedLine ), "CREATE TABLE" ) {
172
+ // If we were already in a statement, the previous statement is processed
173
+ if inStatement {
174
+ tableStatements , order , seen = processStatement (currentStmt .String (), tableStatements , order , seen , dbName )
175
+ }
176
+
177
+ // A new statement is started.
178
+ currentStmt .Reset ()
179
+ currentStmt .WriteString (trimmedLine )
180
+ inStatement = true
181
+ } else if strings .HasPrefix (strings .ToUpper (trimmedLine ), "ALTER TABLE" ) {
182
+ // If we were in a CREATE TABLE statement, the statement is processed
183
+ if inStatement {
184
+ tableStatements , order , seen = processStatement (currentStmt .String (), tableStatements , order , seen , dbName )
185
+ }
186
+
187
+ // A new ALTER TABLE statement is started.
188
+ currentStmt .Reset ()
189
+ currentStmt .WriteString (trimmedLine )
190
+ inStatement = true
191
+ } else if inStatement {
192
+ if strings .HasSuffix (trimmedLine , ";\" " ) {
193
+ // The generated statement has a quote at the end of the statement. This needs to be trimmed.
194
+ trimmedLine = strings .TrimRight (trimmedLine , "\" " )
195
+ }
196
+ // The current statement is accumulated.
197
+ currentStmt .WriteString (trimmedLine )
198
+
199
+ // if the statement is complete (ends with semicolon or has closing parenthesis followed by options), it is processed.
200
+ if strings .HasSuffix (trimmedLine , ";" ) ||
201
+ (strings .Contains (trimmedLine , ");" ) && ! strings .HasPrefix (trimmedLine , "--" )) {
202
+ tableStatements , order , seen = processStatement (currentStmt .String (), tableStatements , order , seen , dbName )
203
+ inStatement = false
204
+ }
205
+ }
206
+ }
207
+
208
+ // Any remaining statement is processed.
209
+ if inStatement {
210
+ tableStatements , order , _ = processStatement (currentStmt .String (), tableStatements , order , seen , dbName )
211
+ }
212
+
213
+ return buildSchemas (order , tableStatements ), buildCreateStmts (tableStatements ), nil
214
+ }
215
+
216
+ // processStatement processes a single SQL statement and adds it to the tableStatements map if it's a CREATE TABLE statement
217
+ // It returns the updated tableStatements, order, and seen maps
218
+ func processStatement (
219
+ stmt string ,
220
+ tableStatements map [string ]string ,
221
+ order []string ,
222
+ seen map [string ]struct {},
223
+ dbName string ,
224
+ ) (map [string ]string , []string , map [string ]struct {}) {
225
+ // Only process CREATE TABLE statements
226
+ if ! strings .HasPrefix (strings .ToUpper (strings .TrimSpace (stmt )), "CREATE TABLE" ) {
227
+ return tableStatements , order , seen
228
+ }
229
+
230
+ // Extract the table name using the tablePattern regex
231
+ tableMatch := tablePattern .FindStringSubmatch (stmt )
232
+ if tableMatch == nil {
233
+ return tableStatements , order , seen
234
+ }
235
+
236
+ // Extract and normalize the table name
237
+ tableName := tableMatch [1 ]
238
+ parts := strings .Split (tableName , "." )
239
+ for i := range parts {
240
+ parts [i ] = strings .Trim (parts [i ], `"` ) // Remove quotes from parts
241
+ }
242
+
243
+ // If the table name doesn't have a schema, assume it's "public"
244
+ var schemaName string
245
+ var simpleTableName string
246
+
247
+ if len (parts ) == 1 {
248
+ schemaName = "public"
249
+ simpleTableName = parts [0 ]
250
+ } else if len (parts ) == 2 {
251
+ schemaName = parts [0 ]
252
+ simpleTableName = parts [1 ]
253
+ } else {
254
+ // Skip tables with more complex names
255
+ return tableStatements , order , seen
256
+ }
257
+
258
+ // Create a regex for the schema name
259
+ schemaPattern := regexp .MustCompile (`\b` + regexp .QuoteMeta (schemaName ) + `\.` )
260
+
261
+ // Process the DDL record
262
+ fullTableName , statement := processDDLRecord (dbName , schemaName , simpleTableName , stmt , schemaPattern )
263
+
264
+ // Add to the maps if not seen before
265
+ if _ , ok := seen [fullTableName ]; ! ok && fullTableName != "" {
266
+ tableStatements [fullTableName ] = statement
267
+ order = append (order , fullTableName )
268
+ seen [fullTableName ] = struct {}{}
269
+ }
270
+
271
+ return tableStatements , order , seen
115
272
}
116
273
117
- // generateDDLFromReader takes a reader for a TSV file containing DDL statements,
274
+ // generateDDLFromCSV takes a reader for a TSV file containing DDL statements,
118
275
// parses the statements, and returns a map of table names to their schemas
119
276
// and a map of short table names to their CREATE TABLE statements.
120
277
// It has been designed this way to make it unit-testable.
121
- func generateDDLFromReader (
278
+ func generateDDLFromCSV (
122
279
r io.Reader , dbName string , anonymize bool ,
123
280
) (map [string ]* TableSchema , map [string ]string , error ) {
124
281
reader := csv .NewReader (r )
0 commit comments