@@ -107,19 +107,29 @@ def parseargs():
107107 parser .add_option ('-v' , '--verbose' , action = 'store_true' )
108108 parser .add_option ('-s' , '--schema' , type = 'string' )
109109 # TODO: perform check just for one/multiple schemas
110- # TODO: perform check for multiple databases
110+ parser . add_option ( '-A' , '--all' , action = 'store_true' )
111111 parser .add_option ('-d' , '--database' , type = 'string' )
112112 (options , args ) = parser .parse_args ()
113113
114114 USER = os .getenv ('USER' )
115115 if USER is None or USER is ' ' :
116- logger .error ('USER environment variable must be set. ' )
116+ logger .error ('USER environment variable must be set' )
117117 parser .exit ()
118118
119- if options .database is None :
120- logger .error ('Database must be specified. ' )
119+ if options .database is None and options . all is None :
120+ logger .error ('Database must be specified' )
121121 parser .exit ()
122122
123+ if options .database is not None and options .all is not None :
124+ logger .info ('Can\' t specify -A and -d options at the same time' )
125+ parser .exit ()
126+
127+ if options .database is not None :
128+ logger .info ("Checking integrity of database %s" % options .database )
129+
130+ if options .all is not None :
131+ logger .info ("Checking integrity of all databases" )
132+
123133 return options
124134
125135
@@ -171,6 +181,27 @@ def get_gp_segment_configuration(database=None):
171181 return cfg
172182
173183
184+ def get_databases ():
185+ """
186+ This function returns the list of databases present in the Greenplum cluster.
187+ :return: list of databases
188+ """
189+ db = connect (database = 'template1' )
190+ database_list = []
191+
192+ # Retrieve all non-catalog/non partitioned parent tables
193+ qry = '''
194+ SELECT datname
195+ FROM pg_database
196+ where datname not like 'template0'
197+ '''
198+ curs = db .query (qry )
199+ for row in curs .dictresult ():
200+ database_list .append (row )
201+ db .close ()
202+ return database_list
203+
204+
174205def get_tables (database = None ):
175206 db = connect (database = database )
176207 table_list = []
@@ -220,6 +251,29 @@ def get_tables_in_schema(database, schema):
220251 return table_list
221252
222253
254+ def spawn_threads (database , schema = None ):
255+ dbids = get_gp_segment_configuration () # get Greenplum segment information
256+ tables = list ()
257+
258+ if schema is not None :
259+ tables = get_tables_in_schema (database , schema )
260+ logger .info ("Checking only tables in schema %s" % schema )
261+ else :
262+ tables = get_tables (database = database ) # get table list
263+
264+ threads = []
265+
266+ for dbid in dbids :
267+ if dbids [dbid ]['isprimary' ] == 't' :
268+ th = CheckIntegrity (tables , dbids [dbid ]['hostname' ], database , dbids [dbid ]['content' ], dbids [dbid ]['port' ])
269+ th .start ()
270+ threads .append (th )
271+
272+ for thread in threads :
273+ logger .debug ('waiting on thread %s' % thread .getName ())
274+ thread .join ()
275+
276+
223277class CheckIntegrity (Thread ):
224278 def __init__ (self , tables , hostname , database , content , port ):
225279 Thread .__init__ (self )
@@ -242,11 +296,11 @@ class CheckIntegrity(Thread):
242296 except DatabaseError , de :
243297 # TODO: better error summary report
244298 logger .error ('Failed for table %s.%s at seg%s' % (table ['schema' ], table ['table' ], self .content ))
245- logger .error ('ERROR: %s' % str (de ).strip ())
299+ logger .error ('%s' % str (de ).strip ())
246300
247301 # Append this table name to reported_table list
248302 table_lock .acquire ()
249- reported_tables .append ("%s .%s in %s:%d gpseg%s" % (table ['schema' ], table ['table' ],
303+ reported_tables .append ("[%s] %s .%s in %s:%d gpseg%s" % (self . database , table ['schema' ], table ['table' ],
250304 self .hostname , self .port , self .content ))
251305 table_lock .release ()
252306
@@ -262,39 +316,26 @@ if __name__ == '__main__':
262316 setup_tool_logging (EXECNAME , getLocalHostname (), getUserName ())
263317
264318 options = parseargs ()
265- logger .info ("Checking integrity of database %s" % options .database )
266319
267320 if options .verbose :
268321 enable_verbose_logging ()
269322
270323 try :
271- # TODO: List number of databases/schemas/tables to be checked and prompt to continue
272- dbids = get_gp_segment_configuration () # get Greenplum segment information
273-
274- tables = list ()
275-
276- if options .schema :
277- tables = get_tables_in_schema (options .database , options .schema )
278- logger .info ("Checking only tables in schema %s" % options .schema )
279- else :
280- tables = get_tables (database = options .database ) # get table list
281-
282- threads = []
283324 reported_tables = []
284325 table_lock = Lock ()
285326
286- for dbid in dbids :
287- if dbids [ dbid ][ 'isprimary' ] == 't' :
288- th = CheckIntegrity ( tables , dbids [ dbid ][ 'hostname' ], options . database , dbids [ dbid ][ 'content' ],
289- dbids [ dbid ][ 'port ' ])
290- th . start ( )
291- threads . append ( th )
292-
293- for thread in threads :
294- logger . debug ( 'waiting on thread %s' % thread . getName ())
295- thread . join ( )
327+ if options . all is not None :
328+ for db in get_databases () :
329+ # TODO: List number of databases/schemas/ tables to be checked and prompt to continue
330+ logger . info ( "Checking database %s" % db [ 'datname ' ])
331+ spawn_threads ( db [ 'datname' ] )
332+ else :
333+ if options . schema is not None :
334+ spawn_threads ( options . database , options . schema )
335+ else :
336+ spawn_threads ( options . database )
296337
297- logger .info ("REPORT SUMMARY %s" % datetime .now ())
338+ logger .info ("ERROR REPORT SUMMARY %s" % datetime .now ())
298339 logger .info ("============================================" )
299340
300341 if len (reported_tables ) == 0 :
0 commit comments