1+ #!/usr/bin/env python3
2+
3+ import sys
4+ import os
5+ import tarfile
6+ import argparse
7+ import subprocess
8+ from datetime import datetime
9+ from pathlib import Path
10+
def ensure_tomli_available():
    """Return a module that can parse TOML (exposes ``load`` on a binary file).

    Prefers the stdlib ``tomllib`` (Python 3.11+), which is API-compatible
    with ``tomli`` for the only call this script makes.  Falls back to an
    already-installed ``tomli``, and as a last resort installs ``tomli``
    with pip.  Exits the process with status 1 if no parser can be obtained.
    """
    print("=== Checking dependencies ===")
    print(f"Python executable: {sys.executable}")
    print(f"Python version: {sys.version}")

    # Python 3.11+ ships tomllib in the standard library; using it avoids
    # mutating the user's environment with a runtime pip install.
    try:
        import tomllib
        print("✓ tomllib (stdlib) available")
        return tomllib
    except ImportError:
        pass

    try:
        import tomli
        print("✓ tomli already available")
        return tomli
    except ImportError:
        print("✗ tomli not found, installing...")

    try:
        print(f"Installing tomli using: {sys.executable} -m pip install tomli")
        # Capture pip's output so the script's own progress log stays readable.
        subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'tomli'],
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        print("✓ tomli installed successfully")
        import tomli
        print("✓ tomli imported successfully")
        return tomli
    except subprocess.CalledProcessError as e:
        print(f"✗ Failed to install tomli: {e}")
        print("Please install manually: pip install tomli")
        sys.exit(1)
    except ImportError:
        print("✗ Failed to import tomli after installation")
        sys.exit(1)
# Resolve the TOML parser once at startup so every later function can use it.
for banner_line in ("Databend Meta Log Collector", "============================"):
    print(banner_line)
tomli = ensure_tomli_available()
print()
46+ def parse_config (config_file ):
47+ """Parse TOML config file and extract log directory."""
48+ try :
49+ with open (config_file , 'rb' ) as f :
50+ config = tomli .load (f )
51+
52+ # Check for log directory in two possible locations:
53+ # 1. Top-level: log_dir = "..."
54+ # 2. In [log.file] section: dir = "..."
55+
56+ top_level_log_dir = config .get ('log_dir' )
57+
58+ log_config = config .get ('log' , {})
59+ file_config = log_config .get ('file' , {})
60+ nested_log_dir = file_config .get ('dir' )
61+
62+ print (f"Top-level log_dir: { top_level_log_dir } " )
63+ print (f"[log.file].dir: { nested_log_dir } " )
64+
65+ # Validate configuration
66+ if top_level_log_dir and nested_log_dir :
67+ if top_level_log_dir != nested_log_dir :
68+ raise ValueError (
69+ f"Conflicting log directory settings found:\n "
70+ f" log_dir = '{ top_level_log_dir } '\n "
71+ f" [log.file].dir = '{ nested_log_dir } '\n "
72+ f"Please use only one log directory configuration."
73+ )
74+ print ("✓ Both log directory settings present and match" )
75+ return top_level_log_dir
76+ elif top_level_log_dir :
77+ print ("✓ Using top-level log_dir setting" )
78+ return top_level_log_dir
79+ elif nested_log_dir :
80+ print ("✓ Using [log.file].dir setting" )
81+ return nested_log_dir
82+ else :
83+ raise ValueError (
84+ "No log directory found in config file. "
85+ "Please set either 'log_dir' or '[log.file].dir'"
86+ )
87+
88+ except FileNotFoundError :
89+ raise FileNotFoundError (f"Config file '{ config_file } ' not found" )
90+ except Exception as e :
91+ raise ValueError (f"Error parsing config file: { e } " )
92+
93+
def resolve_log_dir(log_dir, config_file):
    """Turn a configured log directory into an absolute, resolved Path.

    A relative directory is interpreted relative to the directory that
    contains the config file.
    """
    path = Path(log_dir)
    if path.is_absolute():
        return path.resolve()
    # Anchor relative settings at the config file's own directory.
    return (Path(config_file).parent / path).resolve()
def analyze_log_directory(log_dir):
    """Inspect a log directory, print a summary, and describe its contents.

    Args:
        log_dir: ``Path`` of the directory to inspect.

    Returns:
        ``{'files': [...], 'dirs': [...], 'total': int}`` on success, or
        ``None`` when the path is missing, not a directory, or unreadable.
    """
    print(f"=== Analyzing log directory ===")
    print(f"Log directory path: {log_dir}")
    print(f"Directory exists: {log_dir.exists()}")

    # Guard clauses: bail out early on anything we cannot inspect.
    if not log_dir.exists():
        print("✗ Log directory does not exist")
        return None
    if not log_dir.is_dir():
        print("✗ Path is not a directory")
        return None

    print(f"Directory readable: {os.access(log_dir, os.R_OK)}")

    try:
        entries = list(log_dir.iterdir())
    except PermissionError:
        print("✗ Permission denied accessing directory")
        return None
    except Exception as e:
        print(f"✗ Error accessing directory: {e}")
        return None

    files = [entry for entry in entries if entry.is_file()]
    subdirs = [entry for entry in entries if entry.is_dir()]

    print(f"Total items: {len(entries)}")
    print(f"Files: {len(files)}")
    print(f"Subdirectories: {len(subdirs)}")

    if files:
        print("Files found:")
        # Preview at most 10 files to keep the log readable.
        for entry in files[:10]:
            st = entry.stat()
            stamp = datetime.fromtimestamp(st.st_mtime).strftime('%Y-%m-%d %H:%M:%S')
            print(f" - {entry.name} ({st.st_size} bytes, modified: {stamp})")
        if len(files) > 10:
            print(f" ... and {len(files) - 10} more files")

    if subdirs:
        print("Subdirectories found:")
        # Preview at most 5 subdirectories.
        for entry in subdirs[:5]:
            print(f" - {entry.name}/")
        if len(subdirs) > 5:
            print(f" ... and {len(subdirs) - 5} more directories")

    return {'files': files, 'dirs': subdirs, 'total': len(entries)}
def create_log_archive(log_dir, output_file):
    """Create a gzip tarball of everything directly inside ``log_dir``.

    Args:
        log_dir: ``Path`` of the log directory to archive.
        output_file: Destination ``.tar.gz`` filename.

    Returns:
        The number of top-level items found in the directory.

    Raises:
        FileNotFoundError: when the log directory cannot be accessed.
    """
    analysis = analyze_log_directory(log_dir)
    if analysis is None:
        raise FileNotFoundError(f"Cannot access log directory '{log_dir}'")

    print(f"\n=== Creating archive ===")
    print(f"Output file: {output_file}")

    with tarfile.open(output_file, 'w:gz') as archive:
        if not (analysis['files'] or analysis['dirs']):
            # Still produce a (valid, empty) archive so the caller gets a file.
            print("No files to archive")
        else:
            for entry in analysis['files']:
                print(f"Adding file: {entry.name}")
                archive.add(entry, arcname=entry.name)
            for entry in analysis['dirs']:
                print(f"Adding directory: {entry.name}/")
                archive.add(entry, arcname=entry.name)

    return analysis['total']
def main():
    """CLI entry point: parse arguments, read the config, archive the logs."""
    parser = argparse.ArgumentParser(
        description='Collect databend-meta logs based on config file',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog='Example: %(prog)s databend-meta-node-1.toml'
    )
    parser.add_argument('config_file', help='Path to databend-meta config file (.toml)')
    parser.add_argument('-o', '--output', help='Output archive filename (default: auto-generated)')
    args = parser.parse_args()

    try:
        # Step 1: locate and parse the config file.
        print(f"=== Processing config file ===")
        config_path = Path(args.config_file).resolve()
        print(f"Config file absolute path: {config_path}")
        print(f"Config file exists: {config_path.exists()}")
        print(f"Config file readable: {os.access(config_path, os.R_OK)}")

        configured_dir = parse_config(config_path)
        print(f"Found log directory setting: '{configured_dir}'")

        log_dir = resolve_log_dir(configured_dir, config_path)
        print(f"Resolved log directory: {log_dir}")

        # Step 2: decide where the archive goes.
        print(f"\n=== Preparing output ===")
        if args.output:
            output_file = args.output
            print(f"Using provided output filename: {output_file}")
        else:
            stem = Path(args.config_file).stem
            stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            output_file = f"{stem}_logs_{stamp}.tar.gz"
            print(f"Generated output filename: {output_file}")

        output_path = Path(output_file).resolve()
        print(f"Output absolute path: {output_path}")
        print(f"Output directory writable: {os.access(output_path.parent, os.W_OK)}")

        # Step 3: build the archive and report the result.
        item_count = create_log_archive(log_dir, output_file)

        print(f"\n=== Archive completed ===")
        if item_count > 0:
            size_mb = os.path.getsize(output_file) / (1024 * 1024)
            print(f"✓ Successfully created log archive: {output_file}")
            print(f"✓ Items archived: {item_count}")
            print(f"✓ Archive size: {size_mb:.2f} MB")
        else:
            print(f"⚠ Warning: Log directory '{log_dir}' is empty")
            print(f"✓ Created empty archive: {output_file}")

    except Exception as e:
        # Top-level CLI boundary: report the failure and exit non-zero.
        print(f"Error: {e}", file=sys.stderr)
        sys.exit(1)
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()