Skip to content

Commit 2bb9b6a

Browse files
committed
feat: implement comprehensive logging infrastructure to the project
1 parent 919c203 commit 2bb9b6a

File tree

5 files changed

+354
-8
lines changed

5 files changed

+354
-8
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
data
22
table_visualizations
33

4+
# Log files
5+
logs/
6+
*.log
7+
48
__pycache__/
59
*.py[cod]
610
*.egg-info/

README.md

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@ for page_idx, page_result in enumerate(all_results):
8686
print(f"Page {page_idx}: Error - {page_result.get('error', 'Unknown error')}")
8787
```
8888

89+
## 📋 Logging
90+
91+
The project includes comprehensive logging capabilities for debugging and monitoring:
92+
93+
**Log Files**: By default, logs are written to:
94+
- `logs/pdf2table.log` - Main application log
95+
- `logs/pdf2table_errors.log` - Error-only log
96+
97+
**Documentation**: See `docs/logging_guide.md` for detailed logging documentation.
98+
8999
## 🎯 Use Cases
90100

91101
### Document Processing Pipelines

pdf2table/__init__.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,16 @@
11
import os
from pathlib import Path

from pdf2table.frameworks.logging_config import setup_logging, get_logger

# Values of PDF2TABLE_USE_COLORS (case-insensitive) that enable colored output.
_TRUTHY_ENV_VALUES = {"1", "true", "yes", "on"}

# Logging is configured as a side effect of importing the package.
# Environment overrides:
#   PDF2TABLE_LOG_LEVEL  - level name; normalised to upper case because the
#                          logging configuration only accepts upper-case names,
#                          and an unset/empty value falls back to INFO so that
#                          importing the package cannot fail on it.
#   PDF2TABLE_USE_COLORS - any of 1/true/yes/on enables colors (default: true).
setup_logging(
    log_level=(os.getenv("PDF2TABLE_LOG_LEVEL") or "INFO").strip().upper(),
    console_output=True,
    file_output=True,
    format_type="detailed",
    use_colors=(
        os.getenv("PDF2TABLE_USE_COLORS", "true").strip().lower()
        in _TRUTHY_ENV_VALUES
    ),
)

# Parent of the directory that contains this file (symlinks resolved).
DEFAULT_PATH = Path(os.path.realpath(__file__)).parents[1]

__all__ = ["get_logger", "setup_logging", "DEFAULT_PATH"]
Lines changed: 286 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
import logging
2+
import logging.config
3+
from pathlib import Path
4+
from typing import Optional
5+
import sys
6+
7+
8+
class ColoredFormatter(logging.Formatter):
    """Formatter that adds ANSI colors to formatted log lines.

    When the format string has exactly five ``' - '``-separated fields in
    front of ``%(message)s``, the whole header (everything before the
    message) is rendered bold in the level's color. For every other layout
    only the first occurrence of the level name is colored.

    The choice is made from the *format string*, not from the rendered line,
    so a message that itself contains ``' - '`` is never mistaken for header
    fields.
    """

    COLORS = {
        'DEBUG': '\033[36m',     # Cyan
        'INFO': '\033[32m',      # Green
        'WARNING': '\033[33m',   # Yellow
        'ERROR': '\033[31m',     # Red
        'CRITICAL': '\033[35m',  # Magenta
    }

    BOLD = '\033[1m'
    RESET = '\033[0m'

    # Separator between fields in ' - '-delimited layouts.
    _SEPARATOR = ' - '
    # Number of separators before %(message)s that marks a "full header" layout.
    _HEADER_SEPARATORS = 5

    def __init__(self, *args, use_colors=True, **kwargs):
        """Create the formatter.

        Args:
            *args: Positional arguments forwarded to ``logging.Formatter``.
            use_colors: Request colored output; colors are still disabled
                when stdout/stderr are not both TTYs.
            **kwargs: Keyword arguments forwarded to ``logging.Formatter``.
        """
        super().__init__(*args, **kwargs)
        self.use_colors = use_colors and self._supports_color()

        # Decide once, from the layout itself, whether the whole header
        # should be colored.
        header, placeholder, _ = (self._fmt or '').partition('%(message)s')
        self._color_whole_header = (
            bool(placeholder)
            and header.count(self._SEPARATOR) == self._HEADER_SEPARATORS
        )

    def _supports_color(self):
        """Return True only if both stderr and stdout report being a TTY."""
        return (
            hasattr(sys.stderr, 'isatty') and sys.stderr.isatty() and
            hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()
        )

    def format(self, record):
        """Format *record*, adding color codes when colors are enabled."""
        formatted = super().format(record)
        if not self.use_colors:
            return formatted

        level_color = self.COLORS.get(record.levelname, '')
        if not level_color:
            # Custom level without a configured color: leave the line as is.
            return formatted

        style = f"{self.BOLD}{level_color}"

        if self._color_whole_header:
            parts = formatted.split(self._SEPARATOR, self._HEADER_SEPARATORS)
            if len(parts) > self._HEADER_SEPARATORS:
                header = self._SEPARATOR.join(parts[:-1])
                return f"{style}{header}{self.RESET}{self._SEPARATOR}{parts[-1]}"

        colored_level = f"{style}{record.levelname}{self.RESET}"
        return formatted.replace(record.levelname, colored_level, 1)
52+
53+
54+
class LoggerFactory:
    """One-shot configurator and accessor for the ``pdf2table`` loggers.

    Configuration is applied through ``logging.config.dictConfig`` the first
    time ``setup_logging`` runs; later calls are ignored.
    """

    _initialized = False
    _log_dir: Optional[Path] = None

    @classmethod
    def setup_logging(
        cls,
        log_level: str = "INFO",
        log_dir: Optional[str] = None,
        console_output: bool = True,
        file_output: bool = True,
        format_type: str = "detailed",
        use_colors: bool = True
    ) -> None:
        """
        Configure logging for the entire application.

        Only the first call has any effect; subsequent calls return
        immediately without changing the configuration.

        Args:
            log_level: Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL),
                case-insensitive
            log_dir: Directory for log files. If None, uses project root/logs
            console_output: Whether to log to console
            file_output: Whether to log to files
            format_type: Format type ('detailed', 'simple', 'json'); unknown
                values fall back to 'detailed'. Note that 'json' is a
                pipe-delimited text layout, not actual JSON.
            use_colors: Whether to use colors in console output
        """
        if cls._initialized:
            return

        # dictConfig only accepts upper-case level names.
        if isinstance(log_level, str):
            log_level = log_level.strip().upper()

        if log_dir is None:
            cls._log_dir = Path(__file__).parents[2] / "logs"
        else:
            cls._log_dir = Path(log_dir)

        # parents=True so a nested custom log_dir does not fail.
        cls._log_dir.mkdir(parents=True, exist_ok=True)

        formats = {
            "simple": "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            "detailed": "%(asctime)s - %(name)s - %(levelname)s - %(filename)s:%(lineno)d - %(funcName)s() - %(message)s",
            "json": "%(asctime)s | %(name)s | %(levelname)s | %(filename)s:%(lineno)d | %(funcName)s | %(message)s"
        }
        log_format = formats.get(format_type, formats["detailed"])
        date_format = "%Y-%m-%d %H:%M:%S"

        formatters = {
            "standard": {"format": log_format, "datefmt": date_format},
        }
        handlers = {}
        handler_names = []

        if console_output:
            # The colored formatter is only needed by the console handler.
            formatters["colored"] = {
                "()": ColoredFormatter,
                "format": log_format,
                "datefmt": date_format,
                "use_colors": use_colors,
            }
            handlers["console"] = {
                "class": "logging.StreamHandler",
                "level": log_level,
                "formatter": "colored",
                "stream": "ext://sys.stdout",
            }
            handler_names.append("console")

        if file_output:
            rotation = {
                "class": "logging.handlers.RotatingFileHandler",
                "formatter": "standard",
                "maxBytes": 10485760,  # 10 MiB per file
                "backupCount": 5,
                # Explicit encoding so non-ASCII text does not depend on the
                # platform's default locale encoding.
                "encoding": "utf-8",
            }
            handlers["file"] = dict(
                rotation,
                level=log_level,
                filename=str(cls._log_dir / "pdf2table.log"),
            )
            handlers["error_file"] = dict(
                rotation,
                level="ERROR",
                filename=str(cls._log_dir / "pdf2table_errors.log"),
            )
            handler_names.extend(["file", "error_file"])

        config = {
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": formatters,
            "handlers": handlers,
            "loggers": {
                "pdf2table": {
                    "level": log_level,
                    "handlers": list(handler_names),
                    "propagate": False,
                },
            },
            # The root logger has its own top-level key in the dictConfig
            # schema; listing it under "loggers" configures a separate logger
            # named "root" on Python versions before 3.9.
            "root": {
                "level": "WARNING",
                "handlers": list(handler_names),
            },
        }

        logging.config.dictConfig(config)
        cls._initialized = True

        logger = logging.getLogger("pdf2table.logging_config")
        logger.info(
            "Logging initialized - Level: %s, Log dir: %s", log_level, cls._log_dir
        )

    @classmethod
    def get_logger(cls, name: str) -> logging.Logger:
        """
        Get a logger instance for the given name.

        Logging is set up with default settings first if it has not been
        configured yet. Names that do not start with "pdf2table" are placed
        under that namespace ("__main__" becomes "pdf2table.main").

        Args:
            name: Logger name, typically __name__ from the calling module

        Returns:
            Configured logger instance
        """
        if not cls._initialized:
            cls.setup_logging()

        if not name.startswith("pdf2table"):
            if name == "__main__":
                name = "pdf2table.main"
            else:
                name = f"pdf2table.{name}"

        return logging.getLogger(name)

    @classmethod
    def get_log_directory(cls) -> Optional[Path]:
        """Return the log directory, or None if logging was never set up."""
        return cls._log_dir
190+
191+
192+
def get_logger(name: str = None) -> logging.Logger:
    """
    Return a logger from LoggerFactory; module-level shortcut for importing.

    Args:
        name: Logger name. When omitted, the ``__name__`` found in the
            caller's module globals is used ('unknown' if it is missing).

    Returns:
        Configured logger instance
    """
    if name is not None:
        return LoggerFactory.get_logger(name)

    # Frame 1 is the direct caller of this function.
    caller_globals = sys._getframe(1).f_globals
    return LoggerFactory.get_logger(caller_globals.get('__name__', 'unknown'))
207+
208+
209+
def setup_logging(**kwargs) -> None:
    """Forward all keyword arguments to ``LoggerFactory.setup_logging``."""
    configure = LoggerFactory.setup_logging
    configure(**kwargs)
211+
212+
213+
class LogLevel:
    """Context manager for temporarily changing log level.

    On entry the logger's own level is saved and replaced by the requested
    one; on exit the saved level is restored, whether or not the body raised.
    """

    def __init__(self, logger: logging.Logger, level: str):
        """Prepare the temporary level change.

        Args:
            logger: Logger whose level is changed inside the ``with`` block.
            level: Level name (case-insensitive, e.g. "debug") or a numeric
                level such as ``logging.DEBUG``.

        Raises:
            AttributeError: If *level* is a name the logging module does not
                define.
            ValueError: If *level* names a logging attribute that is not a
                numeric level.
        """
        self.logger = logger
        if isinstance(level, int):
            resolved = level
        else:
            resolved = getattr(logging, level.upper())
            # Guard against names like "basic_format" that exist in the
            # logging module but are not levels.
            if not isinstance(resolved, int):
                raise ValueError(f"Unknown level: {level!r}")
        self.new_level = resolved
        self.old_level = None

    def __enter__(self):
        """Apply the temporary level and return the logger."""
        self.old_level = self.logger.level
        self.logger.setLevel(self.new_level)
        return self.logger

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Restore the level saved on entry; exceptions are not suppressed."""
        self.logger.setLevel(self.old_level)
228+
229+
230+
def log_function_call(logger: logging.Logger = None, level: str = "DEBUG"):
    """
    Decorator to log function entry and exit.

    An exception raised by the wrapped function is logged at ERROR level and
    then re-raised unchanged.

    Args:
        logger: Logger to use. If None, creates one based on function module
        level: Log level to use (case-insensitive name)

    Returns:
        A decorator that wraps a function with entry/exit logging.
    """
    import functools

    log_level = getattr(logging, level.upper())

    def decorator(func):
        # Resolve the logger per decorated function instead of rebinding the
        # enclosing variable, so reusing one decorator instance on several
        # functions does not pin them all to the first function's module.
        func_logger = logger if logger is not None else get_logger(func.__module__)

        @functools.wraps(func)  # keep __name__, __doc__, etc. of the original
        def wrapper(*args, **kwargs):
            func_logger.log(log_level, "Entering %s", func.__name__)
            try:
                result = func(*args, **kwargs)
            except Exception as e:
                func_logger.error("Exception in %s: %s", func.__name__, e)
                raise
            func_logger.log(log_level, "Exiting %s", func.__name__)
            return result
        return wrapper
    return decorator
256+
257+
258+
def log_execution_time(logger: logging.Logger = None, level: str = "INFO"):
    """
    Decorator to log function execution time.

    On success the duration is logged at the requested level; if the wrapped
    function raises, the duration and the exception are logged at ERROR level
    and the exception is re-raised unchanged.

    Args:
        logger: Logger to use. If None, creates one based on function module
        level: Log level to use (case-insensitive name)

    Returns:
        A decorator that wraps a function with timing output.
    """
    import functools
    import time

    log_level = getattr(logging, level.upper())

    def decorator(func):
        # Resolve the logger per decorated function instead of rebinding the
        # enclosing variable, so reusing one decorator instance on several
        # functions does not pin them all to the first function's module.
        func_logger = logger if logger is not None else get_logger(func.__module__)

        @functools.wraps(func)  # keep __name__, __doc__, etc. of the original
        def wrapper(*args, **kwargs):
            # perf_counter is monotonic; wall-clock time can jump when the
            # system clock is adjusted and yield wrong or negative durations.
            start_time = time.perf_counter()
            try:
                result = func(*args, **kwargs)
            except Exception as e:
                execution_time = time.perf_counter() - start_time
                func_logger.error(
                    "%s failed after %.3fs: %s", func.__name__, execution_time, e
                )
                raise
            execution_time = time.perf_counter() - start_time
            func_logger.log(
                log_level, "%s executed in %.3fs", func.__name__, execution_time
            )
            return result
        return wrapper
    return decorator

0 commit comments

Comments
 (0)