Skip to content

Commit 785820f

Browse files
committed
chore: remove structlog
It is considered best practice for Python libraries to use the standard library `logging` package rather than configuring a third-party logger on behalf of the application.
1 parent ee4f383 commit 785820f

File tree

15 files changed

+185
-78
lines changed

15 files changed

+185
-78
lines changed

projects/pgai/pgai/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
__version__ = "0.10.0-dev"
22

33
from pgai._install.install import ainstall, install
4+
from pgai.logger import get_logger, set_level
45

5-
__all__ = ["ainstall", "install"]
6+
__all__ = ["ainstall", "install", "get_logger", "set_level"]

projects/pgai/pgai/_install/install.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,14 +2,14 @@
22

33
import psycopg
44
import semver
5-
import structlog
65
from psycopg import sql as sql_lib
76

87
from .. import __version__
8+
from ..logger import get_logger
99

1010
GUC_VECTORIZER_URL = "ai.external_functions_executor_url"
1111

12-
log = structlog.get_logger()
12+
log = get_logger(__name__)
1313

1414

1515
def _get_sql(vector_extension_schema: str) -> str:

projects/pgai/pgai/cli.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,16 @@
88
from typing import Any
99

1010
import click
11-
import structlog
1211
from ddtrace import tracer
1312
from dotenv import load_dotenv
1413
from pytimeparse import parse # type: ignore
1514

1615
from .__init__ import __version__
16+
from .logger import get_logger, set_level
1717

1818
load_dotenv()
1919

20-
structlog.configure(wrapper_class=structlog.make_filtering_bound_logger(logging.INFO))
21-
log = structlog.get_logger()
20+
log = get_logger()
2221

2322

2423
def asbool(value: str | None):
@@ -189,9 +188,8 @@ async def async_run_vectorizer_worker(
189188
signal.signal(signal.SIGINT, shutdown_handler)
190189
signal.signal(signal.SIGTERM, shutdown_handler)
191190

192-
structlog.configure(
193-
wrapper_class=structlog.make_filtering_bound_logger(get_log_level(log_level))
194-
)
191+
# Configure the logging level for pgai loggers
192+
set_level(get_log_level(log_level))
195193

196194
worker = Worker(
197195
db_url,

projects/pgai/pgai/logger.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
import json
2+
import logging
3+
from collections.abc import Callable
4+
from typing import Any, TypeVar
5+
6+
from typing_extensions import override
7+
8+
# Type variable constrained to StructuredMessage so set_renderer keeps
# working on subclasses.
T = TypeVar("T", bound="StructuredMessage")

# Signature of a renderer: takes the message and its keyword data,
# returns the final formatted string.
RendererType = Callable[[str, dict[str, Any]], str]
12+
13+
14+
class StructuredMessage:
    """A log message that carries structured key/value context.

    Instances render lazily: the formatting work in ``__str__`` only
    happens when the logging framework actually emits the record. A
    process-wide custom renderer can be installed via ``set_renderer``.
    """

    # Class-wide renderer override; None means default_renderer is used.
    _renderer: RendererType | None = None

    def __init__(self, message: str, /, **kwargs: Any) -> None:
        self.message: str = message
        self.kwargs: dict[str, Any] = kwargs

    @override
    def __str__(self) -> str:
        # Fall back to the built-in renderer when no override is set.
        if self._renderer is not None:
            render: RendererType = self._renderer
        else:
            render = self.default_renderer
        return render(self.message, self.kwargs)

    @staticmethod
    def default_renderer(msg: str, kwargs: dict[str, Any]) -> str:
        # Message first, then the structured data as a JSON payload.
        return f"{msg} >>> {json.dumps(kwargs)}"

    @classmethod
    def set_renderer(cls: type[T], renderer_func: RendererType) -> None:
        cls._renderer = renderer_func
33+
34+
35+
def set_renderer(renderer_func: RendererType) -> None:
    """Install *renderer_func* as the class-wide StructuredMessage renderer.

    Module-level convenience wrapper around
    ``StructuredMessage.set_renderer``.
    """
    StructuredMessage.set_renderer(renderer_func)
37+
38+
39+
def get_logger(name: str = "") -> logging.Logger:
    """Get a logger instance within the pgai namespace.

    Args:
        name: The logger name; a non-empty name is prefixed with 'pgai.'
            so it inherits configuration applied to the 'pgai' logger.
            An empty name returns the root 'pgai' logger itself.

    Returns:
        A standard-library Logger in the 'pgai' hierarchy.
    """
    # Annotate the name exactly once: annotating the same variable in
    # both branches of an if/else is flagged by static type checkers as
    # a redefinition. The conditional expression also reads better.
    logger_name: str = f"pgai.{name}" if name else "pgai"
    return logging.getLogger(logger_name)
54+
55+
56+
def set_level(level: int | str) -> None:
57+
"""Set the log level for all pgai loggers.
58+
59+
This does not affect the root logger or any other loggers outside
60+
the pgai namespace.
61+
62+
Args:
63+
level: The logging level (e.g., logging.INFO, logging.DEBUG)
64+
or a string level name ('INFO', 'DEBUG', etc.)
65+
"""
66+
if isinstance(level, str):
67+
numeric_level: int = getattr(logging, level.upper(), logging.INFO)
68+
else:
69+
numeric_level = level
70+
71+
logging.getLogger("pgai").setLevel(numeric_level)

projects/pgai/pgai/vectorizer/embedders/litellm.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from pydantic import BaseModel
55
from typing_extensions import override
66

7+
from ...logger import StructuredMessage
78
from ..embeddings import (
89
ApiKeyMixin,
910
Embedder,
@@ -43,7 +44,11 @@ async def embed(
4344
Returns:
4445
Sequence[EmbeddingVector]: The embeddings for each document.
4546
"""
46-
await logger.adebug(f"Chunks produced: {len(documents)}")
47+
logger.debug(
48+
StructuredMessage(
49+
f"Chunks produced: {len(documents)}", chunks=len(documents)
50+
)
51+
)
4752
chunk_lengths = [0 for _ in documents]
4853
async for embeddings in self.batch_chunks_and_embed(documents, chunk_lengths):
4954
yield embeddings
@@ -72,7 +77,7 @@ def _max_chunks_per_batch(self) -> int:
7277
case "voyage":
7378
return 128 # see https://docs.voyageai.com/reference/embeddings-api
7479
case _:
75-
logger.warn(
80+
logger.warning(
7681
f"unknown provider '{custom_llm_provider}', falling back to conservative max chunks per batch" # noqa: E501
7782
)
7883
return 5

projects/pgai/pgai/vectorizer/embedders/ollama.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from pydantic import BaseModel
66
from typing_extensions import TypedDict, override
77

8+
from ...logger import StructuredMessage
89
from ..embeddings import (
910
BaseURLMixin,
1011
Embedder,
@@ -84,7 +85,11 @@ async def embed(
8485
Returns:
8586
Sequence[EmbeddingVector]: The embeddings for each document.
8687
"""
87-
await logger.adebug(f"Chunks produced: {len(documents)}")
88+
logger.debug(
89+
StructuredMessage(
90+
f"Chunks produced: {len(documents)}", chunks=len(documents)
91+
)
92+
)
8893
chunk_lengths = [0 for _ in documents]
8994
async for embeddings in self.batch_chunks_and_embed(documents, chunk_lengths):
9095
yield embeddings
@@ -106,7 +111,7 @@ async def setup(self):
106111
await client.show(self.model)
107112
except ollama.ResponseError as e:
108113
if f"model '{self.model}' not found" in e.error:
109-
logger.warn(
114+
logger.warning(
110115
f"pulling ollama model '{self.model}', this may take a while"
111116
)
112117
await client.pull(self.model)
@@ -138,7 +143,7 @@ async def _context_length(self) -> int | None:
138143
model = await ollama.AsyncClient(host=self.base_url).show(self.model)
139144
architecture = model["model_info"].get("general.architecture", None)
140145
if architecture is None:
141-
logger.warn(f"unable to determine architecture for model '{self.model}'")
146+
logger.warning(f"unable to determine architecture for model '{self.model}'")
142147
return None
143148
context_key = f"{architecture}.context_length"
144149
# see https://github.com/ollama/ollama/blob/712d63c3f06f297e22b1ae32678349187dccd2e4/llm/ggml.go#L116-L118 # noqa

projects/pgai/pgai/vectorizer/embedders/openai.py

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77
from pydantic import BaseModel
88
from typing_extensions import override
99

10+
from ...logger import StructuredMessage
11+
1012
if TYPE_CHECKING:
1113
import openai
1214
import tiktoken
@@ -166,7 +168,11 @@ async def embed(
166168
AsyncGenerator[list[EmbeddingVector], None]: The embeddings for
167169
each document.
168170
"""
169-
await logger.adebug(f"Chunks produced: {len(documents)}")
171+
logger.debug(
172+
StructuredMessage(
173+
f"Chunks produced: {len(documents)}", chunks=len(documents)
174+
)
175+
)
170176
encoder = self._encoder
171177
context_length = self._context_length
172178
if encoder is not None and context_length is not None:
@@ -176,8 +182,12 @@ async def embed(
176182
tokenized = encoder.encode(document)
177183
tokenized_length = len(tokenized)
178184
if tokenized_length > context_length:
179-
await logger.awarning(
180-
f"chunk truncated from {len(tokenized)} to {context_length} tokens" # noqa
185+
logger.warning(
186+
StructuredMessage(
187+
f"chunk truncated from {len(tokenized)} to {context_length} tokens", # noqa
188+
original_length=len(tokenized),
189+
truncated_length=context_length,
190+
)
181191
)
182192
documents[i] = encoder.decode(tokenized[:context_length])
183193
token_counts.append(min(context_length, tokenized_length))

projects/pgai/pgai/vectorizer/embedders/voyageai.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
from pydantic import BaseModel
55
from typing_extensions import override
66

7+
from ...logger import StructuredMessage
78
from ..embeddings import (
89
ApiKeyMixin,
910
Embedder,
@@ -44,7 +45,11 @@ async def embed(
4445
Returns:
4546
Sequence[EmbeddingVector]: The embeddings for each document.
4647
"""
47-
await logger.adebug(f"Chunks produced: {len(documents)}")
48+
logger.debug(
49+
StructuredMessage(
50+
f"Chunks produced: {len(documents)}", chunks=len(documents)
51+
)
52+
)
4853
chunk_lengths = [0 for _ in documents]
4954
async for embeddings in self.batch_chunks_and_embed(documents, chunk_lengths):
5055
yield embeddings

projects/pgai/pgai/vectorizer/embeddings.py

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,11 @@
44
from dataclasses import dataclass
55
from typing import TypeAlias
66

7-
import structlog
87
from ddtrace import tracer
98

10-
logger = structlog.get_logger()
9+
from ..logger import StructuredMessage, get_logger
10+
11+
logger = get_logger(__name__)
1112

1213
EmbeddingVector: TypeAlias = list[float]
1314

@@ -161,9 +162,9 @@ async def batch_chunks_and_embed(
161162
batch_num = i + 1
162163
batch = documents[start:end]
163164

164-
await logger.adebug(f"Batch {batch_num} of {num_batches}")
165-
await logger.adebug(f"Chunks for this batch: {len(batch)}")
166-
await logger.adebug(f"Request {batch_num} of {num_batches} initiated")
165+
logger.debug(f"Batch {batch_num} of {num_batches}")
166+
logger.debug(f"Chunks for this batch: {len(batch)}")
167+
logger.debug(f"Request {batch_num} of {num_batches} initiated")
167168
with tracer.trace("embeddings.do.embedder.create"):
168169
current_span = tracer.current_span()
169170
if current_span:
@@ -181,7 +182,7 @@ async def batch_chunks_and_embed(
181182
request_duration,
182183
)
183184

184-
await logger.adebug(
185+
logger.debug(
185186
f"Request {batch_num} of {num_batches} "
186187
f"ended after: {request_duration} seconds. "
187188
f"Tokens usage: {response_.usage}"
@@ -327,10 +328,12 @@ async def print_stats(self):
327328
chunks per second.
328329
"""
329330
self.wall_time = time.perf_counter() - self.wall_start
330-
await logger.adebug(
331-
"Embedding stats",
332-
total_request_time=self.total_request_time,
333-
wall_time=self.wall_time,
334-
total_chunks=self.total_chunks,
335-
chunks_per_second=self.chunks_per_second(),
331+
logger.debug(
332+
StructuredMessage(
333+
"Embedding stats",
334+
total_request_time=self.total_request_time,
335+
wall_time=self.wall_time,
336+
total_chunks=self.total_chunks,
337+
chunks_per_second=self.chunks_per_second(),
338+
)
336339
)

projects/pgai/pgai/vectorizer/migrations/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,10 @@
66
from typing import Any, Generic, TypeVar
77

88
import semver
9-
import structlog
109

11-
logger = structlog.get_logger()
10+
from ...logger import get_logger
11+
12+
logger = get_logger(__name__)
1213

1314
# config generic type
1415
C = TypeVar("C")

0 commit comments

Comments
 (0)