Conversation
0402e2b to
114f1e7
Compare
Codecov Report ❌ — patch coverage details below. Additional details and impacted files: @@ Coverage Diff @@
## main #334 +/- ##
==========================================
- Coverage 85.63% 85.57% -0.06%
==========================================
Files 68 73 +5
Lines 5269 5616 +347
==========================================
+ Hits 4512 4806 +294
- Misses 757 810 +53 ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
|
… and sort builders
… and sorting in log streams
…oved pagination and data access
| _TEXT_TYPES = (DataType.TEXT, DataType.UUID) | ||
| _TEXT_LIST_TYPES = (DataType.TEXT, DataType.UUID, DataType.TAG) | ||
| _NUMERIC_TYPES = (DataType.INTEGER, DataType.FLOATING_POINT) | ||
| _DATE_TYPES = (DataType.TIMESTAMP,) | ||
|
|
||
|
|
||
| def _unwrap_unset(value: Any, default: Any = None) -> Any: |
There was a problem hiding this comment.
For multi-valued string columns the API returns data_type: string_list (see the /spans/available_columns schema in openapi.yaml), but _TEXT_LIST_TYPES only whitelists TEXT, UUID and TAG. When a user calls log_stream.span_columns["tags"].one_of([...]) the new type guard rejects the column with a ValidationError, so valid filters for string_list columns (e.g. tags) are now impossible. Please include DataType.STRING_LIST in the allowed tuple.
| _TEXT_TYPES = (DataType.TEXT, DataType.UUID) | |
| _TEXT_LIST_TYPES = (DataType.TEXT, DataType.UUID, DataType.TAG) | |
| _NUMERIC_TYPES = (DataType.INTEGER, DataType.FLOATING_POINT) | |
| _DATE_TYPES = (DataType.TIMESTAMP,) | |
| def _unwrap_unset(value: Any, default: Any = None) -> Any: | |
| _TEXT_TYPES = (DataType.TEXT, DataType.UUID) | |
| _TEXT_LIST_TYPES = (DataType.TEXT, DataType.UUID, DataType.TAG, DataType.STRING_LIST) | |
| _NUMERIC_TYPES = (DataType.INTEGER, DataType.FLOATING_POINT) | |
| _DATE_TYPES = (DataType.TIMESTAMP,) | |
| def _unwrap_unset(value: Any, default: Any = None) -> Any: |
Finding type: Type Inconsistency
| def get_span_columns(self, project_id: str, log_stream_id: str) -> LogRecordsAvailableColumnsResponse: | ||
| """ | ||
| Get available columns for spans in a log stream. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| project_id : str | ||
| The ID of the project. | ||
| log_stream_id : str | ||
| The ID of the log stream. | ||
|
|
||
| Returns | ||
| ------- | ||
| LogRecordsAvailableColumnsResponse | ||
| The response containing available span columns. | ||
|
|
||
| Raises | ||
| ------ | ||
| errors.UnexpectedStatus | ||
| If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. | ||
| httpx.TimeoutException | ||
| If the request takes longer than Client.timeout. | ||
| """ | ||
| body = LogRecordsAvailableColumnsRequest(log_stream_id=log_stream_id) | ||
| response = spans_available_columns_projects_project_id_spans_available_columns_post.sync( | ||
| project_id=project_id, client=self.config.api_client, body=body | ||
| ) | ||
| if isinstance(response, HTTPValidationError): | ||
| raise response | ||
| if not response: | ||
| raise ValueError("Unable to retrieve span columns") | ||
| return response | ||
|
|
||
| def get_session_columns(self, project_id: str, log_stream_id: str) -> LogRecordsAvailableColumnsResponse: | ||
| """ | ||
| Get available columns for sessions in a log stream. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| project_id : str | ||
| The ID of the project. | ||
| log_stream_id : str | ||
| The ID of the log stream. | ||
|
|
||
| Returns | ||
| ------- | ||
| LogRecordsAvailableColumnsResponse | ||
| The response containing available session columns. | ||
|
|
||
| Raises | ||
| ------ | ||
| errors.UnexpectedStatus | ||
| If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. | ||
| httpx.TimeoutException | ||
| If the request takes longer than Client.timeout. | ||
| """ | ||
| body = LogRecordsAvailableColumnsRequest(log_stream_id=log_stream_id) | ||
| response = sessions_available_columns_projects_project_id_sessions_available_columns_post.sync( | ||
| project_id=project_id, client=self.config.api_client, body=body | ||
| ) | ||
| if isinstance(response, HTTPValidationError): | ||
| raise response | ||
| if not response: | ||
| raise ValueError("Unable to retrieve session columns") | ||
| return response | ||
|
|
||
| def get_trace_columns(self, project_id: str, log_stream_id: str) -> LogRecordsAvailableColumnsResponse: | ||
| """ | ||
| Get available columns for traces in a log stream. | ||
|
|
||
| Parameters | ||
| ---------- | ||
| project_id : str | ||
| The ID of the project. | ||
| log_stream_id : str | ||
| The ID of the log stream. | ||
|
|
||
| Returns | ||
| ------- | ||
| LogRecordsAvailableColumnsResponse | ||
| The response containing available trace columns. | ||
|
|
||
| Raises | ||
| ------ | ||
| errors.UnexpectedStatus | ||
| If the server returns an undocumented status code and Client.raise_on_unexpected_status is True. | ||
| httpx.TimeoutException | ||
| If the request takes longer than Client.timeout. | ||
| """ | ||
| body = LogRecordsAvailableColumnsRequest(log_stream_id=log_stream_id) | ||
| response = traces_available_columns_projects_project_id_traces_available_columns_post.sync( | ||
| project_id=project_id, client=self.config.api_client, body=body | ||
| ) | ||
| # TODO: Check how to properly handle the errors. | ||
| if isinstance(response, HTTPValidationError): | ||
| raise response | ||
| if not response: | ||
| raise ValueError("Unable to retrieve trace columns") | ||
| return response |
There was a problem hiding this comment.
Would it make sense to extract a helper for get_span_columns, get_session_columns, and get_trace_columns? They only differ by the API function called, so maybe something like:
def _get_columns(self, project_id, log_stream_id, api_func):
body = LogRecordsAvailableColumnsRequest(log_stream_id=log_stream_id)
response = api_func.sync(project_id=project_id, client=self.config.api_client, body=body)
if isinstance(response, HTTPValidationError):
raise response
if not response:
raise ValueError("Unable to retrieve columns")
return response
Then each method could call this with the appropriate API function. What do you think?
Prompt for AI Agents:
In `src/galileo/log_streams.py` around lines 519-617, refactor the three column
retrieval methods (`get_span_columns`, `get_session_columns`, and `get_trace_columns`)
to use a common private helper method. Extract a generalized `_get_columns` method that
takes the API function as a parameter, which will eliminate the duplicated error
handling and response processing logic. This will make the code more DRY (Don't Repeat
Yourself), reduce potential for inconsistent error handling, and make future
modifications easier by centralizing the column retrieval logic.
Finding type: Code Dedup and Conventions
| def span_columns(self) -> ColumnCollection: | ||
| """ | ||
| Get available columns for spans in this log stream. | ||
|
|
||
| Returns | ||
| ------- | ||
| ColumnCollection: A collection of columns available for spans, accessible by column ID. | ||
|
|
||
| Raises | ||
| ------ | ||
| ValueError: If the log stream lacks required id or project_id attributes. | ||
|
|
||
| Examples | ||
| -------- | ||
| log_stream = LogStream.get(name="Production Logs", project_name="My AI Project") | ||
| columns = log_stream.span_columns | ||
|
|
||
| # Access a specific column | ||
| input_column = columns["input"] | ||
|
|
||
| # Filter using columns | ||
| spans = log_stream.get_spans( | ||
| filters=[columns["input"].contains("world")], | ||
| sort=columns["created_at"].descending() | ||
| ) | ||
| """ | ||
| if self.id is None: | ||
| raise ValueError("Log stream ID is not set. Cannot get span columns from a local-only log stream.") | ||
| if self.project_id is None: | ||
| raise ValueError("Project ID is not set. Cannot get span columns without project_id.") | ||
|
|
||
| log_streams_service = LogStreams() | ||
| response = log_streams_service.get_span_columns(project_id=self.project_id, log_stream_id=self.id) | ||
| columns = [Column(col) for col in response.columns] | ||
| return ColumnCollection(columns) | ||
|
|
||
| @property | ||
| def session_columns(self) -> ColumnCollection: | ||
| """ | ||
| Get available columns for sessions in this log stream. | ||
|
|
||
| Returns | ||
| ------- | ||
| ColumnCollection: A collection of columns available for sessions, accessible by column ID. | ||
|
|
||
| Raises | ||
| ------ | ||
| ValueError: If the log stream lacks required id or project_id attributes. | ||
|
|
||
| Examples | ||
| -------- | ||
| log_stream = LogStream.get(name="Production Logs", project_name="My AI Project") | ||
| columns = log_stream.session_columns | ||
|
|
||
| # Access a specific column | ||
| model_column = columns["model"] | ||
|
|
||
| # Filter using columns | ||
| sessions = log_stream.get_sessions( | ||
| filters=[columns["model"].equals("gpt-4o-mini")], | ||
| sort=columns["created_at"].descending() | ||
| ) | ||
| """ | ||
| if self.id is None: | ||
| raise ValueError("Log stream ID is not set. Cannot get session columns from a local-only log stream.") | ||
| if self.project_id is None: | ||
| raise ValueError("Project ID is not set. Cannot get session columns without project_id.") | ||
|
|
||
| log_streams_service = LogStreams() | ||
| response = log_streams_service.get_session_columns(project_id=self.project_id, log_stream_id=self.id) | ||
| columns = [Column(col) for col in response.columns] | ||
| return ColumnCollection(columns) | ||
|
|
||
| @property | ||
| def trace_columns(self) -> ColumnCollection: | ||
| """ | ||
| Get available columns for traces in this log stream. | ||
|
|
||
| Returns | ||
| ------- | ||
| ColumnCollection: A collection of columns available for traces, accessible by column ID. | ||
|
|
||
| Raises | ||
| ------ | ||
| ValueError: If the log stream lacks required id or project_id attributes. | ||
|
|
||
| Examples | ||
| -------- | ||
| log_stream = LogStream.get(name="Production Logs", project_name="My AI Project") | ||
| columns = log_stream.trace_columns | ||
|
|
||
| # Access a specific column | ||
| input_column = columns["input"] | ||
|
|
||
| # Filter using columns | ||
| traces = log_stream.get_traces( | ||
| filters=[columns["input"].contains("largest")], | ||
| sort=columns["created_at"].descending() | ||
| ) | ||
| """ | ||
| if self.id is None: | ||
| raise ValueError("Log stream ID is not set. Cannot get trace columns from a local-only log stream.") | ||
| if self.project_id is None: | ||
| raise ValueError("Project ID is not set. Cannot get trace columns without project_id.") | ||
|
|
||
| log_streams_service = LogStreams() | ||
| response = log_streams_service.get_trace_columns(project_id=self.project_id, log_stream_id=self.id) | ||
| columns = [Column(col) for col in response.columns] | ||
| return ColumnCollection(columns) |
There was a problem hiding this comment.
The span_columns, session_columns, and trace_columns property methods all share nearly identical logic, differing only in the LogStreams method called. Would it make sense to extract a private helper like _get_columns(column_type: str) to reduce duplication? For example:
def _get_columns(self, column_type: str) -> ColumnCollection:
if self.id is None:
raise ValueError(...)
if self.project_id is None:
raise ValueError(...)
log_streams_service = LogStreams()
if column_type == "span":
response = log_streams_service.get_span_columns(...)
elif column_type == "session":
response = log_streams_service.get_session_columns(...)
elif column_type == "trace":
response = log_streams_service.get_trace_columns(...)
columns = [Column(col) for col in response.columns]
| return ColumnCollection(columns)

Then each property could just call this helper with the appropriate type. This might help keep things DRY and easier to maintain.
Prompt for AI Agents:
In src/galileo/__future__/log_stream.py around lines 763-871, refactor the
`span_columns`, `session_columns`, and `trace_columns` properties to eliminate code
duplication. Create a private helper method `_get_columns()` that encapsulates the
common logic of validating log stream ID and project ID, calling the appropriate
LogStreams method, and converting columns. This will make the code more DRY (Don't
Repeat Yourself) by centralizing the column retrieval logic and making it easier to
maintain. Each property method should then simply call this helper with the appropriate
column type parameter. The refactoring should preserve the existing error handling, type
conversion, and overall functionality while significantly reducing code repetition.
Finding type: Code Dedup and Conventions
| # Pagination | ||
| if result.next_starting_token: | ||
| next_result = result.next_page() | ||
| for record in next_result: | ||
| print(record["id"]) |
There was a problem hiding this comment.
Here the docstring still shows next_result = result.next_page() and iterating over it, but next_page() was changed to mutate in place and return None, so this example now raises when copied. Please update the guidance to reflect that the same QueryResult instance should be used after pagination.
| # Pagination | |
| if result.next_starting_token: | |
| next_result = result.next_page() | |
| for record in next_result: | |
| print(record["id"]) | |
| # Pagination | |
| if result.next_starting_token: | |
| result.next_page() | |
| for record in result: | |
| print(record["id"]) |
Finding type: Logical Bugs
User description
This is a draft branch to share ideas about what the future of the SDK should look like.
This code can be locally installed using pip:
Generated description
Below is a concise technical summary of the changes proposed in this PR:
graph LR LogStream_span_columns_("LogStream.span_columns"):::added LogStreams_get_span_columns_("LogStreams.get_span_columns"):::added LogStream_session_columns_("LogStream.session_columns"):::added LogStreams_get_session_columns_("LogStreams.get_session_columns"):::added LogStream_trace_columns_("LogStream.trace_columns"):::added LogStreams_get_trace_columns_("LogStreams.get_trace_columns"):::added LogStream_query_("LogStream.query"):::modified QueryResult_("QueryResult"):::added LogStream_get_spans_("LogStream.get_spans"):::modified LogStream_get_traces_("LogStream.get_traces"):::modified LogStream_get_sessions_("LogStream.get_sessions"):::modified LogStream_span_columns_ -- "Added method to fetch span columns via LogStreams service" --> LogStreams_get_span_columns_ LogStream_session_columns_ -- "Added method to fetch session columns via LogStreams service" --> LogStreams_get_session_columns_ LogStream_trace_columns_ -- "Added method to fetch trace columns via LogStreams service" --> LogStreams_get_trace_columns_ LogStream_query_ -- "Modified to wrap results in QueryResult for pagination" --> QueryResult_ LogStream_query_ -- "Uses project and log stream IDs to query data" --> LogStreams_get_span_columns_ LogStream_get_spans_ -- "Now calls updated query returning paginated span results" --> LogStream_query_ LogStream_get_traces_ -- "Now calls updated query returning paginated trace results" --> LogStream_query_ LogStream_get_sessions_ -- "Now calls updated query returning paginated session results" --> LogStream_query_ classDef added stroke:#15AA7A classDef removed stroke:#CD5270 classDef modified stroke:#EDAC4C linkStyle default stroke:#CBD5E1,font-size:13px

Introduce a new, enhanced query API for `LogStream` objects within the `galileo.__future__` SDK, providing a `QueryResult` class for paginated results and `Column` and `ColumnCollection` classes for type-safe filtering and sorting. Refactor common base classes and exceptions into a new `shared` module to centralize utilities and improve code organization. New query capabilities are added to `LogStream` objects, enabling type-safe filtering and sorting through `Column` and `ColumnCollection` classes. This enhancement also introduces a `QueryResult` wrapper for paginated results, providing list-like access and automatic flattening of nested records, significantly improving the developer experience for data retrieval.

Modified files (13)
Latest Contributors(2)
`StateManagementMixin`, `SyncState`) and exception classes (`APIError`, `ValidationError`, etc.) into a new `galileo.__future__.shared` module. This improves modularity, reduces redundancy, and centralizes shared utilities across various SDK components.

Modified files (13)
Latest Contributors(2)