Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .idea/vcs.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 20 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,26 @@ All notable changes to this project will be documented in this file. Dates are d

Generated by [`auto-changelog`](https://github.com/CookPete/auto-changelog).

#### [1.5.0](https://github.com/ivdatahub/api-to-dataframe/compare/1.4.0...1.5.0)

> 10 March 2025

- feature: Comprehensive OpenTelemetry integration with otel-wrapper:
- Added detailed tracing for all operations (HTTP requests, dataframe conversions, retries)
- Added metrics for performance monitoring and error tracking
- Added structured logging with contextual information
- Improved error handling with detailed error context
- Complete observability across all components using otel-wrapper

#### [1.4.0](https://github.com/ivdatahub/api-to-dataframe/compare/1.3.11...1.4.0)

> 10 March 2025

- feature: add otel-wrapper
- feature: update logging implementation and version bump
- chore: deps
- chore: bump version

#### [1.3.11](https://github.com/ivdatahub/api-to-dataframe/compare/1.3.10...1.3.11)

> 24 September 2024
Expand Down
23 changes: 23 additions & 0 deletions conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# Root conftest.py
# This file ensures that pytest only collects tests from our tests directory
# and ignores any tests in temporary directories or installed packages

import os
import sys

# Put the in-development package sources first on sys.path so that test
# imports resolve against src/, never against an installed copy of the package.
_src_dir = os.path.join(os.path.dirname(__file__), "src")
sys.path.insert(0, os.path.abspath(_src_dir))

def pytest_ignore_collect(collection_path, config):
    """
    Decide whether pytest should skip *collection_path* during collection.

    Any path containing a ``temp`` directory component is ignored so that
    scratch/temporary files are never collected as tests.

    Args:
        collection_path: Path of the candidate file or directory
            (a ``pathlib.Path`` on modern pytest; any str()-able path works).
        config: The pytest config object (unused).

    Returns:
        bool: True if the path should be ignored, False otherwise.
    """
    # Normalize the platform separator before splitting into components.
    # The previous '"temp/" in str(path)' substring test never matched on
    # Windows (backslash separators) and also matched unrelated directories
    # such as "mytemp/"; a component comparison avoids both problems.
    parts = str(collection_path).replace(os.sep, "/").split("/")
    if "temp" in parts:
        return True

    return False
14 changes: 7 additions & 7 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

26 changes: 24 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "api-to-dataframe"
version = "1.4.0"
version = "1.5.0"
description = "A package to convert API responses to pandas dataframe"
authors = ["IvanildoBarauna <[email protected]>"]
readme = "README.md"
Expand All @@ -27,7 +27,7 @@ python = "^3.9"
pandas = "^2.2.3"
requests = "^2.32.3"
logging = "^0.4.9.6"
otel-wrapper = "^0.0.1"
otel-wrapper = "^0.1.0"

[tool.poetry.group.dev.dependencies]
poetry-dynamic-versioning = "^1.3.0"
Expand Down Expand Up @@ -58,3 +58,25 @@ disable = [
"C0115", # missing-class-docstring
"R0903", # too-few-public-methods
]

[tool.pytest.ini_options]
testpaths = ["tests"]
python_files = "test_*.py"
python_classes = ["Test*"]
python_functions = ["test_*"]

[tool.coverage.run]
source = ["src/api_to_dataframe"]
omit = [
"tests/*",
"temp/*",
"*/__init__.py",
]

[tool.coverage.report]
exclude_lines = [
"pragma: no cover",
"def __repr__",
"raise NotImplementedError",
"if __name__ == .__main__.:",
]
182 changes: 165 additions & 17 deletions src/api_to_dataframe/controller/client_builder.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from api_to_dataframe.models.retainer import retry_strategies, Strategies
from api_to_dataframe.models.get_data import GetData
from api_to_dataframe.utils.logger import logger
from otel_wrapper import OpenObservability
from api_to_dataframe.utils.logger import logger, telemetry
import time


class ClientBuilder:
Expand Down Expand Up @@ -35,16 +35,40 @@ def __init__( # pylint: disable=too-many-positional-arguments,too-many-argument
if headers is None:
headers = {}
if endpoint == "":
logger.error("endpoint cannot be an empty string")
error_msg = "endpoint cannot be an empty string"
logger.error(error_msg)
telemetry.logs().new_log(
msg=error_msg,
tags={"component": "ClientBuilder", "method": "__init__"},
level=40 # ERROR level
)
raise ValueError
if not isinstance(retries, int) or retries < 0:
logger.error("retries must be a non-negative integer")
error_msg = "retries must be a non-negative integer"
logger.error(error_msg)
telemetry.logs().new_log(
msg=error_msg,
tags={"component": "ClientBuilder", "method": "__init__"},
level=40 # ERROR level
)
raise ValueError
if not isinstance(initial_delay, int) or initial_delay < 0:
logger.error("initial_delay must be a non-negative integer")
error_msg = "initial_delay must be a non-negative integer"
logger.error(error_msg)
telemetry.logs().new_log(
msg=error_msg,
tags={"component": "ClientBuilder", "method": "__init__"},
level=40 # ERROR level
)
raise ValueError
if not isinstance(connection_timeout, int) or connection_timeout < 0:
logger.error("connection_timeout must be a non-negative integer")
error_msg = "connection_timeout must be a non-negative integer"
logger.error(error_msg)
telemetry.logs().new_log(
msg=error_msg,
tags={"component": "ClientBuilder", "method": "__init__"},
level=40 # ERROR level
)
raise ValueError

self.endpoint = endpoint
Expand All @@ -53,9 +77,28 @@ def __init__( # pylint: disable=too-many-positional-arguments,too-many-argument
self.headers = headers
self.retries = retries
self.delay = initial_delay
self._o11y_wrapper = OpenObservability(application_name="api-to-dataframe").get_wrapper()
self._traces = self._o11y_wrapper.traces()
self._tracer = self._traces.get_tracer()

# Record client initialization metric
telemetry.metrics().metric_increment(
name="client.initialization",
tags={
"endpoint": endpoint,
"retry_strategy": retry_strategy.name,
"connection_timeout": str(connection_timeout)
}
)

# Log initialization
telemetry.logs().new_log(
msg=f"ClientBuilder initialized with endpoint {endpoint}",
tags={
"endpoint": endpoint,
"retry_strategy": retry_strategy.name,
"connection_timeout": str(connection_timeout),
"component": "ClientBuilder"
},
level=20 # INFO level
)

@retry_strategies
def get_api_data(self):
Expand All @@ -69,16 +112,62 @@ def get_api_data(self):
Returns:
dict: The JSON response from the API as a dictionary.
"""

with self._tracer.start_as_current_span("get_last_quote") as span:
# Use the telemetry spans with context manager
with telemetry.traces().span_in_context("get_api_data") as (span, _):
# Add span attributes
span.set_attribute("endpoint", self.endpoint)

span.set_attribute("retry_strategy", self.retry_strategy.name)
span.set_attribute("connection_timeout", self.connection_timeout)

# Log the API request
telemetry.logs().new_log(
msg=f"Making API request to {self.endpoint}",
tags={
"endpoint": self.endpoint,
"component": "ClientBuilder",
"method": "get_api_data"
},
level=20 # INFO level
)

# Record the start time for response time measurement
start_time = time.time()

# Make the API request
response = GetData.get_response(
endpoint=self.endpoint,
headers=self.headers,
connection_timeout=self.connection_timeout,
)


# Calculate response time
response_time = time.time() - start_time

# Record response time as histogram
telemetry.metrics().record_histogram(
name="api.response_time",
tags={"endpoint": self.endpoint},
value=response_time
)

# Record successful request metric
telemetry.metrics().metric_increment(
name="api.request.success",
tags={"endpoint": self.endpoint}
)

# Log success
telemetry.logs().new_log(
msg=f"API request to {self.endpoint} successful",
tags={
"endpoint": self.endpoint,
"response_status": response.status_code,
"response_time": response_time,
"component": "ClientBuilder",
"method": "get_api_data"
},
level=20 # INFO level
)

return response.json()

Expand All @@ -97,7 +186,66 @@ def api_to_dataframe(response: dict):
Returns:
DataFrame: A pandas DataFrame containing the data from the API response.
"""

df = GetData.to_dataframe(response)

return df
# Use telemetry for this operation
with telemetry.traces().span_in_context("api_to_dataframe") as (span, _):
response_size = len(response) if isinstance(response, list) else 1
span.set_attribute("response_size", response_size)

# Log conversion start
telemetry.logs().new_log(
msg="Converting API response to DataFrame",
tags={
"response_size": response_size,
"response_type": type(response).__name__,
"component": "ClientBuilder",
"method": "api_to_dataframe"
},
level=20 # INFO level
)

try:
# Convert to dataframe
df = GetData.to_dataframe(response)

# Record metrics
telemetry.metrics().metric_increment(
name="dataframe.conversion.success",
tags={"size": len(df)}
)

# Log success
telemetry.logs().new_log(
msg="Successfully converted API response to DataFrame",
tags={
"dataframe_rows": len(df),
"dataframe_columns": len(df.columns),
"component": "ClientBuilder",
"method": "api_to_dataframe"
},
level=20 # INFO level
)

return df

except Exception as e:
# Record failure metric
telemetry.metrics().metric_increment(
name="dataframe.conversion.failure",
tags={"error_type": type(e).__name__}
)

# Log error
error_msg = f"Failed to convert API response to DataFrame: {str(e)}"
telemetry.logs().new_log(
msg=error_msg,
tags={
"error": str(e),
"error_type": type(e).__name__,
"component": "ClientBuilder",
"method": "api_to_dataframe"
},
level=40 # ERROR level
)

# Re-raise the exception
raise
Loading