Skip to content

Commit 876ed88

Browse files
committed
test
1 parent cca421b commit 876ed88

File tree

2 files changed

+165
-131
lines changed

2 files changed

+165
-131
lines changed

src/databricks/sql/telemetry/latency_logger.py

Lines changed: 131 additions & 123 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
import time
22
import functools
3-
from typing import Optional
3+
from typing import Optional, Dict, Any
44
import logging
55
from databricks.sql.telemetry.telemetry_client import TelemetryClientFactory
66
from databricks.sql.telemetry.models.event import (
@@ -11,127 +11,141 @@
1111
logger = logging.getLogger(__name__)
1212

1313

14-
def _extract_cursor_data(cursor) -> Dict[str, Any]:
    """Extract telemetry fields directly from a Cursor object.

    Uses direct attribute access (no wrapper/extractor objects) to minimize
    overhead on the hot path. Every field is read defensively so a partially
    initialized cursor can never break telemetry collection.

    Args:
        cursor: The Cursor object to extract data from.

    Returns:
        Dict with keys ``statement_id``, ``session_id_hex``, ``is_compressed``,
        ``execution_result``, ``retry_count`` and ``chunk_id``; values fall
        back to None / safe defaults when extraction fails.
    """
    data: Dict[str, Any] = {}

    # statement_id (query_id) - may be unset before the first execute()
    try:
        data["statement_id"] = cursor.query_id
    except Exception:
        # NOTE: AttributeError is already an Exception subclass, so a single
        # broad handler is sufficient here (telemetry must never raise).
        data["statement_id"] = None

    # session_id_hex comes from the owning connection
    try:
        data["session_id_hex"] = cursor.connection.get_session_id_hex()
    except Exception:
        data["session_id_hex"] = None

    # lz4 compression flag from the owning connection
    try:
        data["is_compressed"] = cursor.connection.lz4_compression
    except Exception:
        data["is_compressed"] = False

    # Result format is derived from the concrete type of the active result
    # queue; falls back to FORMAT_UNSPECIFIED on any failure.
    try:
        if cursor.active_result_set is None:
            data["execution_result"] = ExecutionResultFormat.FORMAT_UNSPECIFIED
        else:
            # Imported lazily to avoid a circular import at module load time.
            from databricks.sql.utils import ColumnQueue, CloudFetchQueue, ArrowQueue

            results = cursor.active_result_set.results
            if isinstance(results, ColumnQueue):
                data["execution_result"] = ExecutionResultFormat.COLUMNAR_INLINE
            elif isinstance(results, CloudFetchQueue):
                data["execution_result"] = ExecutionResultFormat.EXTERNAL_LINKS
            elif isinstance(results, ArrowQueue):
                data["execution_result"] = ExecutionResultFormat.INLINE_ARROW
            else:
                data["execution_result"] = ExecutionResultFormat.FORMAT_UNSPECIFIED
    except Exception:
        data["execution_result"] = ExecutionResultFormat.FORMAT_UNSPECIFIED

    # Retry count from the backend's retry policy history, when available.
    try:
        retry_policy = getattr(cursor.backend, "retry_policy", None)
        data["retry_count"] = len(retry_policy.history) if retry_policy else 0
    except Exception:
        data["retry_count"] = 0

    # chunk_id only applies to result-set download handlers.
    data["chunk_id"] = None

    return data
def _extract_result_set_handler_data(handler) -> Dict[str, Any]:
    """Extract telemetry fields directly from a ResultSetDownloadHandler.

    Uses direct attribute access (no wrapper/extractor objects) and
    defensive reads so a malformed handler never breaks telemetry.

    Args:
        handler: The ResultSetDownloadHandler object to extract data from.

    Returns:
        Dict with keys ``session_id_hex``, ``statement_id``, ``is_compressed``,
        ``execution_result``, ``retry_count`` and ``chunk_id``; values fall
        back to None / safe defaults when extraction fails.
    """
    data: Dict[str, Any] = {}

    # NOTE: AttributeError is an Exception subclass, so a single broad
    # handler suffices for each read (telemetry must never raise).
    try:
        data["session_id_hex"] = handler.session_id_hex
    except Exception:
        data["session_id_hex"] = None

    try:
        data["statement_id"] = handler.statement_id
    except Exception:
        data["statement_id"] = None

    try:
        data["is_compressed"] = handler.settings.is_lz4_compressed
    except Exception:
        data["is_compressed"] = False

    # Download handlers always fetch externally linked (cloud-fetch) results.
    data["execution_result"] = ExecutionResultFormat.EXTERNAL_LINKS

    # The underlying requests/urllib3 stack does not expose a retry count.
    data["retry_count"] = None

    try:
        data["chunk_id"] = handler.chunk_id
    except Exception:
        data["chunk_id"] = None

    return data

111127

112-
def _extract_telemetry_data(obj) -> Optional[Dict[str, Any]]:
    """Dispatch telemetry extraction based on the runtime type of *obj*.

    Returns a plain dict rather than a wrapper object so the
    SqlExecutionEvent can be built later (e.g. in a background thread)
    without holding a reference to the original object.

    Args:
        obj: Object to extract data from (Cursor, ResultSetDownloadHandler, ...).

    Returns:
        Dict with telemetry data, or None when the type is not supported.
    """
    type_name = obj.__class__.__name__

    if type_name == "Cursor":
        return _extract_cursor_data(obj)
    if type_name == "ResultSetDownloadHandler":
        return _extract_result_set_handler_data(obj)

    logger.debug("No telemetry extraction available for %s", type_name)
    return None

137151

@@ -143,11 +157,10 @@ def log_latency(statement_type: StatementType = StatementType.NONE):
143157
data about the operation, including latency, statement information, and
144158
execution context.
145159
146-
The decorator automatically:
147-
- Measures execution time using high-precision performance counters
148-
- Extracts telemetry information from the method's object (self)
149-
- Creates a SqlExecutionEvent with execution details
150-
- Sends the telemetry data asynchronously via TelemetryClient
160+
OPTIMIZATIONS APPLIED:
161+
- Uses time.monotonic() instead of time.perf_counter() for faster timing
162+
- Direct attribute access instead of wrapper extractor objects
163+
- Dict-based data collection to minimize object creation overhead
151164
152165
Args:
153166
statement_type (StatementType): The type of SQL statement being executed.
@@ -162,46 +175,41 @@ def execute(self, query):
162175
function: A decorator that wraps methods to add latency logging.
163176
164177
Note:
165-
The wrapped method's object (self) must be compatible with the
166-
telemetry extractor system (e.g., Cursor or ResultSet objects).
178+
The wrapped method's object (self) must be a Cursor or
179+
ResultSetDownloadHandler for telemetry data extraction.
167180
"""
168181

169182
def decorator(func):
170183
@functools.wraps(func)
171184
def wrapper(self, *args, **kwargs):
172-
start_time = time.perf_counter()
185+
# Use monotonic clock for faster timing, sufficient for telemetry
186+
start_time = time.monotonic()
173187
result = None
174188
try:
175189
result = func(self, *args, **kwargs)
176190
return result
177191
finally:
178-
179-
def _safe_call(func_to_call):
180-
"""Calls a function and returns a default value on any exception."""
181-
try:
182-
return func_to_call()
183-
except Exception:
184-
return None
185-
186-
end_time = time.perf_counter()
192+
# Calculate duration once
193+
end_time = time.monotonic()
187194
duration_ms = int((end_time - start_time) * 1000)
188195

189-
extractor = get_extractor(self)
196+
# Extract telemetry data directly without creating extractor objects
197+
telemetry_data = _extract_telemetry_data(self)
190198

191-
if extractor is not None:
192-
session_id_hex = _safe_call(extractor.get_session_id_hex)
193-
statement_id = _safe_call(extractor.get_statement_id)
199+
if telemetry_data is not None:
200+
session_id_hex = telemetry_data.get('session_id_hex')
201+
statement_id = telemetry_data.get('statement_id')
194202

203+
# Create event from extracted data
195204
sql_exec_event = SqlExecutionEvent(
196205
statement_type=statement_type,
197-
is_compressed=_safe_call(extractor.get_is_compressed),
198-
execution_result=_safe_call(
199-
extractor.get_execution_result_format
200-
),
201-
retry_count=_safe_call(extractor.get_retry_count),
202-
chunk_id=_safe_call(extractor.get_chunk_id),
206+
is_compressed=telemetry_data.get('is_compressed'),
207+
execution_result=telemetry_data.get('execution_result'),
208+
retry_count=telemetry_data.get('retry_count'),
209+
chunk_id=telemetry_data.get('chunk_id'),
203210
)
204211

212+
# Send telemetry asynchronously
205213
telemetry_client = TelemetryClientFactory.get_telemetry_client(
206214
session_id_hex
207215
)

src/databricks/sql/telemetry/telemetry_client.py

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import time
33
import logging
44
import json
5+
from queue import Queue, Full
56
from concurrent.futures import ThreadPoolExecutor
67
from concurrent.futures import Future
78
from datetime import datetime, timezone
@@ -180,8 +181,11 @@ def __init__(
180181
self._session_id_hex = session_id_hex
181182
self._auth_provider = auth_provider
182183
self._user_agent = None
183-
self._events_batch = []
184-
self._lock = threading.RLock()
184+
185+
# OPTIMIZATION: Use lock-free Queue instead of list + lock
186+
# Queue is thread-safe internally and has better performance under concurrency
187+
self._events_queue = Queue(maxsize=batch_size * 2) # Allow some buffering
188+
185189
self._driver_connection_params = None
186190
self._host_url = host_url
187191
self._executor = executor
@@ -192,19 +196,41 @@ def __init__(
192196
def _export_event(self, event):
    """Add an event to the batch queue and flush when the batch is full.

    ``queue.Queue`` is internally synchronized, so no explicit lock is held
    here. NOTE: ``qsize()`` is only an approximate count under concurrency;
    an occasionally early or late flush is acceptable for telemetry.
    """
    logger.debug("Exporting event for connection %s", self._session_id_hex)

    try:
        self._events_queue.put_nowait(event)
    except Full:
        # Queue overflowed: flush to make room, then retry exactly once.
        logger.debug("Event queue full, triggering flush")
        self._flush()
        try:
            self._events_queue.put_nowait(event)
        except Full:
            # Still full - dropping is acceptable for telemetry data.
            # Return here: we just flushed, so re-checking qsize() below
            # would only trigger a redundant second flush.
            logger.debug("Dropped telemetry event - queue still full")
            return

    # Flush once the (approximate) queue size reaches the batch size.
    if self._events_queue.qsize() >= self._batch_size:
        logger.debug(
            "Batch size limit reached (%s), flushing events", self._batch_size
        )
        self._flush()

203222
def _flush(self):
204223
"""Flush the current batch of events to the server"""
205-
with self._lock:
206-
events_to_flush = self._events_batch.copy()
207-
self._events_batch = []
224+
# OPTIMIZATION: Drain queue without locks
225+
# Collect all events currently in the queue
226+
events_to_flush = []
227+
while not self._events_queue.empty():
228+
try:
229+
event = self._events_queue.get_nowait()
230+
events_to_flush.append(event)
231+
except:
232+
# Queue is empty
233+
break
208234

209235
if events_to_flush:
210236
logger.debug("Flushing %s telemetry events to server", len(events_to_flush))

0 commit comments

Comments
 (0)