
Commit 16e186f

Merge branch 'main' into brian/merge_concurrent_declarative_source

2 parents: 0a58453 + 2b07f93

25 files changed: +543 additions, -631 deletions

airbyte_cdk/connector_builder/test_reader/reader.py

Lines changed: 6 additions & 2 deletions

@@ -122,10 +122,14 @@ def run_test_read(
         deprecation_warnings: List[LogMessage] = source.deprecation_warnings()

         schema_inferrer = SchemaInferrer(
-            self._pk_to_nested_and_composite_field(stream.primary_key) if stream else None,
-            self._cursor_field_to_nested_and_composite_field(stream.cursor_field)
+            self._pk_to_nested_and_composite_field(
+                stream.primary_key if hasattr(stream, "primary_key") else stream._primary_key  # type: ignore # We are accessing the private property here as the primary key is not exposed. We should either expose it or use `as_airbyte_stream` to retrieve it as this is the "official" way where it is exposed in the Airbyte protocol
+            )
             if stream
             else None,
+            self._cursor_field_to_nested_and_composite_field(stream.cursor_field)
+            if stream and stream.cursor_field
+            else None,
         )
         datetime_format_inferrer = DatetimeFormatInferrer()
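
Note: the hunk above makes the Connector Builder's test reader tolerate concurrent streams whose primary key is not exposed publicly. A minimal sketch of the same fallback pattern, with hypothetical LegacyStream/ConcurrentStream classes standing in for the CDK types:

from typing import Any, List, Optional, Union


class LegacyStream:
    """Stands in for a legacy stream that exposes `primary_key` publicly."""

    primary_key: Union[str, List[str]] = "id"


class ConcurrentStream:
    """Stands in for a concurrent stream that only keeps a private `_primary_key`."""

    def __init__(self, primary_key: Union[str, List[str]]) -> None:
        self._primary_key = primary_key


def resolve_primary_key(stream: Optional[Any]) -> Optional[Union[str, List[str]]]:
    # Mirror the hasattr fallback from the hunk above: prefer the public
    # attribute, fall back to the private one, and tolerate a missing stream.
    if stream is None:
        return None
    return stream.primary_key if hasattr(stream, "primary_key") else stream._primary_key


assert resolve_primary_key(LegacyStream()) == "id"
assert resolve_primary_key(ConcurrentStream(["account_id", "date"])) == ["account_id", "date"]
assert resolve_primary_key(None) is None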

airbyte_cdk/entrypoint.py

Lines changed: 2 additions & 2 deletions

@@ -22,7 +22,7 @@

 from airbyte_cdk.connector import TConfig
 from airbyte_cdk.exception_handler import init_uncaught_exception_handler
-from airbyte_cdk.logger import PRINT_BUFFER, init_logger
+from airbyte_cdk.logger import PRINT_BUFFER, init_logger, is_platform_debug_log_enabled
 from airbyte_cdk.models import (
     AirbyteConnectionStatus,
     AirbyteMessage,

@@ -158,7 +158,7 @@ def run(self, parsed_args: argparse.Namespace) -> Iterable[str]:
         if not cmd:
             raise Exception("No command passed")

-        if hasattr(parsed_args, "debug") and parsed_args.debug:
+        if (hasattr(parsed_args, "debug") and parsed_args.debug) or is_platform_debug_log_enabled():
            self.logger.setLevel(logging.DEBUG)
            logger.setLevel(logging.DEBUG)
            self.logger.debug("Debug logs enabled")
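
Note: with this change, debug logging can be enabled either by the --debug CLI flag or by the platform via an environment variable. A self-contained sketch of the combined check; configure_log_level is a hypothetical helper, while the env-var convention matches the new is_platform_debug_log_enabled in airbyte_cdk/logger.py:

import argparse
import logging
import os


def is_platform_debug_log_enabled() -> bool:
    # Same convention as the new helper in airbyte_cdk/logger.py: the platform
    # requests debug logs by setting LOG_LEVEL=debug (case-insensitive).
    return os.environ.get("LOG_LEVEL", "info").lower() == "debug"


def configure_log_level(parsed_args: argparse.Namespace, logger: logging.Logger) -> None:
    # Hypothetical helper: debug wins if either the CLI flag or the env var
    # asks for it, matching the updated condition in Entrypoint.run.
    if getattr(parsed_args, "debug", False) or is_platform_debug_log_enabled():
        logger.setLevel(logging.DEBUG)
        logger.debug("Debug logs enabled")


parser = argparse.ArgumentParser()
parser.add_argument("--debug", action="store_true")
configure_log_level(parser.parse_args(["--debug"]), logging.getLogger("airbyte"))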

airbyte_cdk/legacy/sources/declarative/manifest_declarative_source.py

Lines changed: 8 additions & 2 deletions

@@ -8,7 +8,7 @@
 from copy import deepcopy
 from importlib import metadata
 from types import ModuleType
-from typing import Any, Dict, Iterator, List, Mapping, Optional, Set
+from typing import Any, Dict, Iterator, List, Mapping, Optional, Set, Union

 import orjson
 import yaml

@@ -66,6 +66,7 @@
 from airbyte_cdk.sources.declarative.resolvers import COMPONENTS_RESOLVER_TYPE_MAPPING
 from airbyte_cdk.sources.declarative.spec.spec import Spec
 from airbyte_cdk.sources.message import MessageRepository
+from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
 from airbyte_cdk.sources.streams.core import Stream
 from airbyte_cdk.sources.types import Config, ConnectionDefinition
 from airbyte_cdk.sources.utils.slice_logger import (

@@ -297,7 +298,12 @@ def connection_checker(self) -> ConnectionChecker:
             f"Expected to generate a ConnectionChecker component, but received {check_stream.__class__}"
         )

-    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
+    def streams(self, config: Mapping[str, Any]) -> List[Union[Stream, AbstractStream]]:  # type: ignore # we are migrating away from the AbstractSource and are expecting that this will only be called by ConcurrentDeclarativeSource or the Connector Builder
+        """
+        As a migration step, this method will return both legacy streams (Stream) and concurrent streams (AbstractStream).
+        Once the migration is done, we can probably have this method throw "not implemented" as we figure out how to
+        fully decouple this from the AbstractSource.
+        """
         if self._spec_component:
             self._spec_component.validate_config(config)
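
Note: while the migration is underway, callers of streams() receive a mix of legacy Stream and concurrent AbstractStream objects and have to branch on the type, as _group_streams does later in this commit. A rough sketch with a hypothetical describe_streams helper:

from typing import List, Union

from airbyte_cdk.sources.streams.concurrent.abstract_stream import AbstractStream
from airbyte_cdk.sources.streams.core import Stream


def describe_streams(streams: List[Union[Stream, AbstractStream]]) -> List[str]:
    # Hypothetical helper: report which code path each stream takes while the
    # two stream models coexist. Both types expose a `name` property.
    return [
        f"{stream.name}: concurrent" if isinstance(stream, AbstractStream) else f"{stream.name}: legacy (synchronous)"
        for stream in streams
    ]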

airbyte_cdk/logger.py

Lines changed: 21 additions & 3 deletions

@@ -1,10 +1,10 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
-
 import json
 import logging
 import logging.config
+import os
 from typing import Any, Callable, Mapping, Optional, Tuple

 import orjson

@@ -40,6 +40,10 @@
 }


+def is_platform_debug_log_enabled() -> bool:
+    return os.environ.get("LOG_LEVEL", "info").lower() == "debug"
+
+
 def init_logger(name: Optional[str] = None) -> logging.Logger:
     """Initial set up of logger"""
     logger = logging.getLogger(name)

@@ -73,8 +77,22 @@ def format(self, record: logging.LogRecord) -> str:
         airbyte_level = self.level_mapping.get(record.levelno, "INFO")
         if airbyte_level == Level.DEBUG:
             extras = self.extract_extra_args_from_record(record)
-            debug_dict = {"type": "DEBUG", "message": record.getMessage(), "data": extras}
-            return filter_secrets(json.dumps(debug_dict))
+            if is_platform_debug_log_enabled():
+                # We have different behavior between debug logs enabled through the `--debug` argument and debug logs
+                # enabled through the environment variable. The reason is that for platform logs, we need these
+                # printed as AirbyteMessage, which was not the case with the previous implementation.
+                # Why not migrate both to AirbyteMessages then? AirbyteMessages do not support structured logs,
+                # which means the DX would be degraded compared to the current solution (devs would need to identify
+                # the `log.message` field and figure out where in that field the response is, while the current solution
+                # has a specific field that is structured for extras).
+                message = f"{filter_secrets(record.getMessage())} ///\nExtra logs: {filter_secrets(json.dumps(extras))}"
+                log_message = AirbyteMessage(
+                    type=Type.LOG, log=AirbyteLogMessage(level=airbyte_level, message=message)
+                )
+                return orjson.dumps(AirbyteMessageSerializer.dump(log_message)).decode()
+            else:
+                debug_dict = {"type": "DEBUG", "message": record.getMessage(), "data": extras}
+                return filter_secrets(json.dumps(debug_dict))
         else:
             message = super().format(record)
             message = filter_secrets(message)
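
Note: after this change the formatter emits one of two shapes for debug records: a structured JSON dict for local --debug runs, and an AirbyteMessage-style LOG line when the platform requested debug logs. A rough sketch of the two output shapes only, with plain dicts standing in for the CDK's AirbyteMessage serializer and secret filtering omitted:

import json
from typing import Any, Mapping


def format_debug_local(message: str, extras: Mapping[str, Any]) -> str:
    # Local --debug path: structured JSON, so devs find extras in a dedicated field.
    return json.dumps({"type": "DEBUG", "message": message, "data": extras})


def format_debug_platform(message: str, extras: Mapping[str, Any]) -> str:
    # Platform path: everything folded into log.message of an AirbyteMessage-shaped
    # record (a plain dict here), since AirbyteMessage log lines are unstructured.
    folded = f"{message} ///\nExtra logs: {json.dumps(extras)}"
    return json.dumps({"type": "LOG", "log": {"level": "DEBUG", "message": folded}})


extras = {"response_status": 429}
print(format_debug_local("rate limited", extras))
print(format_debug_platform("rate limited", extras))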

airbyte_cdk/sources/concurrent_source/concurrent_read_processor.py

Lines changed: 1 addition & 1 deletion

@@ -2,6 +2,7 @@
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
 import logging
+import os
 from typing import Dict, Iterable, List, Optional, Set

 from airbyte_cdk.exception_handler import generate_failed_streams_error_message

@@ -153,7 +154,6 @@ def on_record(self, record: Record) -> Iterable[AirbyteMessage]:
                 stream.as_airbyte_stream(), AirbyteStreamStatus.RUNNING
             )
         self._record_counter[stream.name] += 1
-        stream.cursor.observe(record)
         yield message
         yield from self._message_repository.consume_queue()

airbyte_cdk/sources/concurrent_source/concurrent_source.py

Lines changed: 1 addition & 0 deletions

@@ -1,6 +1,7 @@
 #
 # Copyright (c) 2023 Airbyte, Inc., all rights reserved.
 #
+
 import concurrent
 import logging
 from queue import Queue

airbyte_cdk/sources/declarative/concurrent_declarative_source.py

Lines changed: 10 additions & 10 deletions

@@ -19,6 +19,7 @@
     Optional,
     Set,
     Tuple,
+    Union,
 )

 import orjson

@@ -51,10 +52,6 @@
 from airbyte_cdk.sources.declarative.checks.connection_checker import ConnectionChecker
 from airbyte_cdk.sources.declarative.concurrency_level import ConcurrencyLevel
 from airbyte_cdk.sources.declarative.declarative_stream import DeclarativeStream
-from airbyte_cdk.sources.declarative.extractors import RecordSelector
-from airbyte_cdk.sources.declarative.extractors.record_filter import (
-    ClientSideIncrementalRecordFilterDecorator,
-)
 from airbyte_cdk.sources.declarative.incremental import (
     ConcurrentPerPartitionCursor,
     GlobalSubstreamCursor,

@@ -205,7 +202,6 @@ def __init__(
         # incremental streams running in full refresh.
         component_factory = ModelToComponentFactory(
             emit_connector_builder_messages=emit_connector_builder_messages,
-            disable_resumable_full_refresh=True,
             message_repository=ConcurrentMessageRepository(queue, message_repository),
             connector_state_manager=self._connector_state_manager,
             max_concurrent_async_job_count=source_config.get("max_concurrent_async_job_count"),

@@ -459,7 +455,7 @@ def discover(self, logger: logging.Logger, config: Mapping[str, Any]) -> Airbyte
             ]
         )

-    def streams(self, config: Mapping[str, Any]) -> List[Stream]:
+    def streams(self, config: Mapping[str, Any]) -> List[Union[Stream, AbstractStream]]:  # type: ignore # we are migrating away from the AbstractSource and are expecting that this will only be called by ConcurrentDeclarativeSource or the Connector Builder
         """
         The `streams` method is used as part of the AbstractSource in the following cases:
         * ConcurrentDeclarativeSource.check -> ManifestDeclarativeSource.check -> AbstractSource.check -> DeclarativeSource.check_connection -> CheckStream.check_connection -> streams

@@ -622,6 +618,10 @@ def _group_streams(
             # these legacy Python streams the way we do low-code streams to determine if they are concurrent compatible,
             # so we need to treat them as synchronous

+            if isinstance(declarative_stream, AbstractStream):
+                concurrent_streams.append(declarative_stream)
+                continue
+
             supports_file_transfer = (
                 isinstance(declarative_stream, DeclarativeStream)
                 and "file_uploader" in name_to_stream_mapping[declarative_stream.name]
@@ -691,7 +691,7 @@ def _group_streams(
                 partition_generator = StreamSlicerPartitionGenerator(
                     partition_factory=DeclarativePartitionFactory(
                         stream_name=declarative_stream.name,
-                        json_schema=declarative_stream.get_json_schema(),
+                        schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
                         retriever=retriever,
                         message_repository=self._message_repository,
                         max_records_limit=self._limits.max_records

@@ -728,7 +728,7 @@ def _group_streams(
                 partition_generator = StreamSlicerPartitionGenerator(
                     partition_factory=DeclarativePartitionFactory(
                         stream_name=declarative_stream.name,
-                        json_schema=declarative_stream.get_json_schema(),
+                        schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
                         retriever=retriever,
                         message_repository=self._message_repository,
                         max_records_limit=self._limits.max_records

@@ -762,7 +762,7 @@ def _group_streams(
                 partition_generator = StreamSlicerPartitionGenerator(
                     DeclarativePartitionFactory(
                         stream_name=declarative_stream.name,
-                        json_schema=declarative_stream.get_json_schema(),
+                        schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
                         retriever=declarative_stream.retriever,
                         message_repository=self._message_repository,
                         max_records_limit=self._limits.max_records if self._limits else None,

@@ -826,7 +826,7 @@ def _group_streams(
                 partition_generator = StreamSlicerPartitionGenerator(
                     DeclarativePartitionFactory(
                         stream_name=declarative_stream.name,
-                        json_schema=declarative_stream.get_json_schema(),
+                        schema_loader=declarative_stream._schema_loader,  # type: ignore # We are accessing the private property but the public one is optional and we will remove this code soonish
                         retriever=retriever,
                         message_repository=self._message_repository,
                         max_records_limit=self._limits.max_records if self._limits else None,
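
Note: the four hunks above replace an eagerly computed json_schema with the stream's schema loader, which lets the partition factory resolve schemas on demand. A minimal before/after sketch under that assumption, with hypothetical factory classes; the real DeclarativePartitionFactory signature may differ:

from typing import Any, Mapping


class SchemaLoader:
    """Stands in for a declarative schema loader; loading may be expensive."""

    def get_json_schema(self) -> Mapping[str, Any]:
        return {"type": "object", "properties": {"id": {"type": "string"}}}


class EagerPartitionFactory:
    # Before: the schema is computed once, up front, even if it is never needed.
    def __init__(self, stream_name: str, json_schema: Mapping[str, Any]) -> None:
        self._stream_name = stream_name
        self._json_schema = json_schema


class LazyPartitionFactory:
    # After: the loader is passed in and the schema is resolved on demand.
    def __init__(self, stream_name: str, schema_loader: SchemaLoader) -> None:
        self._stream_name = stream_name
        self._schema_loader = schema_loader

    def json_schema(self) -> Mapping[str, Any]:
        return self._schema_loader.get_json_schema()


loader = SchemaLoader()
eager = EagerPartitionFactory("users", loader.get_json_schema())
lazy = LazyPartitionFactory("users", loader)
assert lazy.json_schema() == loader.get_json_schema()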
