Skip to content

Commit 5824c53

Browse files
pawel-kmiecik, Copilot, pawel-chmielak-deepsense, Chmielok
authored
o11y: Improve connectors logging (#557)
# Enhance logging in connectors

This PR enhances logging in connectors by introducing the LoggingMixin class, which:

- handles logging for standard connectors' operations
- sanitizes restricted fields such as filenames (optional)

---------

Co-authored-by: Copilot <[email protected]>
Co-authored-by: Paweł Chmielak <[email protected]>
Co-authored-by: Paweł Chmielak <[email protected]>
1 parent 64e30d5 commit 5824c53

File tree

12 files changed

+670
-65
lines changed

12 files changed

+670
-65
lines changed

CHANGELOG.md

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,19 @@
1+
## 1.0.58
2+
3+
* **o11y: Improved logging in connectors' operations with LoggingMixin class**
4+
15
## 1.0.57
26

3-
- **test: Longer interval for pinecone integration tests**
7+
* **test: Longer interval for pinecone integration tests**
48

59
## 1.0.56
610

7-
- **Fix: set correct display_name in HtmlMixin produced FileData**
11+
* **Fix: set correct display_name in HtmlMixin produced FileData**
812

913
## 1.0.55
1014

1115
* **Fix: add precheck method to SharePoint connector**
1216

13-
1417
## 1.0.54
1518

1619
* **Fix bump Togetherai dependency**

test/integration/connectors/test_sharepoint.py

Lines changed: 33 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -333,62 +333,66 @@ def base_sharepoint_config():
333333
"""Base SharePoint config for testing."""
334334
config = sharepoint_config()
335335
return {
336-
'client_id': config.client_id,
337-
'tenant': config.tenant,
338-
'site': "https://unstructuredio.sharepoint.com/sites/utic-platform-test-source",
339-
'client_cred': config.client_cred
336+
"client_id": config.client_id,
337+
"tenant": config.tenant,
338+
"site": "https://unstructuredio.sharepoint.com/sites/utic-platform-test-source",
339+
"client_cred": config.client_cred,
340340
}
341341

342342

343343
@pytest.fixture
344344
def indexer_factory(base_sharepoint_config):
345345
"""Factory for creating SharePoint indexers with different configs."""
346+
346347
def _create_indexer(client_cred=None, site=None, path=None):
347348
access_config = SharepointAccessConfig(
348-
client_cred=client_cred or base_sharepoint_config['client_cred']
349+
client_cred=client_cred or base_sharepoint_config["client_cred"]
349350
)
350351
connection_config = SharepointConnectionConfig(
351-
client_id=base_sharepoint_config['client_id'],
352-
site=site or base_sharepoint_config['site'],
353-
tenant=base_sharepoint_config['tenant'],
352+
client_id=base_sharepoint_config["client_id"],
353+
site=site or base_sharepoint_config["site"],
354+
tenant=base_sharepoint_config["tenant"],
354355
access_config=access_config,
355356
)
356357
index_config = SharepointIndexerConfig(path=path or "")
357358
return SharepointIndexer(
358359
connection_config=connection_config,
359360
index_config=index_config,
360361
)
362+
361363
return _create_indexer
362364

363365

364366
@pytest.mark.asyncio
365367
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
366368
@requires_env("SHAREPOINT_CLIENT_ID", "SHAREPOINT_CRED", "MS_TENANT_ID", "MS_USER_PNAME")
367-
@pytest.mark.parametrize("error_scenario,expected_error", [
368-
("invalid_creds", "UserAuthError"),
369-
("nonexistent_site", "UserError"),
370-
("invalid_path", "UserError"),
371-
])
369+
@pytest.mark.parametrize(
370+
"error_scenario,expected_error",
371+
[
372+
("invalid_creds", "UserAuthError"),
373+
("nonexistent_site", "UserError"),
374+
("invalid_path", "UserError"),
375+
],
376+
)
372377
async def test_sharepoint_precheck_error_scenarios(indexer_factory, error_scenario, expected_error):
373378
"""Parametrized test for different SharePoint precheck error scenarios."""
374379
from unstructured_ingest.errors_v2 import UserAuthError, UserError
375-
376-
error_class_map = {
377-
"UserAuthError": UserAuthError,
378-
"UserError": UserError
379-
}
380-
380+
381+
error_class_map = {"UserAuthError": UserAuthError, "UserError": UserError}
382+
381383
expected_exception = error_class_map[expected_error]
382-
384+
383385
if error_scenario == "invalid_creds":
384386
indexer = indexer_factory(client_cred="invalid_creds")
385387
elif error_scenario == "nonexistent_site":
386-
indexer = indexer_factory(site="https://unstructuredai.sharepoint.com/sites/definitely-does-not-exist-12345")
388+
indexer = indexer_factory(
389+
site="https://unstructuredai.sharepoint.com/sites/definitely-does-not-exist-12345"
390+
)
387391
elif error_scenario == "invalid_path":
388392
indexer = indexer_factory(path="NonExistentFolder/SubFolder/DoesNotExist")
389393
else:
390394
pytest.fail(f"Unknown error scenario: {error_scenario}")
391-
395+
392396
with pytest.raises(expected_exception):
393397
indexer.precheck()
394398

@@ -397,21 +401,21 @@ async def test_sharepoint_precheck_error_scenarios(indexer_factory, error_scenar
397401
def insufficient_perms_config():
398402
"""Config for testing insufficient permissions."""
399403
from types import SimpleNamespace
400-
404+
401405
required_vars = [
402406
"SHAREPOINT_CLIENT_ID_INSUFFICIENT",
403-
"SHAREPOINT_CRED_INSUFFICIENT",
404-
"MS_TENANT_ID_INSUFFICIENT"
407+
"SHAREPOINT_CRED_INSUFFICIENT",
408+
"MS_TENANT_ID_INSUFFICIENT",
405409
]
406-
410+
407411
missing_vars = [var for var in required_vars if var not in os.environ]
408412
if missing_vars:
409413
pytest.skip(f"Missing environment variables: {missing_vars}")
410-
414+
411415
return SimpleNamespace(
412416
client_id=os.environ["SHAREPOINT_CLIENT_ID_INSUFFICIENT"],
413417
client_cred=os.environ["SHAREPOINT_CRED_INSUFFICIENT"],
414-
tenant=os.environ["MS_TENANT_ID_INSUFFICIENT"]
418+
tenant=os.environ["MS_TENANT_ID_INSUFFICIENT"],
415419
)
416420

417421

@@ -428,7 +432,7 @@ async def test_sharepoint_precheck_insufficient_permissions(
428432
access_config = SharepointAccessConfig(client_cred=insufficient_perms_config.client_cred)
429433
connection_config = SharepointConnectionConfig(
430434
client_id=insufficient_perms_config.client_id,
431-
site=base_sharepoint_config['site'],
435+
site=base_sharepoint_config["site"],
432436
tenant=insufficient_perms_config.tenant,
433437
access_config=access_config,
434438
)
@@ -441,5 +445,3 @@ async def test_sharepoint_precheck_insufficient_permissions(
441445

442446
with pytest.raises(UserAuthError):
443447
indexer.precheck()
444-
445-

unstructured_ingest/__version__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
__version__ = "1.0.57" # pragma: no cover
1+
__version__ = "1.0.58" # pragma: no cover

unstructured_ingest/interfaces/connector.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@
55
from pydantic import BaseModel, Secret, model_validator
66
from pydantic.types import _SecretBase
77

8+
from unstructured_ingest.processes.utils.logging.connector import ConnectorLoggingMixin
9+
810

911
class AccessConfig(BaseModel):
1012
"""Meant to designate holding any sensitive information associated with other configs
@@ -46,5 +48,9 @@ def _is_access_config_optional(self) -> bool:
4648

4749

4850
@dataclass
49-
class BaseConnector(ABC):
51+
class BaseConnector(ABC, ConnectorLoggingMixin):
5052
connection_config: ConnectionConfigT
53+
54+
def __post_init__(self):
55+
"""Initialize the logging mixin after dataclass initialization."""
56+
ConnectorLoggingMixin.__init__(self)

0 commit comments

Comments
 (0)