Skip to content

Commit 4667acb

Browse files
[Enhancement] Add ServiceNow Data Handler (Source/Sink) Integration (#65)
* feat: add ServiceNow integration with PySNC
  - Add ServiceNowHandler for reading/writing ServiceNow tables
  - Support multiple authentication methods (Basic, OAuth2, env vars)
  - Implement auto-table creation for custom tables (u_* prefix)
  - Add flexible query building (dict filters, encoded queries)
  - Support insert/update/upsert operations with key_field matching
  - Add automatic field prefixing for custom tables
  - Implement batch processing and streaming mode
  - Add comprehensive test suite with mock-based tests
  - Add complete documentation in data handler README
  - Add example tasks for insert and update workflows
  - Add fluent API support (from_servicenow/to_servicenow)
  - Update configuration models for ServiceNow sources/sinks
* Resolved pysnc and boto3 version conflict
* Fixed test case import
* fix: support older PySNC

---------

Co-authored-by: Sriram Puttagunta <[email protected]>
1 parent a85f259 commit 4667acb

File tree

13 files changed

+2261
-13
lines changed

13 files changed

+2261
-13
lines changed

docs/concepts/data_handler/README.md

Lines changed: 678 additions & 8 deletions
Large diffs are not rendered by default.

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ dependencies = [
5858
"types-pyyaml>=6.0,<7.0",
5959
"fasttext-wheel (>=0.9.2,<0.10.0)",
6060
"litellm (>=1.79.3,<2.0.0)",
61+
"pysnc (>=1.1.0,<1.2.0)",
6162
"boto3 (>=1.40.71,<2.0.0)",
6263
"google-auth (>=2.43.0,<3.0.0)",
6364
"google-cloud-aiplatform (>=1.128.0,<2.0.0)",
@@ -143,6 +144,7 @@ sentence-transformers = "^5.1"
143144
soundfile = "^0.13"
144145
types-pyyaml = "^6.0"
145146
litellm = "^1.79.3"
147+
pysnc = ">=1.1.0,<1.2.0"
146148
boto3 = "^1.40.71"
147149
google-auth = "^2.43.0"
148150
google-cloud-aiplatform = "^1.128.0"

sygra/core/base_task_executor.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from sygra.core.dataset.dataset_processor import DatasetProcessor
2222
from sygra.core.dataset.file_handler import FileHandler
2323
from sygra.core.dataset.huggingface_handler import HuggingFaceHandler
24+
from sygra.core.dataset.servicenow_handler import ServiceNowHandler
2425
from sygra.core.graph.graph_config import GraphConfig
2526
from sygra.core.graph.langgraph.graph_builder import LangGraphBuilder
2627
from sygra.core.graph.sygra_state import SygraState
@@ -436,7 +437,7 @@ def _generate_empty_dataset(self) -> list[dict]:
436437
logger.info(f"Generating {num_records} empty records")
437438
return [{} for _ in range(num_records)]
438439

439-
def _get_data_reader(self) -> Union[HuggingFaceHandler, FileHandler]:
440+
def _get_data_reader(self) -> Union[HuggingFaceHandler, FileHandler, ServiceNowHandler]:
440441
"""Get appropriate data reader based on source type"""
441442
if self.source_config is None:
442443
raise ValueError("source_config must be set to get a data reader")
@@ -445,6 +446,8 @@ def _get_data_reader(self) -> Union[HuggingFaceHandler, FileHandler]:
445446
return HuggingFaceHandler(self.source_config)
446447
elif self.source_config.type == DataSourceType.DISK_FILE:
447448
return FileHandler(self.source_config)
449+
elif self.source_config.type == DataSourceType.SERVICENOW:
450+
return ServiceNowHandler(self.source_config)
448451
else:
449452
raise ValueError(f"Unsupported data source type: {self.source_config.type}")
450453

@@ -731,6 +734,11 @@ def execute(self):
731734
source_config=self.source_config,
732735
output_config=self.output_config,
733736
).write(data)
737+
elif self.output_config.type == OutputType.SERVICENOW:
738+
ServiceNowHandler(
739+
source_config=None,
740+
output_config=self.output_config,
741+
).write(data)
734742
else:
735743
if self.output_config.file_path is None:
736744
raise ValueError("file_path must be set for output_config")

sygra/core/dataset/dataset_config.py

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -17,10 +17,12 @@ class DataSourceType(Enum):
1717
Attributes:
1818
HUGGINGFACE: HuggingFace dataset source
1919
DISK_FILE: Local file system source
20+
SERVICENOW: ServiceNow table source
2021
"""
2122

2223
HUGGINGFACE = "hf"
2324
DISK_FILE = "disk"
25+
SERVICENOW = "servicenow"
2426

2527

2628
class TransformConfig(BaseModel):
@@ -44,13 +46,15 @@ class OutputType(Enum):
4446
JSONL: JSON Lines file output
4547
CSV: CSV file output
4648
PARQUET: Parquet file output
49+
SERVICENOW: ServiceNow table output
4750
"""
4851

4952
HUGGINGFACE = "hf"
5053
JSON = "json"
5154
JSONL = "jsonl"
5255
CSV = "csv"
5356
PARQUET = "parquet"
57+
SERVICENOW = "servicenow"
5458
NONE = None
5559

5660

@@ -69,8 +73,14 @@ class ShardConfig(BaseModel):
6973
class DataSourceConfig(BaseModel):
7074
"""Configuration for data sources.
7175
72-
This class provides configuration options for both HuggingFace datasets
73-
and local file system sources, including transformation specifications.
76+
This class provides configuration options for HuggingFace datasets,
77+
local file system sources, and ServiceNow tables, including transformation specs.
78+
79+
For ServiceNow sources:
80+
- Connection credentials (instance, username, password) are read from
81+
environment variables: SNOW_INSTANCE, SNOW_USERNAME, SNOW_PASSWORD
82+
- Only query details (table, filters, fields, etc.) need to be specified
83+
- Config values for credentials are optional overrides
7484
7585
Attributes:
7686
type (DataSourceType): Type of data source
@@ -83,6 +93,9 @@ class DataSourceConfig(BaseModel):
8393
file_format (Optional[str]): Format for local files
8494
file_path (Optional[str]): Path to local file
8595
encoding (str): Character encoding for text files
96+
table (Optional[str]): ServiceNow table name for queries
97+
filters (Optional[dict]): Filters for ServiceNow queries
98+
fields (Optional[list[str]]): Fields to retrieve from ServiceNow
8699
transformations (Optional[list[TransformConfig]]): List of transformations to apply
87100
"""
88101

@@ -101,6 +114,27 @@ class DataSourceConfig(BaseModel):
101114
file_path: Optional[str] = None
102115
encoding: str = "utf-8"
103116

117+
# For ServiceNow tables
118+
instance: Optional[str] = None
119+
username: Optional[str] = None
120+
password: Optional[str] = None
121+
oauth_client_id: Optional[str] = None
122+
oauth_client_secret: Optional[str] = None
123+
table: Optional[str] = None
124+
query: Optional[str] = None
125+
filters: Optional[dict[str, Any]] = None
126+
fields: Optional[list[str]] = None
127+
limit: Optional[int] = None
128+
batch_size: int = 100
129+
order_by: Optional[str] = None
130+
order_desc: bool = False
131+
display_value: str = "all"
132+
exclude_reference_link: bool = True
133+
proxy: Optional[str] = None
134+
verify_ssl: Optional[bool] = None
135+
cert: Optional[str] = None
136+
auto_retry: bool = True
137+
104138
# Transformation functions
105139
transformations: Optional[list[TransformConfig]] = None
106140

@@ -169,8 +203,14 @@ def from_dict(cls, config: dict[str, Any]) -> "DataSourceConfig":
169203
class OutputConfig(BaseModel):
170204
"""Configuration for data output operations.
171205
172-
This class provides configuration options for both HuggingFace datasets
173-
and local file system outputs.
206+
This class provides configuration options for HuggingFace datasets,
207+
local file system outputs, and ServiceNow tables.
208+
209+
For ServiceNow outputs:
210+
- Connection credentials (instance, username, password) are read from
211+
environment variables: SNOW_INSTANCE, SNOW_USERNAME, SNOW_PASSWORD
212+
- Only operation details (table, operation, key_field) need to be specified
213+
- Config values for credentials are optional overrides
174214
175215
Attributes:
176216
type (OutputType): Type of output
@@ -183,6 +223,9 @@ class OutputConfig(BaseModel):
183223
filename (Optional[str]): Output filename
184224
file_path (Optional[str]): Output file path
185225
encoding (str): Character encoding for text files
226+
table (Optional[str]): ServiceNow table name for output
227+
operation (str): ServiceNow operation (insert/update/upsert)
228+
key_field (str): Field to match for update/upsert operations
186229
"""
187230

188231
type: Optional[OutputType] = None
@@ -196,6 +239,20 @@ class OutputConfig(BaseModel):
196239
file_path: Optional[str] = None
197240
encoding: str = "utf-8"
198241

242+
# For ServiceNow output
243+
instance: Optional[str] = None
244+
username: Optional[str] = None
245+
password: Optional[str] = None
246+
oauth_client_id: Optional[str] = None
247+
oauth_client_secret: Optional[str] = None
248+
table: Optional[str] = None
249+
operation: str = "insert" # insert, update, or upsert
250+
key_field: str = "sys_id" # Field to match for update/upsert
251+
proxy: Optional[str] = None
252+
verify_ssl: Optional[bool] = None
253+
cert: Optional[str] = None
254+
auto_retry: bool = True
255+
199256
@classmethod
200257
def from_dict(cls, config: dict[str, Any]) -> "OutputConfig":
201258
"""Create configuration from dictionary.
@@ -217,4 +274,17 @@ def from_dict(cls, config: dict[str, Any]) -> "OutputConfig":
217274
filename=config.get("filename"),
218275
file_path=config.get("file_path"),
219276
encoding=config.get("encoding", "utf-8"),
277+
# ServiceNow fields
278+
instance=config.get("instance"),
279+
username=config.get("username"),
280+
password=config.get("password"),
281+
oauth_client_id=config.get("oauth_client_id"),
282+
oauth_client_secret=config.get("oauth_client_secret"),
283+
table=config.get("table"),
284+
operation=config.get("operation", "insert"),
285+
key_field=config.get("key_field", "sys_id"),
286+
proxy=config.get("proxy"),
287+
verify_ssl=config.get("verify_ssl"),
288+
cert=config.get("cert"),
289+
auto_retry=config.get("auto_retry", True),
220290
)

0 commit comments

Comments
 (0)