Skip to content

Commit f57a40c

Browse files
Refactor _AzureSearchSettings initialization to ensure proper inheritance from BaseSettings and DatasourcePayloadConstructor; update regex pattern in PdfTextSplitter for improved URL matching
1 parent ecdcfc0 commit f57a40c

File tree

2 files changed

+5
-6
lines changed

2 files changed

+5
-6
lines changed

scripts/data_utils.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -161,7 +161,7 @@ def find_urls(string):
161161
r"(?i)\b("
162162
r"(?:https?://|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)"
163163
r"(?:[^()\s<>]+|\(([^()\s<>]+|(\([^()\s<>]+\)))*\))+"
164-
r"(?:\(([^()\s<>]+|(\([^()\s<>]+\)))*\)|[^()\s`!()\[\]{};:'\".,<>?«»“”‘’])"
164+
r"(?:\(([^()\s<>]+|(\([^()\s<>]+\)))*\)|[^()\s`!\[\]{};:'\".,<>?«»“”‘’])"
165165
r")"
166166
)
167167
urls = re.findall(regex, string)

src/backend/settings.py

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -275,11 +275,6 @@ class _AzureSearchSettings(BaseSettings, DatasourcePayloadConstructor):
275275
endpoint_suffix: str = Field(default="search.windows.net", exclude=True)
276276
connection_name: Optional[str] = None
277277
index: str = Field(serialization_alias="index_name")
278-
279-
def __init__(self, settings: "_AppSettings", **data):
280-
# Ensure both BaseSettings and DatasourcePayloadConstructor are initialized
281-
super().__init__(settings=settings, **data)
282-
283278
key: Optional[str] = Field(default=None, exclude=True)
284279
use_semantic_search: bool = Field(default=False, exclude=True)
285280
semantic_search_config: str = Field(
@@ -308,6 +303,10 @@ def __init__(self, settings: "_AppSettings", **data):
308303
fields_mapping: Optional[dict] = None
309304
filter: Optional[str] = Field(default=None, exclude=True)
310305

306+
def __init__(self, settings: "_AppSettings", **data):
307+
# Ensure both BaseSettings and DatasourcePayloadConstructor are initialized
308+
super().__init__(settings=settings, **data)
309+
311310
@field_validator("content_columns", "vector_columns", mode="before")
312311
@classmethod
313312
def split_columns(cls, comma_separated_string: str) -> List[str]:

0 commit comments

Comments
 (0)