Skip to content

Commit e701f13

Browse files
authored
Merge pull request #8202 from fstagni/cherry-pick-2-2e4317e76-integration
[sweep:integration] Add PreferredURLPatterns for URL sorting
2 parents 9f1f318 + 82065a8 commit e701f13

File tree

4 files changed

+131
-11
lines changed

4 files changed

+131
-11
lines changed

docs/source/AdministratorGuide/Configuration/ConfReference/Systems/index.rst

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,44 @@ In each system you normally find the following sections:
3434
* FailoverURLs: Like URLs, but they are only tried if no server in URLs was successfully contacted.
3535

3636

37+
Preferred URLs
38+
--------------
39+
40+
For most services, the standard ``URLs`` and ``FailoverURLs`` mechanism provides a way to specify primary and backup service endpoints.
41+
42+
However, this approach has limitations in certain scenarios:
43+
44+
- Some services (like the Configuration service) have replicas that automatically register themselves in the Configuration System
45+
- External servers ("voboxes") running at sites may not be accessible from all clients
46+
- Connection attempts to inaccessible servers cause errors that, while harmless due to fallback mechanisms, slow down DIRAC and generate misleading error messages
47+
48+
To address these issues, you can define ``PreferredURLPatterns``, a list of regular expressions identifying the subset of URLs to try first:
49+
50+
.. code-block:: guess
51+
52+
System
53+
{
54+
URLs
55+
{
56+
Service = dips://host1.main.invalid:1234/System/Service,dips://host2.main.invalid:1234/System/Service,dips://external.invalid:1234/System/Service
57+
}
58+
}
59+
DIRAC
60+
{
61+
PreferredURLPatterns = .*\.main\.invalid:.*
62+
}
63+
64+
In this example:
65+
66+
1. The ``PreferredURLPatterns`` specifies a regular expression that matches servers in the ``main.invalid`` domain
67+
2. When connecting to the service, DIRAC will first try URLs matching this pattern (``host1.main.invalid`` and ``host2.main.invalid``)
68+
3. Only if these preferred servers fail will DIRAC attempt to connect to other servers (``external.invalid``)
69+
70+
This approach reduces connection errors and improves performance by prioritizing servers that are more likely to be accessible from the client.
71+
72+
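.. note:: ``PreferredURLPatterns`` is a comma-separated list of regular expressions, not a single regular expression. This allows you to specify multiple patterns to match different subsets of servers if desired. Patterns are listed in decreasing order of preference and each one is matched from the beginning of the URL; URLs that match no pattern are tried last.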
73+
74+
3775
Main Servers
3876
------------
3977

src/DIRAC/ConfigurationSystem/Client/PathFinder.py

Lines changed: 47 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,12 @@
11
""" Collection of utilities for finding paths in the CS
22
"""
3+
import re
4+
from copy import deepcopy
5+
from collections.abc import Iterable
36
from urllib import parse
47

8+
from cachetools import cached, TTLCache
9+
510
from DIRAC.Core.Utilities import List
611
from DIRAC.ConfigurationSystem.Client.ConfigurationData import gConfigurationData
712
from DIRAC.ConfigurationSystem.Client.Helpers import Path
@@ -151,6 +156,43 @@ def getSystemURLs(system, failover=False):
151156
return urlDict
152157

153158

159+
def groupURLsByPriority(urls: Iterable[str]) -> list[set[str]]:
    """Group URLs by priority.

    The grouping is delegated to the cached ``_groupURLsByPriority`` helper, which
    ranks the URLs against the ``/DIRAC/PreferredURLPatterns`` option.

    :param Iterable[str] urls: URLs to group (duplicates are collapsed)

    :return: list[set[str]] -- list of URL groups, ordered by priority (most preferred first)
    """
    # The helper's result is shared through its cache, so hand back fresh sets
    # that the caller may mutate freely. URLs are immutable strings, hence
    # copying each set is equivalent to a deep copy without its overhead.
    return [set(urlGroup) for urlGroup in _groupURLsByPriority(frozenset(urls))]
168+
169+
170+
@cached(cache=TTLCache(maxsize=1024, ttl=300))
def _groupURLsByPriority(urls: frozenset[str]) -> list[set[str]]:
    """Split ``urls`` into groups ranked by ``/DIRAC/PreferredURLPatterns``.

    Results are memoised per set of URLs in a TTL cache (``maxsize=1024``, ``ttl=300``),
    so the returned object is shared between calls with the same argument.

    :param frozenset[str] urls: URLs to group (hashable so it can be used as cache key)

    :return: list[set[str]] -- one set per configured pattern, in pattern order,
             followed by a final set holding the URLs that matched no pattern
    """
    rawPatterns = gConfigurationData.extractOptionFromCFG("/DIRAC/PreferredURLPatterns")
    compiledPatterns = [re.compile(raw) for raw in List.fromChar(rawPatterns)] if rawPatterns else []

    # One bucket per pattern plus a trailing bucket for unmatched URLs
    buckets = [set() for _ in range(len(compiledPatterns) + 1)]
    for candidate in urls:
        buckets[findURLPriority(compiledPatterns, candidate)].add(candidate)
    return buckets
180+
181+
182+
def findURLPriority(preferredURLPatterns: list[re.Pattern[str]], url: str) -> int:
    """Find which preferred URL pattern the URL matches.

    :param list preferredURLPatterns: regular expressions to check, in ranked order
                                      (most preferred first)
    :param str url: URL to check

    :return: int -- index of the first pattern that matched, smallest is the most preferred;
             ``len(preferredURLPatterns)`` if no pattern matched
    """
    for priority, pattern in enumerate(preferredURLPatterns):
        # re.match anchors at the beginning of the URL; it is used (rather than
        # pattern.match) so that plain pattern strings keep working as well
        if re.match(pattern, url):
            return priority
    # No pattern matched: rank after every preferred pattern
    return len(preferredURLPatterns)
194+
195+
154196
def getServiceURLs(system, service=None, failover=False):
155197
"""Generate url.
156198
@@ -168,8 +210,8 @@ def getServiceURLs(system, service=None, failover=False):
168210
# Add failover URLs at the end of the list
169211
failover = "Failover" if failover else ""
170212
for fURLs in ["", "Failover"] if failover else [""]:
171-
urlList = []
172213
urls = List.fromChar(gConfigurationData.extractOptionFromCFG(f"{systemSection}/{fURLs}URLs/{service}"))
214+
urlList = set()
173215

174216
# Be sure that urls not None
175217
for url in urls or []:
@@ -186,16 +228,13 @@ def getServiceURLs(system, service=None, failover=False):
186228

187229
for srv in mainServers:
188230
_url = checkComponentURL(url.replace("$MAINSERVERS$", srv), system, service, pathMandatory=True)
189-
if _url not in urlList:
190-
urlList.append(_url)
231+
urlList.add(_url)
191232
continue
192233

193-
_url = checkComponentURL(url, system, service, pathMandatory=True)
194-
if _url not in urlList:
195-
urlList.append(_url)
234+
urlList.add(checkComponentURL(url, system, service, pathMandatory=True))
196235

197-
# Randomize list if needed
198-
resList.extend(List.randomize(urlList))
236+
for urlGroup in groupURLsByPriority(urlList):
237+
resList.extend(List.randomize(urlGroup))
199238

200239
return resList
201240

src/DIRAC/ConfigurationSystem/Client/test/Test_PathFinder.py

Lines changed: 41 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,26 @@
1111
localCFGData = ConfigurationData(False)
1212
mergedCFG = CFG()
1313
mergedCFG.loadFromBuffer(
14-
"""
14+
r"""
15+
DIRAC
16+
{
17+
PreferredURLPatterns = dips://.*\.site:.*
18+
PreferredURLPatterns += dips://.*\.other:.*
19+
}
1520
Systems
1621
{
22+
Configuration
23+
{
24+
URLs
25+
{
26+
Server = dips://server1.site:1234/Configuration/Server
27+
Server += dips://server2.site:1234/Configuration/Server
28+
Server += dips://server3.site:1234/Configuration/Server
29+
Server += dips://server4.site:1234/Configuration/Server
30+
Server += dips://server.other:1234/Configuration/Server
31+
Server += dips://server.external:1234/Configuration/Server
32+
}
33+
}
1734
WorkloadManagement
1835
{
1936
URLs
@@ -181,6 +198,29 @@ def test_getServiceURLs(pathFinder, serviceName, service, failover, result):
181198
assert set(pathFinder.getServiceURLs(serviceName, service=service, failover=failover)) == result
182199

183200

201+
def test_getServiceURLsOrdering(pathFinder):
    """Ensure the PreferredURLPatterns option is respected"""
    # URLs matching the first (most preferred) pattern
    siteURLs = {
        "dips://server1.site:1234/Configuration/Server",
        "dips://server2.site:1234/Configuration/Server",
        "dips://server3.site:1234/Configuration/Server",
        "dips://server4.site:1234/Configuration/Server",
    }
    # URL matching the second pattern, and the one matching no pattern at all
    otherURL = "dips://server.other:1234/Configuration/Server"
    externalURL = "dips://server.external:1234/Configuration/Server"
    expectedURLs = siteURLs | {otherURL, externalURL}

    all_results = set()
    for _ in range(10_000):
        urls = pathFinder.getServiceURLs("Configuration", service="Server")
        # Every URL must be returned exactly once
        assert len(urls) == len(expectedURLs)
        assert set(urls) == expectedURLs
        # The second to last URL should always be "other"
        assert urls[-2] == otherURL
        # The last URL should always be the one which isn't preferred
        assert urls[-1] == externalURL
        all_results.add(tuple(urls))
    # There are 4! = 24 possible orderings of the preferred URLs, we should have seen all
    # of them (and nothing else) in 10_000 iterations
    assert len(all_results) == 24
222+
223+
184224
@pytest.mark.parametrize(
185225
"system, failover, result",
186226
[

src/DIRAC/Core/Utilities/List.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,10 @@
33
"""
44
import random
55
import sys
6-
from typing import Any
6+
from typing import Any, TypeVar
7+
from collections.abc import Iterable
8+
9+
T = TypeVar("T")
710

811

912
def uniqueElements(aList: list) -> list:
@@ -37,7 +40,7 @@ def fromChar(inputString: str, sepChar: str = ","):
3740
return [fieldString.strip() for fieldString in inputString.split(sepChar) if len(fieldString.strip()) > 0]
3841

3942

40-
def randomize(aList: list) -> list:
43+
def randomize(aList: Iterable[T]) -> list[T]:
4144
"""Return a randomly sorted list.
4245
4346
:param aList: list to permute

0 commit comments

Comments
 (0)