Skip to content

Commit af54a2e

Browse files
committed
Merge branch 'master' into feat/tracab_meta
2 parents 0fe591f + 45ab84c commit af54a2e

File tree

17 files changed

+1279
-80
lines changed

17 files changed

+1279
-80
lines changed

kloppy/_providers/sportec.py

Lines changed: 155 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
from typing import Optional, List
1+
from typing import List, Optional
2+
3+
from requests.exceptions import HTTPError
24

35
from kloppy.config import get_config
46
from kloppy.domain import EventDataset, EventFactory, TrackingDataset
@@ -10,7 +12,7 @@
1012
SportecTrackingDataDeserializer,
1113
SportecTrackingDataInputs,
1214
)
13-
from kloppy.io import open_as_file, FileLike
15+
from kloppy.io import FileLike, open_as_file
1416
from kloppy.utils import deprecated
1517

1618

@@ -82,3 +84,154 @@ def load(
8284
return load_event(
8385
event_data, meta_data, event_types, coordinates, event_factory
8486
)
87+
88+
89+
def get_IDSSE_url(match_id: str, data_type: str) -> str:
    """Return the download URL of one file of a match in the IDSSE dataset.

    Args:
        match_id: Identifier of the match; must be one of the match ids
            listed in the lookup table inside this function.
        data_type: Which file of the match to address: ``"meta"``,
            ``"event"`` or ``"tracking"``.

    Returns:
        The figshare download URL of the requested file.

    Raises:
        ValueError: If ``data_type`` is not one of the supported kinds, or
            if ``match_id`` is not part of the dataset. The data type is
            validated before the match id.
    """
    url_template = "https://figshare.com/ndownloader/files/{file_id}?private_link=1f806cb3e755c6b54e05"
    kinds = ("meta", "event", "tracking")
    # match_id -> figshare file ids, in the same order as `kinds`
    file_ids = {
        "J03WPY": (48392497, 48392542, 48392572),
        "J03WN1": (48392491, 48392527, 48392512),
        "J03WMX": (48392485, 48392524, 48392539),
        "J03WOH": (48392515, 48392500, 48392578),
        "J03WQQ": (48392488, 48392521, 48392545),
        "J03WOY": (48392503, 48392518, 48392551),
        "J03WR9": (48392494, 48392530, 48392563),
    }

    if data_type not in kinds:
        raise ValueError(
            f"Data type should be one of ['meta', 'event', 'tracking'], but got {data_type}"
        )
    if match_id not in file_ids:
        raise ValueError(
            f"This match_id is not available, please select from {list(file_ids.keys())}"
        )

    file_id = file_ids[match_id][kinds.index(data_type)]
    return url_template.format(file_id=str(file_id))
113+
114+
115+
def load_open_event_data(
    match_id: str = "J03WPY",
    event_types: Optional[List[str]] = None,
    coordinates: Optional[str] = None,
    event_factory: Optional[EventFactory] = None,
) -> EventDataset:
    """
    Load event data for a game from the IDSSE dataset.

    The IDSSE dataset will be released with the publication of the *An integrated
    dataset of synchronized spatiotemporal and event data in elite soccer*
    paper [1]_ and is released under the Creative Commons Attribution 4.0
    license.

    Args:
        match_id (str, optional):
            Match-ID of one of the matches. Defaults to `'J03WPY'`. See below
            for available matches.
        event_types:
            Passed through unchanged to `load_event`.
        coordinates:
            Passed through unchanged to `load_event`.
        event_factory:
            Passed through unchanged to `load_event`.

    Returns:
        The dataset produced by `load_event` for the match's event and
        meta data files.

    Raises:
        ValueError: If `match_id` is not one of the available matches.
        HTTPError: If the download fails with an HTTP error. The raised
            error carries the `response` and `request` of the original
            error, which is also chained as its cause.

    Notes:
        The dataset contains seven full matches of raw event and position data
        for both teams and the ball from the German Men's Bundesliga season
        2022/23 first and second division. A detailed description of the
        dataset as well as the collection process can be found in the
        accompanying paper.

        The following matches are available::

            matches = {
                'J03WMX': 1. FC Köln vs. FC Bayern München,
                'J03WN1': VfL Bochum 1848 vs. Bayer 04 Leverkusen,
                'J03WPY': Fortuna Düsseldorf vs. 1. FC Nürnberg,
                'J03WOH': Fortuna Düsseldorf vs. SSV Jahn Regensburg,
                'J03WQQ': Fortuna Düsseldorf vs. FC St. Pauli,
                'J03WOY': Fortuna Düsseldorf vs. F.C. Hansa Rostock,
                'J03WR9': Fortuna Düsseldorf vs. 1. FC Kaiserslautern
            }

    References:
        .. [1] Bassek, M., Weber, H., Rein, R., & Memmert, D. (2024). "An integrated
               dataset of synchronized spatiotemporal and event data in elite soccer."
               In Submission.
    """
    # Resolving the URLs can only fail with ValueError (unknown match id), so
    # it stays outside the try block that guards the actual download.
    event_url = get_IDSSE_url(match_id, "event")
    meta_url = get_IDSSE_url(match_id, "meta")
    try:
        return load_event(
            event_data=event_url,
            meta_data=meta_url,
            event_types=event_types,
            coordinates=coordinates,
            event_factory=event_factory,
        )
    except HTTPError as e:
        # Forward the original response/request so callers can still inspect
        # e.g. the status code on the re-raised error.
        raise HTTPError(
            "Unable to retrieve data. The dataset archive location may have changed. "
            "See https://github.com/PySport/kloppy/issues/369 for details.",
            response=e.response,
            request=e.request,
        ) from e
174+
175+
176+
def load_open_tracking_data(
    match_id: str = "J03WPY",
    sample_rate: Optional[float] = None,
    limit: Optional[int] = None,
    coordinates: Optional[str] = None,
    only_alive: Optional[bool] = True,
) -> TrackingDataset:
    """
    Load tracking data for a game from the IDSSE dataset.

    The IDSSE dataset will be released with the publication of the *An integrated
    dataset of synchronized spatiotemporal and event data in elite soccer*
    paper [1]_ and is released under the Creative Commons Attribution 4.0
    license.

    Args:
        match_id (str, optional):
            Match-ID of one of the matches. Defaults to `'J03WPY'`. See below
            for available matches.
        sample_rate:
            Passed through unchanged to `load_tracking`.
        limit:
            Passed through unchanged to `load_tracking`.
        coordinates:
            Passed through unchanged to `load_tracking`.
        only_alive:
            Passed through unchanged to `load_tracking`. Defaults to `True`.

    Returns:
        The dataset produced by `load_tracking` for the match's tracking and
        meta data files.

    Raises:
        ValueError: If `match_id` is not one of the available matches.
        HTTPError: If the download fails with an HTTP error. The raised
            error carries the `response` and `request` of the original
            error, which is also chained as its cause.

    Notes:
        The dataset contains seven full matches of raw event and position data
        for both teams and the ball from the German Men's Bundesliga season
        2022/23 first and second division. A detailed description of the
        dataset as well as the collection process can be found in the
        accompanying paper.

        The following matches are available::

            matches = {
                'J03WMX': 1. FC Köln vs. FC Bayern München,
                'J03WN1': VfL Bochum 1848 vs. Bayer 04 Leverkusen,
                'J03WPY': Fortuna Düsseldorf vs. 1. FC Nürnberg,
                'J03WOH': Fortuna Düsseldorf vs. SSV Jahn Regensburg,
                'J03WQQ': Fortuna Düsseldorf vs. FC St. Pauli,
                'J03WOY': Fortuna Düsseldorf vs. F.C. Hansa Rostock,
                'J03WR9': Fortuna Düsseldorf vs. 1. FC Kaiserslautern
            }

    References:
        .. [1] Bassek, M., Weber, H., Rein, R., & Memmert, D. (2024). "An integrated
               dataset of synchronized spatiotemporal and event data in elite soccer."
               In Submission.
    """
    # Resolving the URLs can only fail with ValueError (unknown match id), so
    # it stays outside the try block that guards the actual download.
    tracking_url = get_IDSSE_url(match_id, "tracking")
    meta_url = get_IDSSE_url(match_id, "meta")
    try:
        return load_tracking(
            raw_data=tracking_url,
            meta_data=meta_url,
            sample_rate=sample_rate,
            limit=limit,
            coordinates=coordinates,
            only_alive=only_alive,
        )
    except HTTPError as e:
        # Forward the original response/request so callers can still inspect
        # e.g. the status code on the re-raised error.
        raise HTTPError(
            "Unable to retrieve data. The dataset archive location may have changed. "
            "See https://github.com/PySport/kloppy/issues/369 for details.",
            response=e.response,
            request=e.request,
        ) from e

kloppy/domain/models/common.py

Lines changed: 42 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020

2121
from .position import PositionType
2222

23-
from ...utils import deprecated
23+
from ...utils import deprecated, snake_case
2424

2525
if sys.version_info >= (3, 8):
2626
from typing import Literal
@@ -119,6 +119,46 @@ def __str__(self):
119119
return self.value
120120

121121

122+
class OfficialType(Enum):
    """Roles a match official (referee) can have.

    Each member's value is the human-readable role name, which is also what
    ``str(member)`` returns.
    """

    VideoAssistantReferee = "Video Assistant Referee"
    MainReferee = "Main Referee"
    AssistantReferee = "Assistant Referee"
    FourthOfficial = "Fourth Official"

    def __str__(self) -> str:
        # Render as the readable role name instead of "OfficialType.<Member>".
        role_label = self.value
        return role_label
132+
133+
134+
@dataclass(frozen=True)
class Official:
    """
    An official (referee) of a match.

    Only `official_id` is required; the name parts and the role are optional
    and default to `None`.
    """

    official_id: str
    name: Optional[str] = None
    first_name: Optional[str] = None
    last_name: Optional[str] = None
    role: Optional[OfficialType] = None

    @property
    def full_name(self) -> str:
        """
        Best available display name for the official.

        Preference order: `name`, then "first_name last_name", then
        `last_name` alone, then a snake-cased role label suffixed with the
        id, and finally ``official_<id>``. A `first_name` without a
        `last_name` is not used on its own.
        """
        if self.name:
            return self.name

        if self.last_name:
            if self.first_name:
                return f"{self.first_name} {self.last_name}"
            return self.last_name

        # No usable name: fall back to an identifier built from role or id.
        prefix = snake_case(str(self.role)) if self.role else "official"
        return f"{prefix}_{self.official_id}"
160+
161+
122162
@dataclass(frozen=True)
123163
class Player:
124164
"""
@@ -1016,6 +1056,7 @@ class Metadata:
10161056
game_id: Optional[str] = None
10171057
home_coach: Optional[str] = None
10181058
away_coach: Optional[str] = None
1059+
officials: Optional[List] = field(default_factory=list)
10191060
attributes: Optional[Dict] = field(default_factory=dict, compare=False)
10201061

10211062
def __post_init__(self):

kloppy/domain/models/position.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,8 @@ class PositionType(Enum):
1313
CenterBack = ("Center Back", "CB", "Defender")
1414
LeftCenterBack = ("Left Center Back", "LCB", "CenterBack")
1515
RightCenterBack = ("Right Center Back", "RCB", "CenterBack")
16+
LeftWingBack = ("Left Wing Back", "LWB", "WingBack")
17+
RightWingBack = ("Right Wing Back", "RWB", "WingBack")
1618

1719
Midfielder = ("Midfielder", "MID", None)
1820
DefensiveMidfield = ("Defensive Midfield", "DM", "Midfielder")

kloppy/infra/serializers/event/sportec/deserializer.py

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@
2929
CardType,
3030
AttackingDirection,
3131
PositionType,
32+
Official,
33+
OfficialType,
3234
)
3335
from kloppy.exceptions import DeserializationError
3436
from kloppy.infra.serializers.event.deserializer import EventDataDeserializer
@@ -55,6 +57,14 @@
5557
"LA": PositionType.LeftWing,
5658
}
5759

60+
# Maps the "Role" attribute of a Referee element to an OfficialType.
# Both assistant roles map to the same AssistantReferee type.
referee_types_mapping: Dict[str, OfficialType] = dict(
    referee=OfficialType.MainReferee,
    firstAssistant=OfficialType.AssistantReferee,
    videoReferee=OfficialType.VideoAssistantReferee,
    secondAssistant=OfficialType.AssistantReferee,
    fourthOfficial=OfficialType.FourthOfficial,
)
67+
5868
logger = logging.getLogger(__name__)
5969

6070

@@ -102,6 +112,7 @@ class SportecMetadata(NamedTuple):
102112
fps: int
103113
home_coach: str
104114
away_coach: str
115+
officials: List[Official]
105116

106117

107118
def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
@@ -213,6 +224,31 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
213224
]
214225
)
215226

227+
if hasattr(match_root, "MatchInformation") and hasattr(
228+
match_root.MatchInformation, "Referees"
229+
):
230+
officials = []
231+
referee_path = objectify.ObjectPath(
232+
"PutDataRequest.MatchInformation.Referees"
233+
)
234+
referee_elms = referee_path.find(match_root).iterchildren(
235+
tag="Referee"
236+
)
237+
238+
for referee in referee_elms:
239+
ref_attrib = referee.attrib
240+
officials.append(
241+
Official(
242+
official_id=ref_attrib["PersonId"],
243+
name=ref_attrib["Shortname"],
244+
first_name=ref_attrib["FirstName"],
245+
last_name=ref_attrib["LastName"],
246+
role=referee_types_mapping[ref_attrib["Role"]],
247+
)
248+
)
249+
else:
250+
officials = []
251+
216252
return SportecMetadata(
217253
score=score,
218254
teams=teams,
@@ -222,6 +258,7 @@ def sportec_metadata_from_xml_elm(match_root) -> SportecMetadata:
222258
fps=SPORTEC_FPS,
223259
home_coach=home_coach,
224260
away_coach=away_coach,
261+
officials=officials,
225262
)
226263

227264

@@ -673,6 +710,7 @@ def deserialize(self, inputs: SportecEventDataInputs) -> EventDataset:
673710
game_id=game_id,
674711
home_coach=home_coach,
675712
away_coach=away_coach,
713+
officials=sportec_metadata.officials,
676714
)
677715

678716
return EventDataset(

0 commit comments

Comments
 (0)