|
1 | | -from typing import Optional, List |
| 1 | +from typing import List, Optional |
| 2 | + |
| 3 | +from requests.exceptions import HTTPError |
2 | 4 |
|
3 | 5 | from kloppy.config import get_config |
4 | 6 | from kloppy.domain import EventDataset, EventFactory, TrackingDataset |
|
10 | 12 | SportecTrackingDataDeserializer, |
11 | 13 | SportecTrackingDataInputs, |
12 | 14 | ) |
13 | | -from kloppy.io import open_as_file, FileLike |
| 15 | +from kloppy.io import FileLike, open_as_file |
14 | 16 | from kloppy.utils import deprecated |
15 | 17 |
|
16 | 18 |
|
@@ -82,3 +84,154 @@ def load( |
82 | 84 | return load_event( |
83 | 85 | event_data, meta_data, event_types, coordinates, event_factory |
84 | 86 | ) |
| 87 | + |
| 88 | + |
def get_IDSSE_url(match_id: str, data_type: str) -> str:
    """Return the download URL of one file of a match in the IDSSE dataset.

    Args:
        match_id: Match-ID of one of the seven matches listed in the
            lookup table below.
        data_type: Which file of the match to address: ``"meta"``,
            ``"event"`` or ``"tracking"``.

    Returns:
        The figshare download URL with the file id of the requested
        match / data type filled in.

    Raises:
        ValueError: If ``data_type`` is not one of the three supported
            values, or if ``match_id`` is not part of the dataset.
    """
    # match_id -> figshare file id, per data type
    DATA_MAP = {
        "J03WPY": {"meta": 48392497, "event": 48392542, "tracking": 48392572},
        "J03WN1": {"meta": 48392491, "event": 48392527, "tracking": 48392512},
        "J03WMX": {"meta": 48392485, "event": 48392524, "tracking": 48392539},
        "J03WOH": {"meta": 48392515, "event": 48392500, "tracking": 48392578},
        "J03WQQ": {"meta": 48392488, "event": 48392521, "tracking": 48392545},
        "J03WOY": {"meta": 48392503, "event": 48392518, "tracking": 48392551},
        "J03WR9": {"meta": 48392494, "event": 48392530, "tracking": 48392563},
    }
    # URL template; only the file id varies between downloads
    DATA_URL = "https://figshare.com/ndownloader/files/{file_id}?private_link=1f806cb3e755c6b54e05"

    # Validate the data type first so the caller learns about a bad
    # data_type even when the match id is also wrong (same order as before).
    if data_type not in ("meta", "event", "tracking"):
        raise ValueError(
            f"Data type should be one of ['meta', 'event', 'tracking'], but got {data_type}"
        )
    if match_id not in DATA_MAP:
        # Echo the rejected id so the error is actionable on its own.
        raise ValueError(
            f"This match_id is not available, please select from {list(DATA_MAP)}, "
            f"but got {match_id!r}"
        )
    return DATA_URL.format(file_id=DATA_MAP[match_id][data_type])
| 113 | + |
| 114 | + |
def load_open_event_data(
    match_id: str = "J03WPY",
    event_types: Optional[List[str]] = None,
    coordinates: Optional[str] = None,
    event_factory: Optional[EventFactory] = None,
) -> EventDataset:
    """
    Load event data for a game from the IDSSE dataset.

    The IDSSE dataset will be released with the publication of the *An integrated
    dataset of synchronized spatiotemporal and event data in elite soccer*
    paper [1]_ and is released under the Creative Commons Attribution 4.0
    license.

    Args:
        match_id (str, optional):
            Match-ID of one of the matches. Defaults to `'J03WPY'`. See below
            for available matches.
        event_types:
            Passed through unchanged to `load_event`.
        coordinates:
            Passed through unchanged to `load_event`.
        event_factory:
            Passed through unchanged to `load_event`.

    Returns:
        The dataset returned by `load_event` for the match's event and
        meta data files.

    Raises:
        ValueError: If `match_id` is not one of the available matches
            (raised by `get_IDSSE_url`).
        HTTPError: If the download fails. The error carries a hint about
            the archive location, keeps the original error as its cause and
            exposes the original `response` / `request`.

    Notes:
        The dataset contains seven full matches of raw event and position data
        for both teams and the ball from the German Men's Bundesliga season
        2022/23 first and second division. A detailed description of the
        dataset as well as the collection process can be found in the
        accompanying paper.

        The following matches are available::

        matches = {
            'J03WMX': 1. FC Köln vs. FC Bayern München,
            'J03WN1': VfL Bochum 1848 vs. Bayer 04 Leverkusen,
            'J03WPY': Fortuna Düsseldorf vs. 1. FC Nürnberg,
            'J03WOH': Fortuna Düsseldorf vs. SSV Jahn Regensburg,
            'J03WQQ': Fortuna Düsseldorf vs. FC St. Pauli,
            'J03WOY': Fortuna Düsseldorf vs. F.C. Hansa Rostock,
            'J03WR9': Fortuna Düsseldorf vs. 1. FC Kaiserslautern
        }

    References:
        .. [1] Bassek, M., Weber, H., Rein, R., & Memmert, D. (2024). "An integrated
               dataset of synchronized spatiotemporal and event data in elite soccer."
               In Submission.
    """
    # Resolve both URLs up front: an unknown match_id raises ValueError here,
    # so the try block below only guards the actual download/parsing call.
    event_url = get_IDSSE_url(match_id, "event")
    meta_url = get_IDSSE_url(match_id, "meta")
    try:
        return load_event(
            event_data=event_url,
            meta_data=meta_url,
            event_types=event_types,
            coordinates=coordinates,
            event_factory=event_factory,
        )
    except HTTPError as e:
        # Forward response/request so callers can still inspect the status
        # code; a message-only HTTPError would leave both set to None.
        raise HTTPError(
            "Unable to retrieve data. The dataset archive location may have changed. "
            "See https://github.com/PySport/kloppy/issues/369 for details.",
            response=e.response,
            request=e.request,
        ) from e
| 174 | + |
| 175 | + |
def load_open_tracking_data(
    match_id: str = "J03WPY",
    sample_rate: Optional[float] = None,
    limit: Optional[int] = None,
    coordinates: Optional[str] = None,
    only_alive: Optional[bool] = True,
) -> TrackingDataset:
    """
    Load tracking data for a game from the IDSSE dataset.

    The IDSSE dataset will be released with the publication of the *An integrated
    dataset of synchronized spatiotemporal and event data in elite soccer*
    paper [1]_ and is released under the Creative Commons Attribution 4.0
    license.

    Args:
        match_id (str, optional):
            Match-ID of one of the matches. Defaults to `'J03WPY'`. See below
            for available matches.
        sample_rate:
            Passed through unchanged to `load_tracking`.
        limit:
            Passed through unchanged to `load_tracking`.
        coordinates:
            Passed through unchanged to `load_tracking`.
        only_alive:
            Passed through unchanged to `load_tracking`. Defaults to `True`.

    Returns:
        The dataset returned by `load_tracking` for the match's tracking and
        meta data files.

    Raises:
        ValueError: If `match_id` is not one of the available matches
            (raised by `get_IDSSE_url`).
        HTTPError: If the download fails. The error carries a hint about
            the archive location, keeps the original error as its cause and
            exposes the original `response` / `request`.

    Notes:
        The dataset contains seven full matches of raw event and position data
        for both teams and the ball from the German Men's Bundesliga season
        2022/23 first and second division. A detailed description of the
        dataset as well as the collection process can be found in the
        accompanying paper.

        The following matches are available::

        matches = {
            'J03WMX': 1. FC Köln vs. FC Bayern München,
            'J03WN1': VfL Bochum 1848 vs. Bayer 04 Leverkusen,
            'J03WPY': Fortuna Düsseldorf vs. 1. FC Nürnberg,
            'J03WOH': Fortuna Düsseldorf vs. SSV Jahn Regensburg,
            'J03WQQ': Fortuna Düsseldorf vs. FC St. Pauli,
            'J03WOY': Fortuna Düsseldorf vs. F.C. Hansa Rostock,
            'J03WR9': Fortuna Düsseldorf vs. 1. FC Kaiserslautern
        }

    References:
        .. [1] Bassek, M., Weber, H., Rein, R., & Memmert, D. (2024). "An integrated
               dataset of synchronized spatiotemporal and event data in elite soccer."
               In Submission.
    """
    # Resolve both URLs up front: an unknown match_id raises ValueError here,
    # so the try block below only guards the actual download/parsing call.
    tracking_url = get_IDSSE_url(match_id, "tracking")
    meta_url = get_IDSSE_url(match_id, "meta")
    try:
        return load_tracking(
            raw_data=tracking_url,
            meta_data=meta_url,
            sample_rate=sample_rate,
            limit=limit,
            coordinates=coordinates,
            only_alive=only_alive,
        )
    except HTTPError as e:
        # Forward response/request so callers can still inspect the status
        # code; a message-only HTTPError would leave both set to None.
        raise HTTPError(
            "Unable to retrieve data. The dataset archive location may have changed. "
            "See https://github.com/PySport/kloppy/issues/369 for details.",
            response=e.response,
            request=e.request,
        ) from e
0 commit comments