Skip to content

[Wyscout v3] Create periods prior to creating events #441

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 50 additions & 35 deletions kloppy/infra/serializers/event/wyscout/deserializer_v3.py
Original file line number Diff line number Diff line change
Expand Up @@ -708,6 +708,48 @@ def _parse_period_id(raw_period: str) -> int:
return period_id


def create_periods(raw_events, period_minutes_offset_mapping):
    """Build the list of match periods from a raw Wyscout v3 event feed.

    The events are walked in feed order. A new ``Period`` is opened every
    time the parsed ``matchPeriod`` differs from the period of the previous
    event, and a period is closed (its ``end_timestamp`` is filled in) on
    the last event that belongs to it, i.e. when the following event has a
    different period id or there is no following event.

    Args:
        raw_events: Mapping with an ``"events"`` sequence. Each event is
            read for its ``"matchPeriod"``, ``"minute"`` and ``"second"``
            entries.
        period_minutes_offset_mapping: Mapping from period id to a
            ``timedelta`` that is subtracted from the period's start
            before the event clock is added (presumably the nominal
            match-clock offset at which that period starts; confirm
            against the caller).

    Returns:
        The list of ``Period`` objects in order of first appearance. The
        first period starts at ``timedelta(0)``; every later period starts
        at the ``end_timestamp`` of the period before it.

    Raises:
        KeyError: If an event lacks one of the keys listed above, or if a
            period id is missing from ``period_minutes_offset_mapping``.
    """
    raw_event_list = raw_events["events"]
    periods = []

    for idx, raw_event in enumerate(raw_event_list):
        # Look one event ahead so we can tell whether this event is the
        # last one of its period.
        next_period_id = None
        if (idx + 1) < len(raw_event_list):
            next_period_id = _parse_period_id(
                raw_event_list[idx + 1]["matchPeriod"]
            )

        period_id = _parse_period_id(raw_event["matchPeriod"])

        if not periods or periods[-1].id != period_id:
            periods.append(
                Period(
                    id=period_id,
                    start_timestamp=(
                        periods[-1].end_timestamp
                        if periods
                        else timedelta(seconds=0)
                    ),
                    end_timestamp=None,
                )
            )

        if next_period_id != period_id:
            # The guard above ensures the current period is the last list
            # entry, so use it directly instead of indexing by
            # ``period_id - 1``; that positional lookup is only valid when
            # ids are contiguous, 1-based and appear in order.
            current_period = periods[-1]
            period_offset = (
                current_period.start_timestamp
                - period_minutes_offset_mapping[period_id]
            )
            event_clock = timedelta(
                seconds=float(raw_event["second"] + raw_event["minute"] * 60)
            )
            periods[-1] = replace(
                current_period,
                end_timestamp=period_offset + event_clock,
            )
    return periods


class WyscoutDeserializerV3(EventDataDeserializer[WyscoutInputs]):
@property
def provider(self) -> Provider:
Expand All @@ -722,7 +764,6 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
if "id" not in event:
event["id"] = event["type"]["primary"]

periods = []
# start timestamps are fixed
start_ts = {
1: timedelta(minutes=0),
Expand Down Expand Up @@ -771,18 +812,21 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
"shortName"
)

periods = create_periods(raw_events, start_ts)

events = []

next_pass_is_kickoff = False
for idx, raw_event in enumerate(raw_events["events"]):
next_event = None
next_period_id = None
ball_owning_team = None
if (idx + 1) < len(raw_events["events"]):
next_event = raw_events["events"][idx + 1]
next_period_id = _parse_period_id(
next_event["matchPeriod"]
)

if raw_event["possession"]:
ball_owning_team = teams[
str(raw_event["possession"]["team"]["id"])
]
if (
idx == 0
or raw_event["matchPeriod"]
Expand All @@ -796,35 +840,6 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
player_id = str(raw_event["player"]["id"])
period_id = _parse_period_id(raw_event["matchPeriod"])

if len(periods) == 0 or periods[-1].id != period_id:
periods.append(
Period(
id=period_id,
start_timestamp=(
timedelta(seconds=0)
if len(periods) == 0
else periods[-1].end_timestamp
),
end_timestamp=None,
)
)

if next_period_id != period_id:
periods[-1] = replace(
periods[-1],
end_timestamp=timedelta(
seconds=float(
raw_event["second"] + raw_event["minute"] * 60
)
),
)

ball_owning_team = None
if raw_event["possession"]:
ball_owning_team = teams[
str(raw_event["possession"]["team"]["id"])
]

if player_id == INVALID_PLAYER:
player = None
elif player_id not in players[team_id]:
Expand Down Expand Up @@ -853,7 +868,7 @@ def deserialize(self, inputs: WyscoutInputs) -> EventDataset:
"player": player,
"ball_owning_team": ball_owning_team,
"ball_state": None,
"period": periods[-1],
"period": periods[period_id - 1],
"timestamp": _create_timestamp_timedelta(
raw_event, start_ts, period_id
),
Expand Down
15 changes: 14 additions & 1 deletion kloppy/tests/test_wyscout.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ def test_metadata(self, dataset: EventDataset):
)
assert dataset.metadata.periods[1].end_timestamp == timedelta(
minutes=45, seconds=5
) + timedelta(minutes=46, seconds=53)
) + timedelta(minutes=46, seconds=58)

assert (
dataset.metadata.teams[0].starting_formation
Expand All @@ -206,6 +206,17 @@ def test_metadata(self, dataset: EventDataset):
== FormationType.FOUR_THREE_ONE_TWO
)

second_period_end_time = Time(
period=dataset.metadata.periods[1],
timestamp=timedelta(seconds=2818),
)
assert (
dataset.metadata.teams[1]
.formations.items.keys()[0]
.period.end_time
== second_period_end_time
)

cr7 = dataset.metadata.teams[0].get_player_by_id("3322")

assert cr7.full_name == "Cristiano Ronaldo dos Santos Aveiro"
Expand All @@ -231,8 +242,10 @@ def test_enriched_metadata(self, dataset: EventDataset):
def test_timestamps(self, dataset: EventDataset):
kickoff_p1 = dataset.get_event_by_id(1927028854)
assert kickoff_p1.timestamp == timedelta(minutes=0, seconds=3)
assert kickoff_p1.time.period.id == 1
kickoff_p2 = dataset.get_event_by_id(1927029460)
assert kickoff_p2.timestamp == timedelta(minutes=0, seconds=0)
assert kickoff_p2.time.period.id == 2

def test_coordinates(self, dataset: EventDataset):
assert dataset.records[2].coordinates == Point(32.0, 56.0)
Expand Down
Loading