Goal Expectancy

Estimates the number of goals the bookmaker expects each team to score, based on the implied home-win, draw and away-win probabilities.

[1]:
import sys

sys.path.append("../../")

import penaltyblog as pb
import pandas as pd
WARNING (aesara.tensor.blas): Using NumPy C-API based implementation for BLAS functions.

Get data from football-data.co.uk

[2]:
# Competition and season to fetch; edit these to analyse a different league or year.
COMPETITION = "ENG Premier League"
SEASON = "2019-2020"

# Team names plus the three odds columns (home / draw / away) used by the cells below.
cols = ["team_home", "team_away", "psh", "psd", "psa"]

fb = pb.scrapers.FootballData(COMPETITION, SEASON)

# Take an explicit copy of the column subset: a later cell adds new columns to
# `df`, and assigning into a bare selection of another frame can raise pandas'
# SettingWithCopyWarning when the parent frame is still referenced.
df = fb.get_fixtures()[cols].copy()

df.head()
[2]:
team_home team_away psh psd psa
id
1565308800---liverpool---norwich Liverpool Norwich 1.15 9.59 18.05
1565395200---bournemouth---sheffield_united Bournemouth Sheffield United 2.04 3.57 3.90
1565395200---burnley---southampton Burnley Southampton 2.71 3.31 2.81
1565395200---crystal_palace---everton Crystal Palace Everton 3.21 3.37 2.39
1565395200---tottenham---aston_villa Tottenham Aston Villa 1.30 5.84 10.96

Remove the overround (the bookmaker's built-in margin) from the odds to obtain the implied probabilities

[3]:
def remove_overround(row):
    """
    Convert one fixture's quoted odds into implied probabilities.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Must provide the keys "psh", "psd" and "psa". They are read in that
        order and handed on as a three-element list.

    Returns
    -------
    pd.Series
        The "implied_probabilities" entry of the result returned by
        `pb.implied.differential_margin_weighting`, wrapped in a Series.
        The caller stores the three values as home, draw and away.
    """
    quoted_prices = [row[column] for column in ("psh", "psd", "psa")]

    # The margin removal itself is delegated entirely to penaltyblog.
    margin_free = pb.implied.differential_margin_weighting(quoted_prices)

    return pd.Series(margin_free["implied_probabilities"])

# Run remove_overround once per fixture (row-wise) and store the three values
# it returns as new columns on the same frame.
prob_cols = ["home", "draw", "away"]
df[prob_cols] = df.apply(remove_overround, axis="columns")

df.head()
[3]:
team_home team_away psh psd psa home draw away
id
1565308800---liverpool---norwich Liverpool Norwich 1.15 9.59 18.05 0.859818 0.094528 0.045654
1565395200---bournemouth---sheffield_united Bournemouth Sheffield United 2.04 3.57 3.90 0.481290 0.271206 0.247504
1565395200---burnley---southampton Burnley Southampton 2.71 3.31 2.81 0.360007 0.293118 0.346875
1565395200---crystal_palace---everton Crystal Palace Everton 3.21 3.37 2.39 0.302636 0.287845 0.409519
1565395200---tottenham---aston_villa Tottenham Aston Villa 1.30 5.84 10.96 0.758663 0.160665 0.080673

Get the goal expectancy for the first five fixtures

[4]:
def _expectancy_record(fixture):
    """Call `pb.models.goal_expectancy` for one fixture row and flatten the result into a dict."""
    fit = pb.models.goal_expectancy(fixture["home"], fixture["draw"], fixture["away"])

    # "success" and "error" are copied through unchanged from the result so
    # they can be inspected alongside the expectancies.
    return {
        "team_home": fixture["team_home"],
        "team_away": fixture["team_away"],
        "home_expectancy": fit["home_exp"],
        "away_expectancy": fit["away_exp"],
        "success": fit["success"],
        "error": fit["error"],
    }


# Only the rows returned by df.head() are processed here.
output = pd.DataFrame(
    [_expectancy_record(fixture) for _, fixture in df.head().iterrows()]
)
output
[4]:
team_home team_away home_expectancy away_expectancy success error
0 Liverpool Norwich 3.219932 0.667064 True 5.381783e-09
1 Bournemouth Sheffield United 1.403588 0.923020 True 5.932290e-11
2 Burnley Southampton 1.099876 1.073906 True 1.595469e-11
3 Crystal Palace Everton 0.999968 1.213245 True 6.410598e-11
4 Tottenham Aston Villa 2.312856 0.607438 True 1.123880e-10