End-to-end Example#
[3]:
import os
import tempfile

import pandas as pd
import penaltyblog as pb
Download data from football-data.co.uk#
[ ]:
# Seasons to download; each one is fetched separately and then stacked.
seasons = ["2021-2022", "2022-2023", "2023-2024"]
season_frames = [
    pb.scrapers.FootballData("ENG Premier League", season).get_fixtures()
    for season in seasons
]

# Combine the per-season fixture tables into a single frame.
df = pd.concat(season_frames)
df.head()
| date | datetime | season | competition | div | time | team_home | team_away | fthg | ftag | ... | b365_cahh | b365_caha | pcahh | pcaha | max_cahh | max_caha | avg_cahh | avg_caha | goals_home | goals_away | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| id | |||||||||||||||||||||
| 1628812800---brentford---arsenal | 2021-08-13 | 2021-08-13 20:00:00 | 2021-2022 | ENG Premier League | E0 | 20:00 | Brentford | Arsenal | 2 | 0 | ... | 1.75 | 2.05 | 1.81 | 2.13 | 2.05 | 2.17 | 1.80 | 2.09 | 2 | 0 |
| 1628899200---burnley---brighton | 2021-08-14 | 2021-08-14 15:00:00 | 2021-2022 | ENG Premier League | E0 | 15:00 | Burnley | Brighton | 1 | 2 | ... | 1.79 | 2.15 | 1.81 | 2.14 | 1.82 | 2.19 | 1.79 | 2.12 | 1 | 2 |
| 1628899200---chelsea---crystal_palace | 2021-08-14 | 2021-08-14 15:00:00 | 2021-2022 | ENG Premier League | E0 | 15:00 | Chelsea | Crystal Palace | 3 | 0 | ... | 2.05 | 1.75 | 2.12 | 1.81 | 2.16 | 1.93 | 2.06 | 1.82 | 3 | 0 |
| 1628899200---everton---southampton | 2021-08-14 | 2021-08-14 15:00:00 | 2021-2022 | ENG Premier League | E0 | 15:00 | Everton | Southampton | 3 | 1 | ... | 2.05 | 1.88 | 2.05 | 1.88 | 2.08 | 1.90 | 2.03 | 1.86 | 3 | 1 |
| 1628899200---leicester---wolves | 2021-08-14 | 2021-08-14 15:00:00 | 2021-2022 | ENG Premier League | E0 | 15:00 | Leicester | Wolves | 1 | 0 | ... | 2.02 | 1.91 | 2.01 | 1.92 | 2.05 | 1.95 | 1.99 | 1.89 | 1 | 0 |
5 rows × 111 columns
Create time-decay weights so more recent data have more influence on the model’s fit#
[5]:
# `xi` is the decay parameter handed to `dixon_coles_weights`.
xi = 0.001

# Weight each match by its date so recent fixtures count for more in the fit.
match_dates = df["date"]
weights = pb.models.dixon_coles_weights(match_dates, xi=xi)
Build the model’s inputs#
[6]:
# Pull the four model inputs out of the frame as arrays in one pass:
#   gh / ga -> goals scored by the home / away side
#   th / ta -> home / away team names
gh, ga, th, ta = (
    df[column].values
    for column in ("goals_home", "goals_away", "team_home", "team_away")
)
Fit a Dixon & Coles model#
- `use_gradient=True` for faster convergence
- `minimizer_options` is passed directly to `scipy.optimize.minimize(options=...)`
[16]:
# Optimiser settings (optional); forwarded to `scipy.optimize.minimize`.
minimizer_options = {
    "maxiter": 3000,  # more iterations if needed
    "gtol": 1e-8,     # gradient tolerance
    "ftol": 1e-9,     # function tolerance
    "disp": False,    # silence optimiser output
}

model = pb.models.DixonColesGoalModel(gh, ga, th, ta, weights=weights)

# `use_gradient` is optional; it can be False for back-compat.
model.fit(use_gradient=True, minimizer_options=minimizer_options)

# Report the headline fit diagnostics, one per line.
fit_summary = (
    ("Fitted:", model.fitted),
    ("Log-likelihood:", model.loglikelihood),
    ("AIC:", model.aic),
    ("Number of params:", model.n_params),
    ("First few params:", list(model.params.items())[:5]),
)
for label, value in fit_summary:
    print(label, value)
Fitted: True
Log-likelihood: -2164.74110313536
AIC: 4433.48220627072
Number of params: 52
First few params: [('attack_Arsenal', np.float64(1.4615514952216384)), ('attack_Aston Villa', np.float64(1.184047741206666)), ('attack_Bournemouth', np.float64(0.9147685302294655)), ('attack_Brentford', np.float64(1.0526808596367745)), ('attack_Brighton', np.float64(1.090265129154662))]
Predict a specific fixture and get a FootballProbabilityGrid back#
[20]:
# Fixture to predict.
home_team, away_team = "Man City", "Liverpool"

# max_goals=15 and normalize=True are the defaults; they are spelled out here
# so the later truncation/normalisation checks are easy to compare against.
pred = model.predict(
    home_team,
    away_team,
    max_goals=15,
    normalize=True,
)
Core markets (1x2)#
[21]:
# All three outcome probabilities at once...
outcome_probs = pred.home_draw_away
print("P(Home win), P(Draw), P(Away win):", outcome_probs)

# ...and the same outcomes through their individual attributes.
for label, prob in (
    ("P(Home win):", pred.home_win),
    ("P(Draw):", pred.draw),
    ("P(Away win):", pred.away_win),
):
    print(label, prob)
P(Home win), P(Draw), P(Away win): [0.5679744323673176, 0.21833778831586806, 0.21368777931681426]
P(Home win): 0.5679744323673176
P(Draw): 0.21833778831586806
P(Away win): 0.21368777931681426
Goal Expectancy#
[22]:
# Expected goals for each side, read off the prediction.
home_xg = pred.home_goal_expectation
away_xg = pred.away_goal_expectation

print("Home xG:", home_xg)
print("Away xG:", away_xg)
Home xG: 1.9358658956305108
Away xG: 1.103726188219536
BTTS#
[ ]:
# Both-teams-to-score market: yes / no.
btts_market = {
    "BTTS (Yes):": pred.btts_yes,
    "BTTS (No):": pred.btts_no,
}
for label, prob in btts_market.items():
    print(label, prob)
BTTS (Yes): 0.5713026487032481
BTTS (No): 0.4286973512967519
Totals: Over/Under with push handling#
[24]:
# Whole-number line: a total of exactly 2 goals is a push.
under, push, over = pred.totals(2.0)
print("Totals 2.0 -> Under, Push, Over:", (under, push, over))

# Half-goal line: no push is possible.
under, push, over = pred.totals(2.5)
print("Totals 2.5 -> Under, Push, Over:", (under, push, over))

# Back-compat helper: returns a single probability, with no push component.
over_two_and_a_half = pred.total_goals("over", 2.5)
print("P(Over 2.5):", over_two_and_a_half)
Totals 2.0 -> Under, Push, Over: (0.1939317196938681, 0.22044686250601725, 0.5856214178001146)
Totals 2.5 -> Under, Push, Over: (0.4143785821998854, 0.0, 0.5856214178001146)
P(Over 2.5): 0.5856214178001146
Asian handicap (integer / half / quarter lines)#
[25]:
# `asian_handicap` gives only the win probability for the chosen side and line.
home_minus_half = pred.asian_handicap("home", -0.5)

# `asian_handicap_probs` breaks the same bet down into win / push / lose.
home_minus_quarter = pred.asian_handicap_probs("home", -0.25)
away_plus_one = pred.asian_handicap_probs("away", +1.0)

print("AH Home -0.5 (win prob only):", home_minus_half)
print("AH Home -0.25 (Win/Push/Lose):", home_minus_quarter)
print("AH Away +1.0 (Win/Push/Lose):", away_plus_one)
AH Home -0.5 (win prob only): 0.7863122206831856
AH Home -0.25 (Win/Push/Lose): {'win': 0.6771433265252516, 'push': 0.10916889415793403, 'lose': 0.21368777931681426}
AH Away +1.0 (Win/Push/Lose): {'win': 0.07973956221991603, 'push': 0.13394821709689822, 'lose': 0.7863122206831856}
Double chance & Draw No Bet#
[26]:
# Double-chance markets first, then draw-no-bet, in display order.
derived_markets = (
    ("Double chance 1X:", pred.double_chance_1x),
    ("Double chance X2:", pred.double_chance_x2),
    ("Double chance 12:", pred.double_chance_12),
    ("DNB Home (conditional win prob):", pred.draw_no_bet_home),
    ("DNB Away (conditional win prob):", pred.draw_no_bet_away),
)
for label, prob in derived_markets:
    print(label, prob)
Double chance 1X: 0.7863122206831856
Double chance X2: 0.4320255676326823
Double chance 12: 0.7816622116841319
DNB Home (conditional win prob): 0.7266238841757325
DNB Away (conditional win prob): 0.27337611582426735
Exact scores & distributions#
[27]:
# Probability of one specific scoreline (home 2, away 1).
two_one = pred.exact_score(2, 1)
print("P(Exact score 2-1):", two_one)

# Distributions over goal counts for home, away, and the match total.
home_dist = pred.home_goal_distribution()
print("Home goal distribution (P(H=k)):", home_dist)

away_dist = pred.away_goal_distribution()
print("Away goal distribution (P(A=k)):", away_dist)

total_dist = pred.total_goals_distribution()
print("Total goals distribution (P(T=k)):", total_dist)
P(Exact score 2-1): 0.09897005717496646
Home goal distribution (P(H=k)): [1.44299267e-01 2.79344030e-01 2.70386290e-01 1.74477199e-01
8.44411149e-02 3.26933349e-02 1.05483187e-02 2.91716148e-03
7.05904178e-04 1.51837314e-04 2.93936677e-05 5.17292718e-06
8.34507775e-07 1.24268857e-07 1.71834173e-08]
Away goal distribution (P(A=k)): [3.31633051e-01 3.66032084e-01 2.01999598e-01 7.43174155e-02
2.05065194e-02 4.52671651e-03 8.32709259e-04 1.31297574e-04
1.81145713e-05 2.22150297e-06 2.45193101e-07 2.46023679e-08
2.26285648e-09 1.92121073e-10 1.51463614e-11]
Total goals distribution (P(T=k)): [4.72349671e-02 1.46696753e-01 2.20446863e-01 2.23983794e-01
1.70204841e-01 1.03470658e-01 5.24180987e-02 2.27613768e-02
8.64816260e-03 2.92076518e-03 8.87793471e-04 2.45320910e-04
6.21396246e-05 1.45291624e-05 3.15448050e-06 6.38486656e-07
1.20534896e-07 2.11560541e-08 3.43503914e-09 5.13555845e-10
7.04936725e-11 8.87598896e-12 1.02559236e-12 1.08826264e-13
1.05979703e-14 9.42632896e-16 7.53980297e-17 5.18351760e-18
2.60266249e-19]
Team-centric analytics#
[28]:
# Each entry pairs a label with the bound method that computes it; the method
# is only called at print time, so results appear in the listed order.
team_metrics = (
    ("Home win to nil:", pred.win_to_nil_home),
    ("Away win to nil:", pred.win_to_nil_away),
    ("Expected points (Home):", pred.expected_points_home),
    ("Expected points (Away):", pred.expected_points_away),
)
for label, metric in team_metrics:
    print(label, metric())
Home win to nil: 0.2843980843256169
Away win to nil: 0.09706429990582308
Expected points (Home): 1.922261085417821
Expected points (Away): 0.8594011262663108
Normalisation & truncation notes#
[32]:
# Total mass of the normalised grid, rounded to 6 decimal places for display.
normalized_total = round(pred.grid.sum(), 6)
print("Grid sum (should be 1.0 if normalize=True):", normalized_total)

# To audit truncation effects, predict again with normalisation switched off
# and check how much probability the max_goals cut-off leaves out.
pred_raw = model.predict(
    home_team,
    away_team,
    max_goals=15,
    normalize=False,
)
raw_total = pred_raw.grid.sum()
print("Raw grid sum (<= 1.0):", raw_total)
Grid sum (should be 1.0 if normalize=True): 1.0
Raw grid sum (<= 1.0): 0.9999999974786222
Save & load a fitted model#
[34]:
# Build the path from the platform's temp directory instead of hard-coding
# "/tmp", which does not exist on Windows and ignores a user-set TMPDIR.
file_path = os.path.join(tempfile.gettempdir(), "dixon_coles.pkl")

# Persist the fitted model to disk.
model.save(file_path)

# NOTE(review): the ".pkl" extension suggests a pickle-based format - if so,
# only load model files that come from a source you trust.
loaded = pb.models.DixonColesGoalModel.load(file_path)

# The reloaded model exposes the same prediction API as the original.
pred2 = loaded.predict(home_team, away_team)
print("Loaded model 1X2:", pred2.home_draw_away)
Loaded model 1X2: [0.5679744323673176, 0.21833778831586806, 0.21368777931681426]