{ "cells": [ { "cell_type": "markdown", "id": "81215b75-f5f8-4c17-9cd4-08b1b0ed4234", "metadata": {}, "source": [ "# End-to-end Example\n", "\n", "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1GjrDG_iq_9_lxEQK_aBmr-jCCCnFt0v7?usp=sharing)" ] }, { "cell_type": "code", "execution_count": 3, "id": "1f931497-c1f9-4cb4-969a-058676e42a24", "metadata": { "tags": [] }, "outputs": [], "source": [ "import os\n", "import tempfile\n", "\n", "import pandas as pd\n", "import penaltyblog as pb" ] }, { "cell_type": "markdown", "id": "4a1b5c76-8f47-4f59-8351-d5add2f69309", "metadata": {}, "source": [ "## Download data from football-data.co.uk" ] }, { "cell_type": "code", "execution_count": null, "id": "949b129d-e4e5-4975-8318-dd601d918e90", "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datedatetimeseasoncompetitiondivtimeteam_hometeam_awayfthgftag...b365_cahhb365_cahapcahhpcahamax_cahhmax_cahaavg_cahhavg_cahagoals_homegoals_away
id
1628812800---brentford---arsenal2021-08-132021-08-13 20:00:002021-2022ENG Premier LeagueE020:00BrentfordArsenal20...1.752.051.812.132.052.171.802.0920
1628899200---burnley---brighton2021-08-142021-08-14 15:00:002021-2022ENG Premier LeagueE015:00BurnleyBrighton12...1.792.151.812.141.822.191.792.1212
1628899200---chelsea---crystal_palace2021-08-142021-08-14 15:00:002021-2022ENG Premier LeagueE015:00ChelseaCrystal Palace30...2.051.752.121.812.161.932.061.8230
1628899200---everton---southampton2021-08-142021-08-14 15:00:002021-2022ENG Premier LeagueE015:00EvertonSouthampton31...2.051.882.051.882.081.902.031.8631
1628899200---leicester---wolves2021-08-142021-08-14 15:00:002021-2022ENG Premier LeagueE015:00LeicesterWolves10...2.021.912.011.922.051.951.991.8910
\n", "

5 rows × 111 columns

\n", "
" ], "text/plain": [ " date datetime \\\n", "id \n", "1628812800---brentford---arsenal 2021-08-13 2021-08-13 20:00:00 \n", "1628899200---burnley---brighton 2021-08-14 2021-08-14 15:00:00 \n", "1628899200---chelsea---crystal_palace 2021-08-14 2021-08-14 15:00:00 \n", "1628899200---everton---southampton 2021-08-14 2021-08-14 15:00:00 \n", "1628899200---leicester---wolves 2021-08-14 2021-08-14 15:00:00 \n", "\n", " season competition div \\\n", "id \n", "1628812800---brentford---arsenal 2021-2022 ENG Premier League E0 \n", "1628899200---burnley---brighton 2021-2022 ENG Premier League E0 \n", "1628899200---chelsea---crystal_palace 2021-2022 ENG Premier League E0 \n", "1628899200---everton---southampton 2021-2022 ENG Premier League E0 \n", "1628899200---leicester---wolves 2021-2022 ENG Premier League E0 \n", "\n", " time team_home team_away fthg \\\n", "id \n", "1628812800---brentford---arsenal 20:00 Brentford Arsenal 2 \n", "1628899200---burnley---brighton 15:00 Burnley Brighton 1 \n", "1628899200---chelsea---crystal_palace 15:00 Chelsea Crystal Palace 3 \n", "1628899200---everton---southampton 15:00 Everton Southampton 3 \n", "1628899200---leicester---wolves 15:00 Leicester Wolves 1 \n", "\n", " ftag ... b365_cahh b365_caha pcahh \\\n", "id ... \n", "1628812800---brentford---arsenal 0 ... 1.75 2.05 1.81 \n", "1628899200---burnley---brighton 2 ... 1.79 2.15 1.81 \n", "1628899200---chelsea---crystal_palace 0 ... 2.05 1.75 2.12 \n", "1628899200---everton---southampton 1 ... 2.05 1.88 2.05 \n", "1628899200---leicester---wolves 0 ... 
2.02 1.91 2.01 \n", "\n", " pcaha max_cahh max_caha avg_cahh \\\n", "id \n", "1628812800---brentford---arsenal 2.13 2.05 2.17 1.80 \n", "1628899200---burnley---brighton 2.14 1.82 2.19 1.79 \n", "1628899200---chelsea---crystal_palace 1.81 2.16 1.93 2.06 \n", "1628899200---everton---southampton 1.88 2.08 1.90 2.03 \n", "1628899200---leicester---wolves 1.92 2.05 1.95 1.99 \n", "\n", " avg_caha goals_home goals_away \n", "id \n", "1628812800---brentford---arsenal 2.09 2 0 \n", "1628899200---burnley---brighton 2.12 1 2 \n", "1628899200---chelsea---crystal_palace 1.82 3 0 \n", "1628899200---everton---southampton 1.86 3 1 \n", "1628899200---leicester---wolves 1.89 1 0 \n", "\n", "[5 rows x 111 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "COMPETITION = \"ENG Premier League\"\n", "SEASONS = [\"2021-2022\", \"2022-2023\", \"2023-2024\"]\n", "\n", "# One scrape per season, stacked in season order into a single fixtures frame.\n", "# Add or remove entries in SEASONS to change the training window.\n", "df = pd.concat(\n", "    [\n", "        pb.scrapers.FootballData(COMPETITION, season).get_fixtures()\n", "        for season in SEASONS\n", "    ]\n", ")\n", "\n", "df.head()" ] }, { "cell_type": "markdown", "id": "48a049b8", "metadata": {}, "source": [ "## Create time-decay weights so more recent data have more influence on the model's fit" ] }, { "cell_type": "code", "execution_count": 5, "id": "7d39d92f-6fa0-4a2a-8a48-22d214e38efc", "metadata": { "tags": [] }, "outputs": [], "source": [ "xi = 0.001\n", "weights = pb.models.dixon_coles_weights(df[\"date\"], xi=xi)" ] }, { "cell_type": "markdown", "id": "63a12589-0066-431f-8444-92e2944b55a4", "metadata": {}, "source": [ "## Build the model's inputs" ] }, { "cell_type": "code", "execution_count": 6, "id": "1831867d-c26e-4970-9586-a9e23e75cfed", "metadata": { "tags": [] }, "outputs": [], "source": [ "gh = df[\"goals_home\"].values\n", "ga = df[\"goals_away\"].values\n", "th = df[\"team_home\"].values\n", "ta = df[\"team_away\"].values" ] }, { "cell_type": "markdown", "id": "7842a0b7", "metadata": {},
"source": [ "## Fit a Dixon & Coles model\n", "\n", "- `use_gradient=True` for faster convergence\n", "- `minimizer_options` is passed directly to `scipy.optimize.minimize(options=...)`" ] }, { "cell_type": "code", "execution_count": 16, "id": "fc93ec32-d113-4155-a516-abfe58dc8469", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitted: True\n", "Log-likelihood: -2164.74110313536\n", "AIC: 4433.48220627072\n", "Number of params: 52\n", "First few params: [('attack_Arsenal', np.float64(1.4615514952216384)), ('attack_Aston Villa', np.float64(1.184047741206666)), ('attack_Bournemouth', np.float64(0.9147685302294655)), ('attack_Brentford', np.float64(1.0526808596367745)), ('attack_Brighton', np.float64(1.090265129154662))]\n" ] } ], "source": [ "model = pb.models.DixonColesGoalModel(gh, ga, th, ta, weights=weights)\n", "\n", "model.fit(\n", " use_gradient=True, # optional; can be False for back-compat\n", " minimizer_options={ # optional; passes to `scipy.optimize.minimize`\n", " \"maxiter\": 3000, # more iterations if needed\n", " \"gtol\": 1e-8, # gradient tolerance\n", " \"ftol\": 1e-9, # function tolerance\n", " \"disp\": False, # silence optimiser output\n", " }\n", ")\n", "\n", "print(\"Fitted:\", model.fitted)\n", "print(\"Log-likelihood:\", model.loglikelihood)\n", "print(\"AIC:\", model.aic)\n", "print(\"Number of params:\", model.n_params)\n", "print(\"First few params:\", list(model.params.items())[:5])" ] }, { "cell_type": "markdown", "id": "43bb1f12-7010-421b-bf93-bb8e1dba2df6", "metadata": {}, "source": [ "## Predict a specific fixture and get a `FootballProbabilityGrid` back" ] }, { "cell_type": "code", "execution_count": 20, "id": "3a047b77-707d-46b6-bcf8-57f3356efee3", "metadata": { "tags": [] }, "outputs": [], "source": [ "home_team = \"Man City\"\n", "away_team = \"Liverpool\"\n", "\n", "# By default: max_goals=15 and normalize=True\n", "pred = model.predict(home_team, away_team, max_goals=15,
normalize=True)" ] }, { "cell_type": "markdown", "id": "2a5274e7-d13e-455b-8e77-a6f51ba6f830", "metadata": {}, "source": [ "## Core markets (1X2)" ] }, { "cell_type": "code", "execution_count": 21, "id": "cc1d6199-c35e-4ea3-bf82-a89c31a7277d", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "P(Home win), P(Draw), P(Away win): [0.5679744323673176, 0.21833778831586806, 0.21368777931681426]\n", "P(Home win): 0.5679744323673176\n", "P(Draw): 0.21833778831586806\n", "P(Away win): 0.21368777931681426\n" ] } ], "source": [ "print(\"P(Home win), P(Draw), P(Away win):\", pred.home_draw_away)\n", "print(\"P(Home win):\", pred.home_win)\n", "print(\"P(Draw):\", pred.draw)\n", "print(\"P(Away win):\", pred.away_win)" ] }, { "cell_type": "markdown", "id": "58652620", "metadata": {}, "source": [ "## Goal expectancy" ] }, { "cell_type": "code", "execution_count": 22, "id": "eef96983-d83d-4c39-bd49-47cb4a704ab4", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Home xG: 1.9358658956305108\n", "Away xG: 1.103726188219536\n" ] } ], "source": [ "print(\"Home xG:\", pred.home_goal_expectation)\n", "print(\"Away xG:\", pred.away_goal_expectation)" ] }, { "cell_type": "markdown", "id": "c1cf6c2c", "metadata": {}, "source": [ "## Both teams to score (BTTS)" ] }, { "cell_type": "code", "execution_count": null, "id": "e08561b2-07ed-47b3-89d7-14c0a05cf854", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "BTTS (Yes): 0.5713026487032481\n", "BTTS (No): 0.4286973512967519\n" ] } ], "source": [ "print(\"BTTS (Yes):\", pred.btts_yes)\n", "print(\"BTTS (No):\", pred.btts_no)" ] }, { "cell_type": "markdown", "id": "6804463e", "metadata": {}, "source": [ "## Totals: Over/Under with push handling" ] }, { "cell_type": "code", "execution_count": 24, "id": "594e21a7-9a75-49a3-b3e8-50fa4bd8ac51", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream",
"text": [ "Totals 2.0 -> Under, Push, Over: (0.1939317196938681, 0.22044686250601725, 0.5856214178001146)\n", "Totals 2.5 -> Under, Push, Over: (0.4143785821998854, 0.0, 0.5856214178001146)\n", "P(Over 2.5): 0.5856214178001146\n" ] } ], "source": [ "# Integer line (push possible)\n", "u, p, o = pred.totals(2.0)\n", "print(\"Totals 2.0 -> Under, Push, Over:\", (u, p, o))\n", "# Half line (no push)\n", "u, p, o = pred.totals(2.5)\n", "print(\"Totals 2.5 -> Under, Push, Over:\", (u, p, o))\n", "# Back-compat helper (no push returned)\n", "print(\"P(Over 2.5):\", pred.total_goals(\"over\", 2.5))\n" ] }, { "cell_type": "markdown", "id": "b1147b45", "metadata": {}, "source": [ "## Asian handicap (integer / half / quarter lines)" ] }, { "cell_type": "code", "execution_count": 25, "id": "6f880286", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AH Home -0.5 (win prob only): 0.7863122206831856\n", "AH Home -0.25 (Win/Push/Lose): {'win': 0.6771433265252516, 'push': 0.10916889415793403, 'lose': 0.21368777931681426}\n", "AH Away +1.0 (Win/Push/Lose): {'win': 0.07973956221991603, 'push': 0.13394821709689822, 'lose': 0.7863122206831856}\n" ] } ], "source": [ "print(\"AH Home -0.5 (win prob only):\", pred.asian_handicap(\"home\", -0.5))\n", "print(\"AH Home -0.25 (Win/Push/Lose):\", pred.asian_handicap_probs(\"home\", -0.25))\n", "print(\"AH Away +1.0 (Win/Push/Lose):\", pred.asian_handicap_probs(\"away\", +1.0))" ] }, { "cell_type": "markdown", "id": "9996be1b-acf8-4305-9bf0-6e4832505d47", "metadata": {}, "source": [ "## Double chance & Draw No Bet" ] }, { "cell_type": "code", "execution_count": 26, "id": "8da5ea91-ff28-4c6d-b6bf-0d5ef417da2b", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Double chance 1X: 0.7863122206831856\n", "Double chance X2: 0.4320255676326823\n", "Double chance 12: 0.7816622116841319\n", "DNB Home (conditional win prob): 0.7266238841757325\n", "DNB Away (conditional 
win prob): 0.27337611582426735\n" ] } ], "source": [ "print(\"Double chance 1X:\", pred.double_chance_1x)\n", "print(\"Double chance X2:\", pred.double_chance_x2)\n", "print(\"Double chance 12:\", pred.double_chance_12)\n", "print(\"DNB Home (conditional win prob):\", pred.draw_no_bet_home)\n", "print(\"DNB Away (conditional win prob):\", pred.draw_no_bet_away)" ] }, { "cell_type": "markdown", "id": "5a0876d3-9d69-4b63-ae8a-d2b3b8f40aa6", "metadata": {}, "source": [ "## Exact scores & distributions" ] }, { "cell_type": "code", "execution_count": 27, "id": "280e7570-5010-4b39-8104-71ca27e4005a", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "P(Exact score 2-1): 0.09897005717496646\n", "Home goal distribution (P(H=k)): [1.44299267e-01 2.79344030e-01 2.70386290e-01 1.74477199e-01\n", " 8.44411149e-02 3.26933349e-02 1.05483187e-02 2.91716148e-03\n", " 7.05904178e-04 1.51837314e-04 2.93936677e-05 5.17292718e-06\n", " 8.34507775e-07 1.24268857e-07 1.71834173e-08]\n", "Away goal distribution (P(A=k)): [3.31633051e-01 3.66032084e-01 2.01999598e-01 7.43174155e-02\n", " 2.05065194e-02 4.52671651e-03 8.32709259e-04 1.31297574e-04\n", " 1.81145713e-05 2.22150297e-06 2.45193101e-07 2.46023679e-08\n", " 2.26285648e-09 1.92121073e-10 1.51463614e-11]\n", "Total goals distribution (P(T=k)): [4.72349671e-02 1.46696753e-01 2.20446863e-01 2.23983794e-01\n", " 1.70204841e-01 1.03470658e-01 5.24180987e-02 2.27613768e-02\n", " 8.64816260e-03 2.92076518e-03 8.87793471e-04 2.45320910e-04\n", " 6.21396246e-05 1.45291624e-05 3.15448050e-06 6.38486656e-07\n", " 1.20534896e-07 2.11560541e-08 3.43503914e-09 5.13555845e-10\n", " 7.04936725e-11 8.87598896e-12 1.02559236e-12 1.08826264e-13\n", " 1.05979703e-14 9.42632896e-16 7.53980297e-17 5.18351760e-18\n", " 2.60266249e-19]\n" ] } ], "source": [ "print(\"P(Exact score 2-1):\", pred.exact_score(2, 1))\n", "print(\"Home goal distribution (P(H=k)):\", pred.home_goal_distribution())\n", "print(\"Away 
goal distribution (P(A=k)):\", pred.away_goal_distribution())\n", "print(\"Total goals distribution (P(T=k)):\", pred.total_goals_distribution())" ] }, { "cell_type": "markdown", "id": "f1205e38-8afc-45fc-ba5f-59292aad9e21", "metadata": {}, "source": [ "## Team-centric analytics" ] }, { "cell_type": "code", "execution_count": 28, "id": "1b63af09-9383-4c5a-ae1c-dadb1a57193a", "metadata": { "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Home win to nil: 0.2843980843256169\n", "Away win to nil: 0.09706429990582308\n", "Expected points (Home): 1.922261085417821\n", "Expected points (Away): 0.8594011262663108\n" ] } ], "source": [ "print(\"Home win to nil:\", pred.win_to_nil_home())\n", "print(\"Away win to nil:\", pred.win_to_nil_away())\n", "print(\"Expected points (Home):\", pred.expected_points_home())\n", "print(\"Expected points (Away):\", pred.expected_points_away())" ] }, { "cell_type": "markdown", "id": "daa72793", "metadata": {}, "source": [ "## Normalisation & truncation notes" ] }, { "cell_type": "code", "execution_count": 32, "id": "d5e49496", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Grid sum (should be 1.0 if normalize=True): 1.0\n", "Raw grid sum (<= 1.0): 0.9999999974786222\n" ] } ], "source": [ "print(\"Grid sum (should be 1.0 if normalize=True):\", round(pred.grid.sum(), 6))\n", "\n", "# If you need to audit truncation effects, you can re-run with normalize=False\n", "pred_raw = model.predict(home_team, away_team, max_goals=15, normalize=False)\n", "print(\"Raw grid sum (<= 1.0):\", pred_raw.grid.sum())" ] }, { "cell_type": "markdown", "id": "657bb86c", "metadata": {}, "source": [ "## Save & load a fitted model" ] }, { "cell_type": "code", "execution_count": 34, "id": "05b51dad", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Loaded model 1X2: [0.5679744323673176, 0.21833778831586806, 0.21368777931681426]\n" ] } ], "source": [ 
"# Write to the platform's temp directory rather than a hard-coded \"/tmp\",\n", "# which does not exist on Windows. The path stays a plain str.\n", "file_path = os.path.join(tempfile.gettempdir(), \"dixon_coles.pkl\")\n", "\n", "model.save(file_path)\n", "\n", "# NOTE(review): the .pkl extension suggests a pickle-based format - confirm,\n", "# and only load model files you created yourself or otherwise trust.\n", "loaded = pb.models.DixonColesGoalModel.load(file_path)\n", "pred2 = loaded.predict(home_team, away_team)  # same API\n", "print(\"Loaded model 1X2:\", pred2.home_draw_away)" ] } ], "metadata": { "kernelspec": { "display_name": "venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.13.1" } }, "nbformat": 4, "nbformat_minor": 5 }