feat(cli): migrate build_season_schedule and compute_ratings to typer CLI

- add typer-based CLI to build_season_schedule.py for structured option handling
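- refactor compute_ratings.py to remove argparse and support typer CLI
  (note: the input flag is now `--inp` instead of `--in`, and `--team-id`
  is no longer restricted to the `names`/`slugs` choices)
- improve typing and option descriptions in compute_ratings.py main function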
- add .gitignore entry for __pycache__
- add requirements.txt with dependencies for the project
This commit is contained in:
2025-08-29 16:14:50 -05:00
parent 5cecc6e280
commit c541c3fc51
4 changed files with 76 additions and 69 deletions

1
.gitignore vendored
View File

@@ -1,2 +1,3 @@
/*.csv /*.csv
/*.numbers /*.numbers
**/__pycache__

View File

@@ -30,6 +30,7 @@ from urllib.parse import urlencode
import requests import requests
from bs4 import BeautifulSoup from bs4 import BeautifulSoup
from dateutil import parser as dtp from dateutil import parser as dtp
import typer
# ----------------- logging ----------------- # ----------------- logging -----------------
logging.basicConfig( logging.basicConfig(
@@ -264,16 +265,14 @@ def fetch_game_time(game_id: str, session: requests.Session) -> Optional[str]:
return None return None
# ----------------- build & merge ----------------- # ----------------- build & merge -----------------
def main(): def main(
ap = argparse.ArgumentParser(description="Build a deduped season schedule with IDs, winners/losers, runs, and times.") subseason: str = typer.Option(..., help="Subseason ID, e.g. 942425"),
ap.add_argument("--subseason", required=True, help="Subseason ID, e.g. 942425") teams: str = typer.Option(..., help="Path to teams.json (array with team_id, team_slug, instance_id, teamName)"),
ap.add_argument("--teams", required=True, help="Path to teams.json (array with team_id, team_slug, instance_id, teamName)") out: str = typer.Option("season_schedule.csv", help="Output CSV path"),
ap.add_argument("--out", default="season_schedule.csv", help="Output CSV path") fetch_time: bool = typer.Option(False, help="Fetch game time from /game/show/<id>"),
ap.add_argument("--fetch-time", action="store_true", help="Fetch game time from /game/show/<id>") sleep: float = typer.Option(0.35, help="Delay between requests (seconds)")
ap.add_argument("--sleep", type=float, default=0.35, help="Delay between requests (seconds)") ):
args = ap.parse_args() by_instance, by_slug, by_norm = load_teams(teams)
by_instance, by_slug, by_norm = load_teams(args.teams)
instance_ids = sorted(by_instance.keys()) instance_ids = sorted(by_instance.keys())
session = requests.Session() session = requests.Session()
@@ -283,8 +282,8 @@ def main():
raw: List[dict] = [] raw: List[dict] = []
for i, iid in enumerate(instance_ids, 1): for i, iid in enumerate(instance_ids, 1):
logging.info(f"[{i}/{len(instance_ids)}] Fetching schedule for instance {iid}") logging.info(f"[{i}/{len(instance_ids)}] Fetching schedule for instance {iid}")
raw.extend(parse_printable(iid, args.subseason, session=session)) raw.extend(parse_printable(iid, subseason, session=session))
time.sleep(args.sleep) # be polite time.sleep(sleep) # be polite
def rec_from_instance(iid: str) -> Optional[TeamRec]: def rec_from_instance(iid: str) -> Optional[TeamRec]:
return by_instance.get(iid) return by_instance.get(iid)
@@ -407,7 +406,7 @@ def main():
# -------- NEW: fetch game start time from game page -------- # -------- NEW: fetch game start time from game page --------
time_local = "" time_local = ""
if args.fetch_time and game_id: if fetch_time and game_id:
if game_id in time_cache: if game_id in time_cache:
tval = time_cache[game_id] tval = time_cache[game_id]
else: else:
@@ -415,8 +414,7 @@ def main():
tval = fetch_game_time(game_id, session=session) tval = fetch_game_time(game_id, session=session)
time_cache[game_id] = tval time_cache[game_id] = tval
if tval is None: if tval is None:
# small backoff to be nice if many misses time.sleep(min(sleep * 2, 1.0))
time.sleep(min(args.sleep * 2, 1.0))
if tval: if tval:
time_local = tval time_local = tval
@@ -452,13 +450,13 @@ def main():
"loser_slug","loser_instance","loser_id", "loser_slug","loser_instance","loser_id",
"location","status","game_id","source_urls", "location","status","game_id","source_urls",
] ]
with open(args.out, "w", newline="", encoding="utf-8") as f: with open(out, "w", newline="", encoding="utf-8") as f:
w = csv.DictWriter(f, fieldnames=fieldnames) w = csv.DictWriter(f, fieldnames=fieldnames)
w.writeheader() w.writeheader()
for r in out_rows: for r in out_rows:
w.writerow(r) w.writerow(r)
logging.info(f"Wrote {len(out_rows)} games → {args.out}") logging.info(f"Wrote {len(out_rows)} games → {out}")
if __name__ == "__main__": if __name__ == "__main__":
main() typer.run(main)

View File

@@ -19,43 +19,27 @@ Defaults:
""" """
from __future__ import annotations from __future__ import annotations
import argparse
import math import math
import numpy as np import numpy as np
import pandas as pd import pandas as pd
import typer
def parse_args(): def load_games(
p = argparse.ArgumentParser(description="Power ratings from season_schedule.csv") inp: str,
p.add_argument("--in", dest="inp", required=True, help="Input CSV (season_schedule.csv)") team_id: str = "names",
p.add_argument("--out", dest="out", required=True, help="Output ratings CSV") final_status: str | None = None,
p.add_argument("--team-id", choices=["names","slugs"], default="names", ) -> pd.DataFrame:
help="Use team names or slugs as identifiers (default: names)") df = pd.read_csv(inp)
p.add_argument("--final-status", default=None,
help="Only include games where status == this value (e.g., 'final'). If omitted, any row with scores is included.")
# Tunables
p.add_argument("--pyexp", type=float, default=1.83, help="Pythagorean exponent")
p.add_argument("--massey-cap", type=float, default=8.0, help="Cap for run margins in Massey")
p.add_argument("--no-massey-home-adj", action="store_true",
help="Disable subtracting estimated home-field runs in Massey")
p.add_argument("--elo-k", type=float, default=24.0, help="Elo K-factor")
p.add_argument("--elo-home", type=float, default=30.0, help="Elo home bonus (points)")
p.add_argument("--elo-mcap", type=float, default=2.0, help="Cap for margin factor ln(|m|+1)")
p.add_argument("--elo-shuffles", type=int, default=20, help="Random shuffles to average Elo")
p.add_argument("--elo-seed", type=int, default=42, help="RNG seed for shuffles")
return p.parse_args()
def load_games(a) -> pd.DataFrame:
df = pd.read_csv(a.inp)
# Choose identifiers # Choose identifiers
home_id_col = "home_name" if a.team_id == "names" else "home_slug" home_id_col = "home_name" if team_id == "names" else "home_slug"
away_id_col = "away_name" if a.team_id == "names" else "away_slug" away_id_col = "away_name" if team_id == "names" else "away_slug"
for c in [home_id_col, away_id_col, "home_runs", "away_runs"]: for c in [home_id_col, away_id_col, "home_runs", "away_runs"]:
if c not in df.columns: if c not in df.columns:
raise ValueError(f"Missing required column: {c}") raise ValueError(f"Missing required column: {c}")
# Optional status filter (helps exclude postponed/canceled) # Optional status filter (helps exclude postponed/canceled)
if a.final_status is not None and "status" in df.columns: if final_status is not None and "status" in df.columns:
df = df[df["status"].astype(str).str.lower() == str(a.final_status).lower()] df = df[df["status"].astype(str).str.lower() == str(final_status).lower()]
# Keep only games with numeric scores # Keep only games with numeric scores
df = df.copy() df = df.copy()
@@ -173,52 +157,71 @@ def zscore(s: pd.Series) -> pd.Series:
mu, sd = s.mean(), s.std(ddof=0) mu, sd = s.mean(), s.std(ddof=0)
return pd.Series(0.0, index=s.index) if (sd == 0 or np.isnan(sd)) else (s - mu) / sd return pd.Series(0.0, index=s.index) if (sd == 0 or np.isnan(sd)) else (s - mu) / sd
def main(): def main(
a = parse_args() inp: str = typer.Option(..., help="Input CSV (season_schedule.csv)"),
games = load_games(a) out: str = typer.Option(..., help="Output ratings CSV"),
team_id: str = typer.Option(
"names",
help="Use team names or slugs as identifiers (default: names)",
show_default=True,
case_sensitive=False,
prompt=False,
),
final_status: str | None = typer.Option(None, help="Only include games where status == this value (e.g., 'final'). If omitted, any row with scores is included."),
pyexp: float = typer.Option(1.83, help="Pythagorean exponent"),
massey_cap: float = typer.Option(8.0, help="Cap for run margins in Massey"),
no_massey_home_adj: bool = typer.Option(False, help="Disable subtracting estimated home-field runs in Massey"),
elo_k: float = typer.Option(24.0, help="Elo K-factor"),
elo_home: float = typer.Option(30.0, help="Elo home bonus (points)"),
elo_mcap: float = typer.Option(2.0, help="Cap for margin factor ln(|m|+1)"),
elo_shuffles: int = typer.Option(20, help="Random shuffles to average Elo"),
elo_seed: int = typer.Option(42, help="RNG seed for shuffles")
):
team_id = team_id.lower()
# Load games
games = load_games(inp, team_id=team_id, final_status=final_status)
# Aggregates # Aggregates
team = aggregate_team_stats(games) team = aggregate_team_stats(games)
team["PythagoreanWinPct"] = pythagorean(team["RS"], team["RA"], a.pyexp) team["PythagoreanWinPct"] = pythagorean(team["RS"], team["RA"], pyexp)
# Ratings # Ratings
massey_r, h_runs = massey(games, cap=a.massey_cap, subtract_home=(not a.no_massey_home_adj)) massey_r, h_runs = massey(games, cap=massey_cap, subtract_home=not no_massey_home_adj)
sos = (
games.assign(OppTeam=np.where(True, games["AwayTeam"], games["AwayTeam"])) # placeholder # Strength of schedule
)
# Strength of schedule: avg opponent Massey rating faced
opps = {t: [] for t in massey_r.index} opps = {t: [] for t in massey_r.index}
for _, r in games.iterrows(): for _, r in games.iterrows():
opps[r["HomeTeam"]].append(r["AwayTeam"]) opps[r["HomeTeam"]].append(r["AwayTeam"])
opps[r["AwayTeam"]].append(r["HomeTeam"]) opps[r["AwayTeam"]].append(r["HomeTeam"])
sos_series = pd.Series({t: (float(massey_r[opps[t]].mean()) if opps[t] else 0.0) for t in opps}) sos_series = pd.Series({t: (float(massey_r[opps[t]].mean()) if opps[t] else 0.0) for t in opps})
elo_r = elo(games, K=a.elo_k, H=a.elo_home, mcap=a.elo_mcap, shuffles=a.elo_shuffles, seed=a.elo_seed) elo_r = elo(games, K=elo_k, H=elo_home, mcap=elo_mcap, shuffles=elo_shuffles, seed=elo_seed)
# Merge # Merge
out = team.set_index("Team") out_df = team.set_index("Team")
out["MasseyRating"] = massey_r out_df["MasseyRating"] = massey_r
out["EloRating"] = elo_r out_df["EloRating"] = elo_r
out["StrengthOfSchedule"] = sos_series out_df["StrengthOfSchedule"] = sos_series
# Composite # Composite
Z_r, Z_e, Z_p = zscore(out["MasseyRating"]), zscore(out["EloRating"]), zscore(out["PythagoreanWinPct"]) Z_r, Z_e, Z_p = zscore(out_df["MasseyRating"]), zscore(out_df["EloRating"]), zscore(out_df["PythagoreanWinPct"])
out["CompositeRating"] = 0.45*Z_r + 0.35*Z_e + 0.20*Z_p out_df["CompositeRating"] = 0.45*Z_r + 0.35*Z_e + 0.20*Z_p
out = out.reset_index() out_df = out_df.reset_index()
out = out[[ out_df = out_df[[
"Team","GP","W","L","T","WinPct","RS","RA","RunDiff", "Team","GP","W","L","T","WinPct","RS","RA","RunDiff",
"PythagoreanWinPct","MasseyRating","EloRating","StrengthOfSchedule","CompositeRating" "PythagoreanWinPct","MasseyRating","EloRating","StrengthOfSchedule","CompositeRating"
]].sort_values("CompositeRating", ascending=False) ]].sort_values("CompositeRating", ascending=False)
# Round for readability # Round for readability
for c in ["WinPct","PythagoreanWinPct","MasseyRating","EloRating","StrengthOfSchedule","CompositeRating"]: for c in ["WinPct","PythagoreanWinPct","MasseyRating","EloRating","StrengthOfSchedule","CompositeRating"]:
out[c] = out[c].astype(float).round(5) out_df[c] = out_df[c].astype(float).round(5)
out.to_csv(a.out, index=False) out_df.to_csv(out, index=False)
print(f"Done. Estimated home-field (runs) used in Massey: {h_runs:.3f}") print(f"Done. Estimated home-field (runs) used in Massey: {h_runs:.3f}")
print(f"Teams ranked: {len(out)} | Games processed: {len(games)}") print(f"Teams ranked: {len(out_df)} | Games processed: {len(games)}")
print(f"Output -> {a.out}") print(f"Output -> {out}")
if __name__ == "__main__": if __name__ == "__main__":
main() typer.run(main)

5
requirements.txt Normal file
View File

@@ -0,0 +1,5 @@
typer[all]==0.16.1
pandas==2.3.2
numpy==2.3.2
beautifulsoup4==4.13.5
requests==2.32.5