Convert scripts to use Typer and add requirements.txt

- Replaced argparse with Typer for CLI argument parsing in both scripts (build_season_schedule.py and compute_ratings.py)
- Updated function signatures and call sites in build_season_schedule.py and compute_ratings.py to match
- Added requirements.txt listing dependencies including typer[all], pandas, numpy, etc.
This commit is contained in:
2025-08-28 15:06:10 -05:00
parent 5cecc6e280
commit ef11cdbac3
4 changed files with 76 additions and 80 deletions

View File

@@ -30,6 +30,7 @@ from urllib.parse import urlencode
import requests
from bs4 import BeautifulSoup
from dateutil import parser as dtp
import typer
# ----------------- logging -----------------
logging.basicConfig(
@@ -264,16 +265,14 @@ def fetch_game_time(game_id: str, session: requests.Session) -> Optional[str]:
return None
# ----------------- build & merge -----------------
def main():
ap = argparse.ArgumentParser(description="Build a deduped season schedule with IDs, winners/losers, runs, and times.")
ap.add_argument("--subseason", required=True, help="Subseason ID, e.g. 942425")
ap.add_argument("--teams", required=True, help="Path to teams.json (array with team_id, team_slug, instance_id, teamName)")
ap.add_argument("--out", default="season_schedule.csv", help="Output CSV path")
ap.add_argument("--fetch-time", action="store_true", help="Fetch game time from /game/show/<id>")
ap.add_argument("--sleep", type=float, default=0.35, help="Delay between requests (seconds)")
args = ap.parse_args()
by_instance, by_slug, by_norm = load_teams(args.teams)
def main(
subseason: str = typer.Option(..., help="Subseason ID, e.g. 942425"),
teams: str = typer.Option(..., help="Path to teams.json (array with team_id, team_slug, instance_id, teamName)"),
out: str = typer.Option("season_schedule.csv", help="Output CSV path"),
fetch_time: bool = typer.Option(False, help="Fetch game time from /game/show/<id>"),
sleep: float = typer.Option(0.35, help="Delay between requests (seconds)")
):
by_instance, by_slug, by_norm = load_teams(teams)
instance_ids = sorted(by_instance.keys())
session = requests.Session()
@@ -283,8 +282,8 @@ def main():
raw: List[dict] = []
for i, iid in enumerate(instance_ids, 1):
logging.info(f"[{i}/{len(instance_ids)}] Fetching schedule for instance {iid}")
raw.extend(parse_printable(iid, args.subseason, session=session))
time.sleep(args.sleep) # be polite
raw.extend(parse_printable(iid, subseason, session=session))
time.sleep(sleep) # be polite
def rec_from_instance(iid: str) -> Optional[TeamRec]:
return by_instance.get(iid)
@@ -292,7 +291,6 @@ def main():
def match_opponent(text: str) -> Optional[TeamRec]:
return best_match_team(text, by_slug, by_norm)
# Group by game_id if available; otherwise fallback on (date + unordered pair + raw score text if present)
buckets: Dict[str, dict] = {}
fallback_rows = 0
@@ -337,7 +335,6 @@ def main():
if fallback_rows:
logging.info(f"Used fallback dedupe for {fallback_rows} rows without game_id.")
# Merge perspectives into a single home/away row
out_rows = []
time_cache: Dict[str, Optional[str]] = {}
@@ -346,11 +343,9 @@ def main():
date = p[0]["date"]
game_id = bucket.get("game_id", "")
# Identify home/away perspectives
p_home = next((x for x in p if x["is_away"] is False), None)
p_away = next((x for x in p if x["is_away"] is True), None)
# Team identities
home_team = (p_home["team"] if p_home else (p_away["opp"] if p_away else None))
away_team = (p_away["team"] if p_away else (p_home["opp"] if p_home else None))
@@ -359,7 +354,6 @@ def main():
return rec.slug, rec.instance_id, rec.team_id, rec.name
return fallback_slug, "", "", fallback_slug.replace("-", " ").title()
# Prefer runs from the explicit perspective (home if available; otherwise away)
home_runs = away_runs = None
if p_home and isinstance(p_home["team_runs"], int) and isinstance(p_home["opp_runs"], int):
home_runs = p_home["team_runs"]
@@ -368,7 +362,6 @@ def main():
away_runs = p_away["team_runs"]
home_runs = p_away["opp_runs"]
# Fallback: single perspective present but numbers known → place by is_away
if (home_runs is None or away_runs is None) and p:
one = p[0]
if isinstance(one["team_runs"], int) and isinstance(one["opp_runs"], int):
@@ -379,7 +372,6 @@ def main():
home_runs = one["team_runs"]; away_runs = one["opp_runs"]
home_team = one["team"]; away_team = one["opp"] if one["opp"] else away_team
# Pack final team identifiers (fallback slug = guess from perspectives)
guess_home_fallback = (p_home["team"].slug if p_home and p_home["team"] else
p_away["opp"].slug if p_away and p_away["opp"] else
p[0]["pair"][0])
@@ -390,7 +382,6 @@ def main():
home_slug, home_inst, home_id, home_name = pack_team(home_team, guess_home_fallback)
away_slug, away_inst, away_id, away_name = pack_team(away_team, guess_away_fallback)
# Winner/loser
winner_slug = winner_inst = winner_id = loser_slug = loser_inst = loser_id = ""
if isinstance(home_runs, int) and isinstance(away_runs, int):
if home_runs > away_runs:
@@ -400,14 +391,12 @@ def main():
winner_slug, winner_inst, winner_id = away_slug, away_inst, away_id
loser_slug, loser_inst, loser_id = home_slug, home_inst, home_id
# Meta from perspectives
loc = (p_home["location"] if p_home else "") or (p_away["location"] if p_away else "")
status = (p_home["status"] if p_home else "") or (p_away["status"] if p_away else "")
source_urls = sorted({x["source_url"] for x in p})
# -------- NEW: fetch game start time from game page --------
time_local = ""
if args.fetch_time and game_id:
if fetch_time and game_id:
if game_id in time_cache:
tval = time_cache[game_id]
else:
@@ -415,8 +404,7 @@ def main():
tval = fetch_game_time(game_id, session=session)
time_cache[game_id] = tval
if tval is None:
# small backoff to be nice if many misses
time.sleep(min(args.sleep * 2, 1.0))
time.sleep(min(sleep * 2, 1.0))
if tval:
time_local = tval
@@ -452,13 +440,13 @@ def main():
"loser_slug","loser_instance","loser_id",
"location","status","game_id","source_urls",
]
with open(args.out, "w", newline="", encoding="utf-8") as f:
with open(out, "w", newline="", encoding="utf-8") as f:
w = csv.DictWriter(f, fieldnames=fieldnames)
w.writeheader()
for r in out_rows:
w.writerow(r)
logging.info(f"Wrote {len(out_rows)} games → {args.out}")
logging.info(f"Wrote {len(out_rows)} games → {out}")
if __name__ == "__main__":
main()
typer.run(main)