import csv
import re
from pathlib import Path
from typing import List, Dict

from dateutil import parser
from rich.console import Console
from rich.table import Table

# Aliases accepted in incoming CSV headers, mapped to the canonical key used
# by the rest of this module. Keys are compared lower-cased and stripped.
_HEADER_KEY_ALIASES = {
    "away": "visitor",
    "results": "results",
    "final score": "results",
    "venue": "field",
    "location": "field",
    "result": "results",
    "w": "win",
    "l": "loss",
    "t": "tie",
    "div": "division",
    "rf": "runs_for",
    "runs": "runs_against",
}

# Columns every schedule CSV must provide (after header normalization).
_REQUIRED_HEADER_KEYS = ("date", "time", "field", "visitor", "home", "results")

# Optional leading text, optional "<digits>-<digits>" score, optional trailing
# text. Because the leading group is lazy and everything is optional, the score
# is only recognized when it sits at the very start of the string; otherwise
# the whole string ends up in "post" and no score is captured.
_SCORE_RE = re.compile(
    r"^(?P<pre>.*?)?(?:(?P<runs_first>\d+)-(?P<runs_second>\d+))?(?P<post>.*?)?$"
)

# Outcomes that are tallied per team by aggregate_teams.
_COUNTED_OUTCOMES = ("win", "loss", "tie")


def normalize_header_key(key: str) -> str:
    """Return the canonical form of a CSV header key.

    The key is lower-cased and stripped of surrounding whitespace, then
    translated through the alias table (e.g. ``"Away"`` -> ``"visitor"``).
    Keys without an alias are returned lower-cased and stripped.
    """
    cleaned = key.lower().strip()
    return _HEADER_KEY_ALIASES.get(cleaned, cleaned)


def validate_csv_header(header: List[str]) -> bool:
    """Return True if *header* contains every required column.

    Header keys are normalized with :func:`normalize_header_key` before the
    check, so aliases such as ``"away"`` or ``"venue"`` are accepted.
    """
    present = {normalize_header_key(key) for key in header}
    return all(key in present for key in _REQUIRED_HEADER_KEYS)


def read_csv(file_path: Path) -> List[dict]:
    """Read a CSV file into a list of dicts with normalized header keys.

    Every value is stripped of surrounding whitespace. Rows shorter than the
    header get ``""`` for the missing cells, and cells beyond the header
    width are ignored (``csv.DictReader`` reports those as a ``None`` value
    and a ``None`` key respectively, which previously crashed this function).
    """
    data = []
    with open(file_path, "r", newline="") as csvfile:
        for row in csv.DictReader(csvfile):
            data.append(
                {
                    normalize_header_key(key): (value or "").strip()
                    for key, value in row.items()
                    if key is not None  # surplus cells have no header name
                }
            )
    return data


def write_csv(file_path: Path, data: List[dict]) -> None:
    """Write *data* to *file_path* as CSV, using the first row's keys as header.

    If *data* is empty the file is created (or truncated) and left empty
    instead of raising ``IndexError``.
    """
    with open(file_path, "w", newline="") as csvfile:
        if not data:
            return
        writer = csv.DictWriter(csvfile, fieldnames=list(data[0].keys()))
        writer.writeheader()
        writer.writerows(data)


def parse_score(score_str: str, reverse_order: bool = False) -> Dict[str, int]:
    """Parse a score string and extract home and visitor scores.

    Args:
        score_str (str): The score string, expected to start with
            "visitor-home" (e.g. ``"5-3"`` or ``"5-3 forfeit"``).
        reverse_order (bool, optional): If True, the first number is the home
            score instead of the visitor score. Defaults to False.

    Returns:
        Dict[str, int]: Always contains ``has_result`` (bool). When a score
        was found it also contains ``home_runs_for``, ``visitor_runs_for``,
        ``home_runs_against``, ``visitor_runs_against`` (ints) and
        ``home_outcome`` / ``visitor_outcome`` (``"win"``, ``"loss"``,
        ``"tie"`` or ``"forfeit"``). Non-empty text around the score is
        returned under ``pre`` / ``post``.

    Raises:
        ValueError: If the string does not match the score pattern at all
            (e.g. it contains an embedded newline).

    NOTE(review): the pattern only recognizes a score at the start of the
    string; with leading text the whole string lands in ``post`` and
    ``has_result`` is False. Confirm whether leading text should be supported.
    """
    match = _SCORE_RE.match(score_str)
    if not match:
        raise ValueError("Invalid score format")

    score = {}
    if match.group("pre"):
        score["pre"] = match.group("pre")
    if match.group("post"):
        score["post"] = match.group("post")

    if not (match.group("runs_first") and match.group("runs_second")):
        score["has_result"] = False
        return score

    score["has_result"] = True
    runs_first = int(match.group("runs_first"))
    runs_second = int(match.group("runs_second"))
    if reverse_order:
        home_runs, visitor_runs = runs_first, runs_second
    else:
        home_runs, visitor_runs = runs_second, runs_first
    score.update({
        "home_runs_for": home_runs,
        "visitor_runs_for": visitor_runs,
        "home_runs_against": visitor_runs,
        "visitor_runs_against": home_runs,
    })

    # A "forfeit" marker after the score relabels the losing side's outcome.
    forfeited = "forfeit" in score.get("post", "")
    if home_runs > visitor_runs:
        score["home_outcome"] = "win"
        score["visitor_outcome"] = "forfeit" if forfeited else "loss"
    elif home_runs < visitor_runs:
        score["home_outcome"] = "forfeit" if forfeited else "loss"
        score["visitor_outcome"] = "win"
    else:
        score["home_outcome"] = "tie"
        score["visitor_outcome"] = "tie"
    return score


def is_visitor_home_order_reversed(header: List[str]) -> bool:
    """Determine if the header lists 'home' before 'visitor'.

    The convention is that home comes second.

    Args:
        header (List[str]): The list of header keys; must contain the exact
            entries ``'visitor'`` and ``'home'``.

    Returns:
        bool: True if the 'home' key comes before the 'visitor' key,
        indicating reversed order.

    Raises:
        ValueError: If ``'visitor'`` or ``'home'`` is not in *header*.
    """
    return header.index('visitor') > header.index('home')


def process_data(data: List[Dict], visitor_home_order_reversed=False) -> List[Dict]:
    """Enrich each row in place with parsed score fields and a ``datetime``.

    For every row, the ``results`` value is parsed with :func:`parse_score`
    and merged into the row, and ``date`` + ``time`` are parsed with
    ``dateutil.parser.parse`` into ``row['datetime']``. Parsing errors from
    either step propagate to the caller.

    Returns:
        List[Dict]: The same list object that was passed in.
    """
    for row in data:
        row.update(parse_score(row["results"], visitor_home_order_reversed))
        row["datetime"] = parser.parse(f"{row['date']} {row['time']}")
    return data


def _new_team_record() -> Dict[str, int]:
    """Return a zeroed per-team statistics record."""
    return {"win": 0, "loss": 0, "tie": 0, "gp": 0, "runs_for": 0, "runs_against": 0}


def aggregate_teams(data: List[Dict[str, str]]) -> List[Dict[str, int]]:
    """Aggregate data by team, summing up wins, losses, ties and runs.

    Rows whose ``has_result`` is falsy are skipped. Each remaining row adds
    one game played (``gp``) to both teams. An outcome other than win, loss
    or tie (e.g. ``"forfeit"``) still counts as a game played but is not
    tallied in any outcome column.

    Args:
        data (List[Dict[str, str]]): Rows as produced by :func:`process_data`.

    Returns:
        List[Dict[str, int]]: One dict per team (``team`` plus its counters),
        sorted by number of wins, highest first.
    """
    team_stats = {}
    for row in data:
        if not row["has_result"]:
            continue
        for side in ("home", "visitor"):
            stats = team_stats.setdefault(row[side], _new_team_record())
            stats["gp"] += 1
            outcome = row[f"{side}_outcome"]
            if outcome in _COUNTED_OUTCOMES:
                stats[outcome] += 1
            stats["runs_for"] += row[f"{side}_runs_for"]
            stats["runs_against"] += row[f"{side}_runs_against"]

    aggregated_data = [{"team": team, **stats} for team, stats in team_stats.items()]
    # Sort by win count, most wins first (stable for equal win counts).
    return sorted(aggregated_data, key=lambda entry: entry["win"], reverse=True)


def _sportspress_results(row: Dict, side: str) -> str:
    """Build the pipe-separated results cell for *side* ('home' or 'visitor').

    The cell holds innings 1-10, total runs, errors and hits, in that order;
    values missing from *row* become empty strings.
    """
    keys = [f"{side}_runs_for_inning_{inning}" for inning in range(1, 11)]
    keys += [f"{side}_runs_for", f"{side}_errors", f"{side}_hits"]
    return "|".join(str(row.get(key, "")) for key in keys)


def write_sportspress_csv(data: List[Dict], file_path: Path, only_with_outcome: bool = False):
    """Write sports event data to a CSV file in a SportsPress-style layout.

    Each event produces two rows: the home team row carries date, time and
    venue; the visitor row leaves those three cells empty.

    Parameters:
    - data (List[Dict]): Events as produced by ``process_data`` (each needs
      ``datetime``, ``home``, ``visitor`` and ``has_result``).
    - file_path (Path): Where the CSV file will be created.
    - only_with_outcome (bool, optional): If True, events without a result
      are skipped. Default is False.

    Returns:
    None
    """
    # Only the columns actually written per row belong in the header. The
    # previous header also listed "Format" and "Season", which shifted every
    # data column by two. Presumably Format / Competition / Season /
    # Date Format are chosen at import time rather than per row -- confirm.
    fieldnames = ["Date", "Time", "Venue", "Team", "Results", "Outcome"]

    # newline="" lets the csv module control line endings; without it,
    # platforms that translate "\n" get an extra blank line after every row.
    with file_path.open('w', newline="") as output_csv_file:
        writer = csv.writer(output_csv_file)
        writer.writerow(fieldnames)
        for row in data:
            if only_with_outcome and not row['has_result']:
                continue
            writer.writerow([
                row["datetime"].strftime("%Y/%m/%d"),
                row["datetime"].strftime("%H:%M"),
                row.get("field", ""),
                row["home"],
                _sportspress_results(row, "home"),
                row.get("home_outcome"),
            ])
            writer.writerow([
                "",
                "",
                "",
                row["visitor"],
                _sportspress_results(row, "visitor"),
                row.get("visitor_outcome"),
            ])