initial commit

This commit is contained in:
2023-12-31 14:28:02 -06:00
commit 4105cc2373
9 changed files with 653 additions and 0 deletions

View File

@@ -0,0 +1,263 @@
import csv
import re
from typing import List, Dict
from dateutil import parser
from pathlib import Path
from rich.console import Console
from rich.table import Table
def normalize_header_key(key: str) -> str:
    """Translate a raw CSV column title into the canonical key used here.

    The title is lower-cased and stripped of surrounding whitespace; if the
    result is a known alias it is replaced by its canonical name, otherwise
    the cleaned title itself is returned.

    Args:
        key: Column title as it appears in the CSV header.

    Returns:
        The canonical (or merely cleaned) column key.
    """
    aliases = {
        "away": "visitor",
        "results": "results",
        "final score": "results",
        "venue": "field",
        "location": "field",
        "result": "results",
        "w": "win",
        "l": "loss",
        "t": "tie",
        "div": "division",
        "rf": "runs_for",
        "runs": "runs_against",
    }
    cleaned = key.lower().strip()
    if cleaned in aliases:
        return aliases[cleaned]
    return cleaned
def validate_csv_header(header: List[str]) -> bool:
    """Report whether a CSV header provides every column this module needs.

    Each title is passed through ``normalize_header_key`` first, so aliases
    count as the canonical column.

    Args:
        header: Column titles from the first row of the CSV file.

    Returns:
        True when date, time, field, visitor, home and results are all
        present after normalization, False otherwise.
    """
    available = {normalize_header_key(title) for title in header}
    needed = {"date", "time", "field", "visitor", "home", "results"}
    return needed.issubset(available)
def read_csv(file_path: Path) -> List[dict]:
    """Load a CSV file into a list of row dictionaries with normalized keys.

    Column titles are mapped through ``normalize_header_key`` and every cell
    is stripped of surrounding whitespace. If two titles normalize to the
    same key, the later column's value wins.

    Ragged rows are tolerated: cells missing from a short row become ``""``,
    and surplus cells of an over-long row (which have no column title) are
    dropped.

    Args:
        file_path: Location of the CSV file to read.

    Returns:
        One dictionary per data row, in file order; an empty list when the
        file has no data rows.
    """
    rows = []
    with open(file_path, "r", newline="") as csvfile:
        for raw_row in csv.DictReader(csvfile):
            rows.append({
                # DictReader fills cells missing from a short row with None.
                normalize_header_key(title): (cell or "").strip()
                for title, cell in raw_row.items()
                # DictReader collects surplus cells under the key None.
                if title is not None
            })
    return rows
def write_csv(file_path: Path, data: List[dict]) -> None:
    """Write a list of row dictionaries to a CSV file with a header row.

    The header is the union of the keys of all rows, in first-seen order, so
    rows that carry optional keys (or lack some) are written without error;
    cells a row does not provide are left empty.

    Args:
        file_path: Destination file; it is created or overwritten.
        data: Rows to write. An empty list produces an empty file.
    """
    # dict.fromkeys de-duplicates while preserving first-seen order.
    fieldnames = list(dict.fromkeys(key for row in data for key in row))
    with open(file_path, "w", newline="") as csvfile:
        if not data:
            # Nothing to derive a header from; leave the file empty.
            return
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames, restval="")
        writer.writeheader()
        writer.writerows(data)
def parse_score(score_str: str, reverse_order: bool = False) -> Dict[str, int]:
    """Parse a results string and derive runs and outcomes for both sides.

    The first "<digits>-<digits>" pair found anywhere in the string is taken
    as the score. By convention it reads "visitor-home"; pass
    ``reverse_order=True`` when the source lists the home score first.

    Args:
        score_str: Free-form results text, e.g. "5-3", "Final 5-3" or
            "7-0 forfeit".
        reverse_order: If True, the first number is the home score.

    Returns:
        A dictionary that always contains ``has_result`` (bool). When a
        score was found it also contains ``home_runs_for``,
        ``visitor_runs_for``, ``home_runs_against``,
        ``visitor_runs_against`` (ints) and ``home_outcome`` /
        ``visitor_outcome`` ("win", "loss", "tie", or "forfeit" for the
        losing side when the surrounding text contains "forfeit").
        Non-empty text before/after the score is kept under ``pre`` /
        ``post``; when no score is present the whole non-empty string is
        kept under ``post``.
    """
    score = {}
    # search(), not an anchored match: the score may be preceded by text.
    found = re.search(r"(?P<runs_first>\d+)-(?P<runs_second>\d+)", score_str)
    if found is None:
        if score_str:
            score["post"] = score_str
        score["has_result"] = False
        return score

    leading = score_str[:found.start()]
    trailing = score_str[found.end():]
    if leading:
        score["pre"] = leading
    if trailing:
        score["post"] = trailing

    score["has_result"] = True
    runs_first = int(found.group("runs_first"))
    runs_second = int(found.group("runs_second"))
    if reverse_order:
        home_runs, visitor_runs = runs_first, runs_second
    else:
        home_runs, visitor_runs = runs_second, runs_first
    score.update({
        "home_runs_for": home_runs,
        "visitor_runs_for": visitor_runs,
        "home_runs_against": visitor_runs,
        "visitor_runs_against": home_runs,
    })

    # A forfeit note may sit on either side of the score.
    forfeited = "forfeit" in leading or "forfeit" in trailing
    if home_runs > visitor_runs:
        score["home_outcome"] = "win"
        score["visitor_outcome"] = "forfeit" if forfeited else "loss"
    elif home_runs < visitor_runs:
        score["home_outcome"] = "forfeit" if forfeited else "loss"
        score["visitor_outcome"] = "win"
    else:
        score["home_outcome"] = "tie"
        score["visitor_outcome"] = "tie"
    return score
def is_visitor_home_order_reversed(header: List[str]) -> bool:
    """Tell whether the header lists 'home' ahead of 'visitor'.

    The usual convention puts the visitor column first and the home column
    second; a header that does the opposite is considered reversed.

    Args:
        header: Header keys; must contain the exact entries 'visitor' and
            'home'.

    Returns:
        True if 'home' appears before 'visitor', False otherwise.

    Raises:
        ValueError: If 'visitor' or 'home' is not in the header.
    """
    visitor_position = header.index('visitor')
    home_position = header.index('home')
    return home_position < visitor_position
def process_data(data: List[Dict], visitor_home_order_reversed: bool = False) -> List[Dict]:
    """Enrich each row in place with parsed score fields and a datetime.

    For every row, the output of ``parse_score`` on its "results" value is
    merged into the row, and its "date" and "time" values are combined and
    parsed into a ``datetime`` stored under "datetime".

    Args:
        data: Rows holding at least "results", "date" and "time".
        visitor_home_order_reversed: Forwarded to ``parse_score`` as its
            ``reverse_order`` argument.

    Returns:
        The same list object that was passed in, with its rows updated.

    Raises:
        parser.ParserError: If a row's date/time text cannot be parsed.
    """
    for game in data:
        game.update(parse_score(game["results"], visitor_home_order_reversed))
        try:
            game['datetime'] = parser.parse(f"{game['date']} {game['time']}")
        except parser.ParserError:
            # Unparseable dates are passed on to the caller unchanged.
            raise
    return data
def aggregate_teams(data: List[Dict[str, str]]) -> List[Dict[str, int]]:
    """Build per-team standings from processed game rows.

    Rows whose "has_result" is falsy are ignored. Every remaining row adds
    one game played to both teams, adds its runs for/against, and bumps the
    "win", "loss" or "tie" counter matching each side's outcome. Any other
    outcome value (e.g. "forfeit") increases only the games-played count.

    Args:
        data: Processed rows carrying "has_result", "home", "visitor",
            the two "*_outcome" keys and the four "*_runs_*" keys.

    Returns:
        One dictionary per team with "team", "win", "loss", "tie", "gp",
        "runs_for" and "runs_against", ordered by wins, most first. Teams
        with equal wins keep the order in which they first appeared.
    """
    counted_outcomes = ("win", "loss", "tie")

    def blank_record():
        return {"win": 0, "loss": 0, "tie": 0, "gp": 0, "runs_for": 0, "runs_against": 0}

    standings = {}
    for game in data:
        if not game["has_result"]:
            continue

        home_name = game["home"]
        visitor_name = game["visitor"]
        home_record = standings.setdefault(home_name, blank_record())
        visitor_record = standings.setdefault(visitor_name, blank_record())

        home_record['gp'] += 1
        visitor_record['gp'] += 1

        results_by_side = (
            (home_record, game["home_outcome"]),
            (visitor_record, game["visitor_outcome"]),
        )
        for record, outcome in results_by_side:
            if outcome in counted_outcomes:
                record[outcome] += 1

        home_record["runs_for"] += game["home_runs_for"]
        home_record["runs_against"] += game["home_runs_against"]
        visitor_record["runs_for"] += game["visitor_runs_for"]
        visitor_record["runs_against"] += game["visitor_runs_against"]

    # Flatten the mapping into rows, then rank by number of wins.
    table = [{"team": name, **record} for name, record in standings.items()]
    table.sort(key=lambda entry: entry["win"], reverse=True)
    return table
def write_sportspress_csv(data: List[Dict], file_path: Path, only_with_outcome: bool = False):
    """Write game rows to a CSV file laid out for a SportsPress event import.

    Each game produces two lines: the first carries date, time, venue, the
    home team, its results and its outcome; the second leaves date, time and
    venue empty and carries the visitor team, its results and its outcome.
    The results cell is pipe-delimited: runs for innings 1-10, total runs,
    errors, hits; values a row does not provide are left empty.

    Parameters:
    - data (List[Dict]): Processed rows with "datetime", "home", "visitor"
      and "has_result"; "field" and the per-side result keys are optional.
    - file_path (Path): Destination file; it is created or overwritten.
    - only_with_outcome (bool, optional): If True, rows whose "has_result"
      is falsy are skipped. Default is False.

    Returns:
    None

    Example:
    >>> data = [...]  # List of dictionaries representing sports events
    >>> file_path = Path("output.csv")
    >>> write_sportspress_csv(data, file_path)
    """
    # Other columns not emitted here: "Competition", "Date Format",
    # "Players", "Performance".
    # NOTE(review): the header has eight labels but each data line has six
    # cells (nothing is written for "Format" and "Season"), so the cells do
    # not line up with their labels. Kept as-is; confirm against the
    # importer whether those labels should be dropped or the rows padded.
    fieldnames = [
        "Format",  # Competitive or Friendly
        "Season",
        "Date",
        "Time",
        "Venue",
        "Team",
        "Results",
        "Outcome",
    ]

    # newline="" lets the csv module control line endings; without it,
    # platforms that translate "\n" emit "\r\r\n".
    with file_path.open('w', newline="") as output_csv_file:
        writer = csv.writer(output_csv_file)
        writer.writerow(fieldnames)

        for row in data:
            if only_with_outcome and not row['has_result']:
                continue
            game_time = row["datetime"]
            writer.writerow([
                game_time.strftime("%Y/%m/%d"),
                game_time.strftime("%H:%M"),
                row.get("field", ""),
                row["home"],
                _sportspress_results(row, "home"),
                row.get("home_outcome"),
            ])
            writer.writerow([
                "",
                "",
                "",
                row["visitor"],
                _sportspress_results(row, "visitor"),
                row.get("visitor_outcome"),
            ])


def _sportspress_results(row: Dict, side: str) -> str:
    """Build the pipe-delimited results cell for one side ("home"/"visitor")."""
    keys = [f"{side}_runs_for_inning_{inning}" for inning in range(1, 11)]
    keys += [f"{side}_runs_for", f"{side}_errors", f"{side}_hits"]
    return "|".join(str(row.get(key, "")) for key in keys)