Files
baseball-db/src/utils/sportspress.py
2025-04-18 08:14:37 -05:00

105 lines
3.4 KiB
Python

from typing import List, Dict
from pathlib import Path
import csv
# Column names every event dictionary must provide before it can be exported.
REQUIRED_KEYS = ["date", "time", "field", "visitor", "home"]


def validate_keys(header: List[str]) -> bool:
    """
    Report whether *header* contains every name listed in REQUIRED_KEYS.

    Parameters:
    - header (List[str]): Column names to check; any container supporting ``in`` works.

    Returns:
    bool: True when no required key is missing, False otherwise.
    """
    for needed in REQUIRED_KEYS:
        if needed not in header:
            return False
    return True
def write_sportspress_csv(data: List[Dict], file: Path, only_with_outcome: bool = False):
    """
    Write sports event data as CSV rows under a SportsPress-style header.

    Every event produces two rows: the first carries the date, time, venue and
    the home team's results; the second carries only the visiting team's
    results. Both rows have one cell per header column.

    Parameters:
    - data (List[Dict]): List of dictionaries where each dictionary represents a sports event.
      An empty list produces a file that contains only the header row.
    - file (Path): Destination of the CSV. A path (``Path`` or ``str``) is opened for
      writing and closed again; an already-open text stream is written to and left open.
    - only_with_outcome (bool, optional): If True, events whose ``has_result`` value is
      missing or falsy are skipped. Default is False.

    Returns:
    None

    Raises:
    - KeyError: If the first event does not contain every key in REQUIRED_KEYS.

    Example:
        data = [...]  # List of dictionaries representing sports events
        file_path = Path("output.csv")
        write_sportspress_csv(data, file_path)
    """
    # Validate before touching the destination so a bad payload never
    # truncates an existing file. Only the first event is inspected.
    if data:
        header = data[0].keys()
        if not validate_keys(header):
            raise KeyError(f"Missing Keys. Requires: {REQUIRED_KEYS}, provided: {list(header)}")

    # csv.writer needs an object with a write() method, so a path has to be
    # opened first. newline="" lets the csv module control line endings.
    if isinstance(file, (str, Path)):
        with open(file, "w", newline="", encoding="utf-8") as stream:
            write_sportspress_csv(data, stream, only_with_outcome)
        return

    writer = csv.writer(file)
    # Other columns that were listed but disabled in the original header:
    # "Competition", "Date Format", "Players", "Performance".
    fieldnames = [
        "Format",  # Competitive or Friendly
        "Season",
        "Date",
        "Time",
        "Venue",
        "Team",
        "Results",
        "Outcome",
    ]
    writer.writerow(fieldnames)

    # Keys joined with "|" to build the "Results" cell, hoisted out of the loop.
    home_result_keys = [f"home_runs_for_inning_{inning}" for inning in range(1, 11)]
    home_result_keys += ["home_runs_for", "home_errors", "home_hits"]
    # The per-inning visitor keys were disabled in the original code, so only
    # the visitor totals are exported.
    visitor_result_keys = ["visitor_runs_for", "visitor_errors", "visitor_hits"]

    for row in data:
        if only_with_outcome and not row.get("has_result"):
            continue

        # NOTE(review): validation requires "date"/"time", yet the values are
        # read from row["datetime"]; confirm the caller always supplies it.
        played_at = row["datetime"]
        writer.writerow(
            [
                # "Format" and "Season" are not read from the event, so they
                # stay blank; the cells keep the remaining values under the
                # matching header columns.
                "",
                "",
                played_at.strftime("%Y/%m/%d"),
                played_at.strftime("%H:%M"),
                row.get("field", ""),
                row["home"],
                "|".join(str(row.get(key, "")) for key in home_result_keys),
                row.get("home_outcome"),
            ]
        )
        # Second row of the event: only team, results and outcome are filled.
        writer.writerow(
            [
                "",
                "",
                "",
                "",
                "",
                row["visitor"],
                "|".join(str(row.get(key, "")) for key in visitor_result_keys),
                row.get("visitor_outcome"),
            ]
        )