Add support for xlsx files

2025-04-25 10:48:37 -04:00
parent 2503141c34
commit 86dfe349e8
6 changed files with 38 additions and 26 deletions
--- a/requirements.txt
+++ b/requirements.txt
@@ -2,3 +2,4 @@ typer[all]==0.9.0
 python-dateutil==2.8.2
 toml==0.10.2
 pillow
+xlsx2csv
--- a/src/apps/clean/clean.py
+++ b/src/apps/clean/clean.py
@@ -5,7 +5,7 @@ from rich.columns import Columns
 from rich.panel import Panel
 from pathlib import Path
 import csv
-from ...utils.common import list_key_values, read_and_normalize_csv
+from ...utils.common import list_key_values, read_and_normalize_csv_or_xlsx
 from ...utils.normalize import normalize_header_key, replace_key_values, DEFAULT_NORMALIZATION_PATH
 from typing import Annotated, List
 import re
@@ -14,7 +14,7 @@ app = typer.Typer()

@app.command("replace")
 def replace_values_for_key(
-    input_file: Annotated[List[typer.FileText], typer.Argument(..., help="Path(s) to the CSV file")],
+    input_file: Annotated[List[Path], typer.Argument(..., help="Path(s) to the CSV or XLSX file")],
    output_file: Annotated[List[typer.FileText], typer.Option(..., "--output-file", "-o", help="Specify output file.")],
    key: str = typer.Argument(..., help=""), 
    match: str = typer.Argument(..., help=""), 
@@ -30,7 +30,7 @@ def replace_values_for_key(

    # Read CSV data
    for f in input_file:
-      data = read_and_normalize_csv(f)
+      data = read_and_normalize_csv_or_xlsx(f)

      before_table = Table(Column(), show_header=False, title="Before")
      for value in sorted(list_key_values(data, key)):
@@ -73,7 +73,7 @@ def replace_values_for_key(

@app.command("add-key")
 def add_values_for_key(
-    file_path: Path = typer.Argument(..., help="Path to the CSV file"), 
+    file_path: Path = typer.Argument(..., help="Path to the CSV or XLSX file"), 
    key: str = typer.Argument(..., help=""), 
    value: str = typer.Argument("", help=""), 
    in_place: bool = typer.Option(False, "--in-place", "-p", help="Modify file in place."),
@@ -87,7 +87,7 @@ def add_values_for_key(
    console = Console()

    # Read CSV data
-    data = read_and_normalize_csv(file_path)
+    data = read_and_normalize_csv_or_xlsx(file_path)

    # data = add_key_values(data, key, value)

--- a/src/apps/convert/convert.py
+++ b/src/apps/convert/convert.py
@@ -3,7 +3,7 @@ from typing import Annotated
 from pathlib import Path
 from ...utils.sportspress import validate_keys
 from ...utils.normalize import normalize_header_key, load_config
-from ...utils.common import read_and_normalize_csv, is_visitor_home_order_reversed, import_gamebygame
+from ...utils.common import read_and_normalize_csv_or_xlsx, is_visitor_home_order_reversed, import_gamebygame
 from ...utils.sportspress import write_sportspress_csv
 import csv

@@ -11,7 +11,7 @@ app = typer.Typer()

@app.command(name="sportspress")
 def sportspress_csv(
-    input_file: Annotated[typer.FileText, typer.Argument(..., help="Path to the CSV file")], 
+    input_file: Annotated[Path, typer.Argument(..., help="Path to the CSV or XLSX file")], 
    file_output_path: Annotated[typer.FileTextWrite, typer.Argument(..., help="Path to the output CSV file")], 
    only_with_outcome: bool = typer.Option(default=False, is_flag=True, help="")
    ):
@@ -28,7 +28,7 @@ def sportspress_csv(

@app.command(name="teamsnap")
 def sportspress_csv(
-    input_file: Annotated[typer.FileText, typer.Argument(..., help="Path to the CSV file")], 
+    input_file: Annotated[Path, typer.Argument(..., help="Path to the CSV or XLSX file")], 
    file_output_path: Annotated[typer.FileTextWrite, typer.Argument(..., help="Path to the output CSV file")], 
    only_with_outcome: bool = typer.Option(default=False, is_flag=True, help="")
    ):
--- a/src/apps/generate/calendar.py
+++ b/src/apps/generate/calendar.py
@@ -4,7 +4,7 @@ from typing import Annotated, List
 from pathlib import Path
 from ...utils.sportspress import validate_keys
 from ...utils.normalize import normalize_header_key, load_config
-from ...utils.common import read_and_normalize_csv, is_visitor_home_order_reversed, import_gamebygame, parse_datetime, personalize_data_for_team
+from ...utils.common import read_and_normalize_csv_or_xlsx, is_visitor_home_order_reversed, import_gamebygame, parse_datetime, personalize_data_for_team
 from ...utils.sportspress import write_sportspress_csv
 from .calendar_utils import generate_calendar
 from collections import defaultdict 
@@ -14,10 +14,10 @@ app = typer.Typer()

@app.command(name="calendar")
 def generate_calendar_app(
-    input_file: Annotated[List[typer.FileText], typer.Argument(..., help="Path(s) to the CSV file")],
+    input_file: Annotated[List[Path], typer.Argument(..., help="Path(s) to the CSV or XLSX file")],
    ):
    # Read CSV data
-    data = read_and_normalize_csv(input_file)
+    data = read_and_normalize_csv_or_xlsx(input_file)
    data = personalize_data_for_team(data, "Hounds")
    # data = parse_datetime(data)

@@ -26,10 +26,10 @@ def generate_calendar_app(

@app.command(name="calendar-config")
 def generate_calendar_configs(
-    input_file: Annotated[List[typer.FileText], typer.Argument(..., help="Path(s) to the CSV file")],
+    input_file: Annotated[List[Path], typer.Argument(..., help="Path(s) to the CSV or XLSX file")],
    output_file: Annotated[Path, typer.Argument(..., help="Path(s) to the output config file")]
 ):
-    data = read_and_normalize_csv(input_file)
+    data = read_and_normalize_csv_or_xlsx(input_file)
    teams = {row.get('visitor') for row in data}
    teams.update({row.get('home') for row in data})
    fields = {row.get('field') for row in data}
--- a/src/apps/read/read.py
+++ b/src/apps/read/read.py
@@ -4,7 +4,7 @@ from rich.console import Console
 from rich.columns import Columns
 from pathlib import Path
 import csv
-from ...utils.common import list_key_values, read_and_normalize_csv, import_gamebygame, aggregate_teams, aggregate_teams_by_season
+from ...utils.common import list_key_values, read_and_normalize_csv_or_xlsx, import_gamebygame, aggregate_teams, aggregate_teams_by_season
 from ...utils.normalize import normalize_header_key
 from typing import Annotated, List

@@ -12,13 +12,13 @@ app = typer.Typer()

@app.command("list-values")
 def print_values_for_key(
-    input_file: Annotated[List[typer.FileText], typer.Argument(..., help="Path(s) to the CSV file")], 
+    input_file: Annotated[List[Path], typer.Argument(..., help="Path(s) to the CSV or XLSX file")], 
    key: str = typer.Argument(..., help="The key to retrieve to generate list.")
    ):
    # Read CSV data
    data = []
    for f in input_file:
-      data.extend(read_and_normalize_csv(f))
+      data.extend(read_and_normalize_csv_or_xlsx(f))
    values = list_key_values(data, key)
    
    console = Console()
@@ -33,12 +33,12 @@ def print_values_for_key(

@app.command("print")
 def print_table(
-    input_file: Annotated[List[typer.FileText], typer.Argument(..., help="Path(s) to the CSV file")]
+    input_file: Annotated[List[Path], typer.Argument(..., help="Path(s) to the CSV or XLSX file")]
    ):
    # Read CSV data
    data = []
    for f in input_file:
-      data.extend(read_and_normalize_csv(f))
+      data.extend(read_and_normalize_csv_or_xlsx(f))
    
    console = Console()
    table = Table()
@@ -56,7 +56,7 @@ def print_table(

@app.command()
 def standings(
-    input_file: Annotated[List[typer.FileText], typer.Argument(..., help="Path(s) to the CSV file")], 
+    input_file: Annotated[List[Path], typer.Argument(..., help="Path(s) to the CSV or XLSX file")], 
    ):

    # Read CSV data
@@ -92,13 +92,13 @@ def standings(

@app.command()
 def seasons(
-    input_file: Annotated[List[typer.FileText], typer.Argument(..., help="Path(s) to the CSV file")], 
+    input_file: Annotated[List[Path], typer.Argument(..., help="Path(s) to the CSV or XLSX file")], 
    ):

    # Read CSV data
    data=[]
    for f in input_file:
-      data.extend(read_and_normalize_csv(f))
+      data.extend(read_and_normalize_csv_or_xlsx(f))

    aggregate_team_data = aggregate_teams_by_season(data)

--- a/src/utils/common.py
+++ b/src/utils/common.py
@@ -1,7 +1,8 @@
 import csv
 import re
 from typing import List, Dict, Union, TextIO
-from io import TextIOBase
+from io import TextIOBase, StringIO
+from xlsx2csv import Xlsx2csv
 from dateutil import parser
 from pathlib import Path
 from rich.console import Console
@@ -24,7 +25,7 @@ def list_key_values(data: List[Dict], key):
    output.discard(None)
    return output

-def read_and_normalize_csv(input_file: Union[List[TextIO], List[Path], TextIO, Path]) -> List[dict]:
+def read_and_normalize_csv_or_xlsx(input_file: Union[List[TextIO], List[Path], TextIO, Path]) -> List[dict]:
    """
    Reads CSV file(s) from the provided input file path(s) or file object(s), 
    and returns a list of dictionaries with normalized keys and values 
@@ -51,8 +52,20 @@ def read_and_normalize_csv(input_file: Union[List[TextIO], List[Path], TextIO, P
    
    for f in file_list:
        if isinstance(f, Path):
-            f = f.open()    
-        reader = csv.DictReader(f)
+            suffix = f.suffix.lower()
+            if suffix == ".csv":
+                # csv.DictReader is lazy: materialize every row while the file
+                # is still open, otherwise the loop below reads a closed file.
+                with f.open("r", encoding="utf-8", newline="") as csv_file:
+                    reader = list(csv.DictReader(csv_file))
+            elif suffix == ".xlsx":
+                # Convert the workbook to CSV text in memory, then parse it.
+                output = StringIO()
+                Xlsx2csv(f, outputencoding="utf-8").convert(output)
+                output.seek(0)
+                reader = csv.DictReader(output)
+            else:
+                raise ValueError("File must be a .csv or .xlsx")
+        else:
+            # Already-open text stream (the signature still allows TextIO): parse it as CSV.
+            reader = csv.DictReader(f)
        for row in reader:
            normalized_row = normalize_row(row, normalization_config)
            result_data.append(normalized_row)
@@ -157,7 +168,7 @@ def parse_datetime(data: List[Dict]):

 def import_gamebygame(data: Union[List[Dict], TextIO, Path]) -> List[Dict]:
    if isinstance(data, TextIOBase) or isinstance(data, Path) :
-        data = read_and_normalize_csv(data)
+        data = read_and_normalize_csv_or_xlsx(data)

    header = data[0].keys()
    visitor_home_order_reversed = is_visitor_home_order_reversed(list(header))