# baseball-db/tests/test_utils.py

import unittest
from pathlib import Path
# from convert_to_sportspress
from src.utils.common import validate_csv_header, normalize_header_key, read_and_normalize_csv_or_xlsx, parse_score, is_visitor_home_order_reversed, import_gamebygame, aggregate_teams
from src.utils.normalize import normalize_value, normalize_header_key, load_config
import toml

class TestConvertToSportsPress(unittest.TestCase):
    """Tests for header validation, score parsing and team aggregation helpers."""

    def setUp(self):
        # Path to the test CSV file (relative, so it is resolved against the
        # directory the test run is started from).
        self.test_csv_path_2009 = Path("data/2009.csv")

    def test_validate_csv_header(self):
        # Full header is accepted.
        header = ["Date", "Time", "Field", "Visitor", "Home", "Results", "Results"]
        self.assertTrue(validate_csv_header(header))
        # Same header with the leading "Date" column dropped is rejected.
        header = ["Time", "Field", "Visitor", "Home", "Results", "Results"]
        self.assertFalse(validate_csv_header(header))

    def test_normalize_header_key(self):
        # Aliases and canonical spellings both map to the lower-case key.
        self.assertEqual(normalize_header_key("Away"), "visitor")
        self.assertEqual(normalize_header_key("Visitor"), "visitor")
        self.assertEqual(normalize_header_key("Results"), "results")
        self.assertEqual(normalize_header_key("Final Score"), "results")

    def test_read_csv(self):
        # Assuming that the CSV file has a valid header
        with self.subTest("Read CSV data"):
            data = read_and_normalize_csv_or_xlsx(self.test_csv_path_2009)
            self.assertIsInstance(data, list)
            self.assertTrue(all(isinstance(row, dict) for row in data))
        with self.subTest("Normalized keys"):
            normalized_data = read_and_normalize_csv_or_xlsx(self.test_csv_path_2009)
            self.assertTrue(
                all("visitor" in row and "results" in row for row in normalized_data)
            )

    def test_parse_score_visitor_first(self):
        # Default order: the first number in the score belongs to the visitor.
        with self.subTest('visitor win'):
            expected_result = {
                "has_result": True,
                "home_outcome": "loss",
                "visitor_outcome": "win",
                "home_runs_for": 3, "visitor_runs_for": 5,
                "home_runs_against": 5, "visitor_runs_against": 3,
            }
            self.assertDictEqual(parse_score("5-3"), expected_result)
        with self.subTest('visitor loss'):
            expected_result = {
                "has_result": True,
                "home_outcome": "win",
                "visitor_outcome": "loss",
                "home_runs_for": 5, "visitor_runs_for": 3,
                "home_runs_against": 3, "visitor_runs_against": 5,
            }
            self.assertDictEqual(parse_score("3-5"), expected_result)

    def test_parse_score_visitor_first_with_pre_post(self):
        # Trailing text after the score is expected back under "post".
        expected_result = {
            "has_result": True,
            "home_outcome": "loss",
            "visitor_outcome": "win",
            "home_runs_for": 3, "visitor_runs_for": 5,
            "home_runs_against": 5, "visitor_runs_against": 3,
            "post": "xxxx",
        }
        self.assertDictEqual(parse_score("5-3xxxx"), expected_result)
        # TODO: also cover text on both sides of the score, e.g. "xxxx5-3xx"
        # with expected extra keys "pre": "xxxx" and "post": "xx". This case was
        # disabled in the original test; presumably "pre" is not supported
        # yet -- confirm before enabling.

    def test_parse_score_home_first(self):
        # reverse_order=True: the first number in the score belongs to home.
        with self.subTest("home loss"):
            expected_result = {
                "has_result": True,
                "home_outcome": "loss",
                "visitor_outcome": "win",
                "home_runs_for": 2, "visitor_runs_for": 4,
                "home_runs_against": 4, "visitor_runs_against": 2,
            }
            result = parse_score("2-4", reverse_order=True)
            self.assertDictEqual(result, expected_result)
        with self.subTest("home win"):
            expected_result = {
                "has_result": True,
                "home_outcome": "win",
                "visitor_outcome": "loss",
                "home_runs_for": 4, "visitor_runs_for": 2,
                "home_runs_against": 2, "visitor_runs_against": 4,
            }
            result = parse_score("4-2", reverse_order=True)
            self.assertDictEqual(result, expected_result)

    def test_parse_score_invalid_format(self):
        # A string without a score is reported as "no result", with the raw
        # text returned under "post".
        expected_result = {'has_result': False, "post": "invalid_format"}
        self.assertDictEqual(parse_score("invalid_format"), expected_result)

    def test_is_visitor_home_order_reversed_true(self):
        # "home" listed before "visitor" counts as reversed.
        header = ["date", "time", "field", "home", "visitor", "results", "results"]
        self.assertTrue(is_visitor_home_order_reversed(header))

    def test_is_visitor_home_order_reversed_false(self):
        # "visitor" listed before "home" is the regular order.
        header = ["date", "time", "field", "visitor", "home", "results", "results"]
        self.assertFalse(is_visitor_home_order_reversed(header))

    def test_process_data(self):
        # Assuming that the CSV file has a valid header and read_csv is good
        data = read_and_normalize_csv_or_xlsx(self.test_csv_path_2009)
        processed_data = import_gamebygame(data)
        aggregate_team_data = aggregate_teams(processed_data)
        expected_result = [
            {"team": "Marlins", "gp": 28, "win": 23, "loss": 5, "tie": 0, "pts": 46, "runs_for": 249, "runs_against": 117},
            {"team": "Mets", "gp": 28, "win": 20, "loss": 8, "tie": 0, "pts": 40, "runs_for": 265, "runs_against": 150},
            {"team": "Browns", "gp": 28, "win": 17, "loss": 11, "tie": 0, "pts": 34, "runs_for": 221, "runs_against": 201},
            {"team": "Yankees", "gp": 28, "win": 15, "loss": 12, "tie": 1, "pts": 31, "runs_for": 189, "runs_against": 163},
            {"team": "Rangers", "gp": 28, "win": 15, "loss": 13, "tie": 0, "pts": 30, "runs_for": 203, "runs_against": 188},
            {"team": "Hounds", "gp": 28, "win": 14, "loss": 14, "tie": 0, "pts": 28, "runs_for": 181, "runs_against": 161},
            {"team": "Electrons", "gp": 28, "win": 13, "loss": 14, "tie": 1, "pts": 27, "runs_for": 168, "runs_against": 185},
            {"team": "Vikings", "gp": 28, "win": 12, "loss": 16, "tie": 0, "pts": 24, "runs_for": 201, "runs_against": 229},
            {"team": "Athletics", "gp": 28, "win": 8, "loss": 18, "tie": 2, "pts": 18, "runs_for": 157, "runs_against": 258},
            {"team": "Red Sox", "gp": 28, "win": 8, "loss": 20, "tie": 0, "pts": 16, "runs_for": 156, "runs_against": 244},
            {"team": "Aviators", "gp": 28, "win": 7, "loss": 21, "tie": 0, "pts": 14, "runs_for": 168, "runs_against": 262},
        ]
        for expected_dict in expected_result:
            team = expected_dict["team"]
            # One subTest per team: the lookup and the comparison live together
            # so a missing team cannot leak a stale row into the value check.
            with self.subTest(team=team):
                matches = [
                    item for item in aggregate_team_data if item.get('team') == team
                ]
                self.assertEqual(
                    len(matches), 1,
                    f'expected exactly one aggregate row for team "{team}"',
                )
                actual = matches[0]
                # Equivalent of the removed assertDictContainsSubset(actual,
                # expected_dict): every key/value in the aggregated row must
                # appear in the expected row.
                # NOTE(review): this direction fails if aggregate rows carry
                # keys that are not listed above -- confirm it is intended.
                self.assertEqual(expected_dict, {**expected_dict, **actual})

class TestNormalization(unittest.TestCase):
    """Checks for header-key / value normalization and for loading the rules file."""

    @staticmethod
    def _field_rules():
        # Built fresh on every call so no two tests share a mutable dict.
        return {
            "potential_keys": ["Field", "Location", "Venue"],
            "values": [{"original": ["Winnemac"], "normalized": "Winnemac Park"}],
        }

    @staticmethod
    def _team_rules():
        # Built fresh on every call so no two tests share a mutable dict.
        return {
            "values": [
                {
                    "original": ["Hounds", "Chicago Hounds", "Winnemac Hounds", "Hound"],
                    "normalized": "Hounds",
                },
                {"original": ["Chicago Red Sox"], "normalized": "Red Sox"},
            ]
        }

    def test_normalize_key(self):
        # Explicit rule table handed to normalize_header_key as second argument.
        rules = {
            "date": {"potential_keys": ["Date", "EventDate"]},
            "time": {"potential_keys": ["Time", "EventTime"]},
            "visitor": {"potential_keys": ["Away"]},
            "field": self._field_rules(),
        }
        # (raw header, expected canonical key)
        cases = (
            ("Date", "date"),
            ("Time", "time"),
            ("Venue", "field"),
            ("Away", "visitor"),
        )
        for raw_header, canonical in cases:
            self.assertEqual(normalize_header_key(raw_header, rules), canonical)

    def test_load_config_file(self):
        # Entries that only define aliases, in the order they are expected.
        alias_only = (
            ("win", ["w", "wins"]),
            ("loss", ["l", "losses"]),
            ("tie", ["t", "ties"]),
            ("points", ["pts.", "pts", "pt"]),
            ("runs_for", ["rf", "rs"]),
            ("runs_against", ["ra"]),
            ("division", ["div"]),
            ("date", ["Date", "EventDate"]),
            ("time", ["Time", "EventTime"]),
            ("visitor", ["Away"]),
        )
        expected = {name: {"potential_keys": aliases} for name, aliases in alias_only}
        expected["field"] = self._field_rules()
        expected["results"] = {
            "potential_keys": ["Final Score", "Score", "Result", "Outcome"]
        }
        expected["team"] = self._team_rules()

        # Relative path: looked up from the directory the tests are run in.
        config = load_config("normalization.toml")

        self.assertEqual(config, expected)

    def test_normalize_value(self):
        # Only the "team" section is needed to normalize a team name.
        rules = {"team": self._team_rules()}

        # A listed alias is mapped to its normalized spelling.
        normalized = normalize_value("Chicago Hounds", 'team', rules)
        self.assertEqual(normalized, "Hounds")

        # TODO: a lower-case input such as "red sox" -> "Red Sox" is not
        # covered yet (that check was disabled in the original test).

if __name__ == "__main__":
    # Run the test suite when this file is executed directly.
    unittest.main()