from __future__ import annotations import argparse import json import re from pathlib import Path from typing import Any from record_game_playwright import PITCH_RESULT_LABEL_MAP, infer_batter_result_label, infer_runner_action_label from register_game_playwright import DEFAULT_GAME_ID, DEFAULT_REPORT_DIR, load_report def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser(description="history.txt와 report.json의 기록 시퀀스를 비교합니다.") parser.add_argument("--game-id", default=DEFAULT_GAME_ID) parser.add_argument("--report-path") parser.add_argument("--history-path", default="history.txt") parser.add_argument("--output-json") parser.add_argument("--output-txt") return parser.parse_args() def report_path_from_args(args: argparse.Namespace) -> Path: if args.report_path: return Path(args.report_path) return DEFAULT_REPORT_DIR / f"{args.game_id}_report.json" def output_paths(args: argparse.Namespace) -> tuple[Path, Path]: if args.output_json: json_path = Path(args.output_json) else: json_path = Path("output") / f"{args.game_id}_history_compare.json" if args.output_txt: txt_path = Path(args.output_txt) else: txt_path = Path("output") / f"{args.game_id}_history_compare.txt" json_path.parent.mkdir(parents=True, exist_ok=True) txt_path.parent.mkdir(parents=True, exist_ok=True) return json_path, txt_path def normalize_name(text: str) -> str: text = (text or "").replace("*", "").strip() text = re.sub(r"\s+", " ", text) return text def normalize_entry(text: str) -> str: text = normalize_name(text) text = text.replace(" - ", "-") text = re.sub(r"\s+", "", text) return text def batter_name(batter_text: str) -> str: match = re.search(r"\d+번타자\s+(.+)$", batter_text or "") return normalize_name(match.group(1) if match else (batter_text or "")) def runner_name(runner_text: str) -> str: match = re.search(r"[123]루주자\s+(.+?)\s*:", runner_text or "") if match: return normalize_name(match.group(1)) match = re.search(r"주자\s+(.+?)\s*:", runner_text or "") return normalize_name(match.group(1) if match else "") def history_entries_from_text(raw: str) -> list[str]: collapsed = re.sub(r"\r?\n+", "", raw.strip()) if not collapsed: return [] collapsed = re.sub(r"(?=(?:타자|[123]루주자)\s*:)", "\n", collapsed) return [line.strip() for line in collapsed.splitlines() if line.strip()] def pitch_label(pitch: dict[str, Any]) -> str | None: result_text = (pitch.get("pitchResultText") or "").strip() if result_text == "타격": return None return PITCH_RESULT_LABEL_MAP.get(result_text, result_text or None) def expected_entries(report: dict[str, Any]) -> list[str]: entries: list[str] = [] for half in report.get("game_contents") or []: for event in half.get("events") or []: if event.get("event_type") != "at_bat": continue batter = batter_name(event.get("batter") or "") pitches = event.get("pitches") or [] for pitch in pitches: label = pitch_label(pitch) if label: entries.append(f"타자 : {batter} - {label}") for runner_event in pitch.get("runnerEvents") or []: from_base = runner_event.get("fromBase") label = infer_runner_action_label(event, runner_event) name = runner_name(runner_event.get("text") or "") if from_base and name and label: entries.append(f"{from_base}루주자 : {name} - {label}") result = event.get("result") or {} result_label = infer_batter_result_label(result, event) if batter and result_label: entries.append(f"타자 : {batter} - {result_label}") for runner_event in event.get("runnerEvents") or []: from_base = runner_event.get("fromBase") label = infer_runner_action_label(event, runner_event) name = runner_name(runner_event.get("text") or "") if from_base and name and label: entries.append(f"{from_base}루주자 : {name} - {label}") return entries def compare_sequences(expected: list[str], actual: list[str]) -> dict[str, Any]: expected_norm = [normalize_entry(item) for item in expected] actual_norm = [normalize_entry(item) for item in actual] mismatch_index = None mismatch = None for index, (left, right) in enumerate(zip(expected_norm, actual_norm)): if left != right: mismatch_index = index mismatch = { "index": index, "expected": expected[index], "actual": actual[index], } break missing = [] extra = [] if len(expected) > len(actual): missing = expected[len(actual):] elif len(actual) > len(expected): extra = actual[len(expected):] return { "expected_count": len(expected), "actual_count": len(actual), "matches_exactly": expected_norm == actual_norm, "first_mismatch": mismatch, "missing_tail": missing[:50], "extra_tail": extra[:50], } def build_text_summary(result: dict[str, Any], expected: list[str], actual: list[str]) -> str: lines = [ f"expected_count: {result['expected_count']}", f"actual_count: {result['actual_count']}", f"matches_exactly: {result['matches_exactly']}", ] mismatch = result.get("first_mismatch") if mismatch: lines.extend( [ "", f"first_mismatch_index: {mismatch['index']}", f"expected: {mismatch['expected']}", f"actual: {mismatch['actual']}", ] ) if result.get("missing_tail"): lines.append("") lines.append("missing_tail:") lines.extend(f"- {item}" for item in result["missing_tail"]) if result.get("extra_tail"): lines.append("") lines.append("extra_tail:") lines.extend(f"- {item}" for item in result["extra_tail"]) return "\n".join(lines) + "\n" def main() -> None: args = parse_args() report = load_report(report_path_from_args(args)) history_path = Path(args.history_path) raw_history = history_path.read_text(encoding="utf-8") actual = history_entries_from_text(raw_history) expected = expected_entries(report) result = compare_sequences(expected, actual) payload = { "game_id": report.get("game_id") or args.game_id, "history_path": str(history_path), "report_path": str(report_path_from_args(args)), "comparison": result, "expected_preview": expected[:200], "actual_preview": actual[:200], } json_path, txt_path = output_paths(args) json_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8") txt_path.write_text(build_text_summary(result, expected, actual), encoding="utf-8") print(f"비교 완료: {json_path}") print(f"비교 요약: {txt_path}") if __name__ == "__main__": main()