"""crawler/report_builder.py — build the final JSON game report.

Collects data from the Naver API, merges it with the parsed relay
(play-by-play) results, and builds/saves a normalized game report JSON.
"""

from __future__ import annotations

import json
import logging
from collections import defaultdict
from pathlib import Path
from typing import Any

from core.config_loader import max_inning
from crawler.naver_api import (
    NaverApiClient,
    build_iso_datetime,
    clean_game_id,
    derive_umpires,
    extract_pitching_summary,
    get_team_names,
    infer_game_type,
)
from crawler.relay_parser import build_half_inning, parse_inning_value
from crawler.lineup_builder import build_lineup_summary

logger = logging.getLogger(__name__)

# Pitching changes before this inning are ignored by the blown-save heuristic.
_BLOWN_SAVE_MIN_INNING = 7


def _to_int(value: Any, default: int = 0) -> int:
    """Coerce *value* to ``int``; return *default* for ``None`` or junk."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return default


# ──────────────────────────────────────────────
# Inning data collection
# ──────────────────────────────────────────────

def collect_inning_data(
    api: NaverApiClient,
    game_id: str,
    start_inning_val: str | None = None,
    end_inning_val: str | None = None,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Fetch relay data inning by inning and structure it per half-inning.

    Innings are requested from 1 up to ``max_inning()``. Collection stops at
    the first inning whose fetch fails or that has no ``textRelays``.

    Args:
        api: Client used to fetch each inning's relay payload.
        game_id: Game identifier passed to ``api.fetch_relay``.
        start_inning_val: Lower bound, parsed with ``parse_inning_value``
            (default 0.0 when omitted).
        end_inning_val: Upper bound, parsed with ``parse_inning_value``
            (default 99.0 when omitted).

    Returns:
        ``(innings, raw_relays)``. ``innings`` holds the
        ``build_half_inning`` result for every half-inning inside the
        requested range. ``raw_relays`` holds every fetched relay entry,
        including those outside the range.
    """
    innings: list[dict[str, Any]] = []
    raw_relays: list[dict[str, Any]] = []
    start_score = parse_inning_value(start_inning_val, 0.0)
    end_score = parse_inning_value(end_inning_val, 99.0)

    for inning in range(1, max_inning() + 1):
        try:
            relay_data = api.fetch_relay(game_id, inning=inning)
        except Exception:
            # Best effort: keep what was collected so far, but leave a trace
            # instead of hiding the failure completely.
            logger.warning(
                "Relay fetch failed for game %s inning %d; stopping collection",
                game_id,
                inning,
                exc_info=True,
            )
            break

        relays = (relay_data or {}).get("textRelays") or []
        if not relays:
            break

        grouped: dict[int, list[dict[str, Any]]] = defaultdict(list)
        for relay in relays:
            grouped[_to_int(relay.get("homeOrAway"), -1)].append(relay)
            raw_relays.append(relay)

        for home_or_away in (0, 1):
            half_relays = grouped.get(home_or_away, [])
            if not half_relays:
                continue
            # Half-innings are ordered as inning + 0.5 when homeOrAway == 1
            # (presumably the bottom half — confirm against relay_parser).
            current_score = inning + (0.5 if home_or_away == 1 else 0.0)
            if current_score < start_score or current_score > end_score:
                continue
            innings.append(build_half_inning(inning, home_or_away, half_relays))

    return innings, raw_relays


# ──────────────────────────────────────────────
# Score timeline & blown saves
# ──────────────────────────────────────────────

def _collect_score_timeline(raw_relays: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return every reported score state, ordered by ``seqno``.

    Options without a ``currentGameState`` or without a ``seqno`` are
    skipped: they carry no score, or cannot be ordered. Missing or null
    scores count as 0.
    """
    timeline: list[dict[str, Any]] = []
    for relay in raw_relays:
        for option in relay.get("textOptions") or []:
            state = option.get("currentGameState") or {}
            seqno = option.get("seqno")
            if not state or seqno is None:
                continue
            timeline.append({
                "seqno": seqno,
                "home_score": _to_int(state.get("homeScore")),
                "away_score": _to_int(state.get("awayScore")),
            })
    timeline.sort(key=lambda item: item["seqno"])
    return timeline


def _lead(scores: dict[str, Any], team: str) -> int:
    """Return *team*'s run margin ("home" or "away") in a score record."""
    margin = scores["home_score"] - scores["away_score"]
    return margin if team == "home" else -margin


def _collect_blown_saves(
    raw_relays: list[dict[str, Any]],
    away_name: str,
    home_name: str,
) -> list[str]:
    """Return the sorted, de-duplicated names of pitchers who blew a lead.

    Heuristic: a pitcher who enters in the 7th inning or later while his
    team leads is flagged when that lead is gone (tied or behind) at any
    score state reported after he entered and no later than the next
    pitching change of the same team. A lead lost by a later reliever is
    therefore not charged to an earlier one.

    Args:
        raw_relays: Raw relay entries as returned by ``collect_inning_data``.
        away_name: Unused by the computation; kept so callers need not change.
        home_name: Unused by the computation; kept so callers need not change.
    """
    timeline = _collect_score_timeline(raw_relays)

    entries: list[dict[str, Any]] = []
    for relay in raw_relays:
        if _to_int(relay.get("inn")) < _BLOWN_SAVE_MIN_INNING:
            continue
        # When homeOrAway == 0 the pitching side is treated as the home team.
        batting_side = _to_int(relay.get("homeOrAway"), -1)
        pitcher_team = "home" if batting_side == 0 else "away"
        for option in relay.get("textOptions") or []:
            if option.get("type") != 2:
                continue
            player_change = option.get("playerChange") or {}
            in_player = player_change.get("inPlayer") or {}
            if in_player.get("playerPos") != "투수":
                continue
            name = in_player.get("playerName")
            seqno = option.get("seqno")
            if not name or seqno is None:
                # Without a name or a sequence number the entry can be
                # neither reported nor placed on the timeline.
                continue
            state = option.get("currentGameState") or {}
            entries.append({
                "name": name,
                "team": pitcher_team,
                "seqno": seqno,
                "home_score": _to_int(state.get("homeScore")),
                "away_score": _to_int(state.get("awayScore")),
            })

    entries.sort(key=lambda item: item["seqno"])

    blown: set[str] = set()
    for idx, entry in enumerate(entries):
        team = entry["team"]
        if _lead(entry, team) <= 0:
            continue  # no lead to protect when he came in
        # The score at the moment the next reliever enters still belongs to
        # this pitcher, so the window's upper bound is inclusive.
        next_entry_seqno = next(
            (later["seqno"] for later in entries[idx + 1:] if later["team"] == team),
            None,
        )
        for state in timeline:
            if state["seqno"] <= entry["seqno"]:
                continue
            if next_entry_seqno is not None and state["seqno"] > next_entry_seqno:
                break
            if _lead(state, team) <= 0:
                blown.add(entry["name"])
                break

    return sorted(blown)
# ──────────────────────────────────────────────
# Game info assembly
# ──────────────────────────────────────────────

def _build_game_info(
    game_info: dict[str, Any],
    record_data: dict[str, Any],
    review_meta: dict[str, Any],
) -> dict[str, Any]:
    """Assemble the ``game_info`` section of the report.

    Most fields are copied straight from *game_info*. The end time is built
    from the game date and ``review_meta["END_TM"]``, attendance comes from
    ``review_meta["CROWD_CN"]``, and umpires are derived from *record_data*.
    """
    game_date = game_info.get("gameDate")
    section: dict[str, Any] = {
        "date": game_date,
        "stadium": game_info.get("stadium"),
        "start_time": game_info.get("gameDateTime"),
        "end_time": build_iso_datetime(game_date, review_meta.get("END_TM")),
        "season": game_info.get("seasonYear"),
        "game_type": infer_game_type(game_info),
        "home_team": game_info.get("homeTeamName"),
        "away_team": game_info.get("awayTeamName"),
        "attendance": review_meta.get("CROWD_CN"),
        "umpires": derive_umpires(record_data),
    }
    return section


def _build_pitcher_section(
    record_data: dict[str, Any],
    raw_relays: list[dict[str, Any]],
    away_name: str,
    home_name: str,
) -> dict[str, list[str]]:
    """Return the pitching summary from *record_data* plus a blown-save list.

    The blown-save names are computed from *raw_relays* and stored under the
    "블론세이브" key of the summary.
    """
    pitching = extract_pitching_summary(record_data)
    blown_saves = _collect_blown_saves(raw_relays, away_name, home_name)
    pitching["블론세이브"] = blown_saves
    return pitching


# ──────────────────────────────────────────────
# Report build & save
# ──────────────────────────────────────────────

def build_report(
    game_id: str,
    start_inning: str | None = None,
    end_inning: str | None = None,
) -> dict[str, Any]:
    """Build the full report for a game ID.

    Fetches four Naver API payloads (relay, record, game info, preview) plus
    the KBO review metadata and returns a normalized, JSON-ready dict with
    the keys ``game_id``, ``game_info``, ``lineups``, ``game_contents`` and
    ``pitching_summary``.

    Args:
        game_id: Game identifier; normalized with ``clean_game_id`` first.
        start_inning: Optional lower inning bound for ``game_contents``.
        end_inning: Optional upper inning bound for ``game_contents``.
    """
    game_id = clean_game_id(game_id)

    with NaverApiClient() as api:
        relay_payload = api.fetch_relay(game_id)
        record_payload = api.fetch_record(game_id)
        info_payload = api.fetch_game_info(game_id)
        preview_payload = api.fetch_preview(game_id)
        review_meta = api.fetch_kbo_review_meta(game_id, info_payload)

        lineups = build_lineup_summary(
            game_id, info_payload, relay_payload, preview_payload
        )
        half_innings, raw_relays = collect_inning_data(
            api,
            game_id,
            start_inning_val=start_inning,
            end_inning_val=end_inning,
        )

        away_team_name = lineups["away_team"]["team_name"]
        home_team_name = lineups["home_team"]["team_name"]
        pitching = _build_pitcher_section(
            record_payload, raw_relays, away_team_name, home_team_name
        )

        report: dict[str, Any] = {
            "game_id": game_id,
            "game_info": _build_game_info(info_payload, record_payload, review_meta),
            "lineups": lineups,
            "game_contents": half_innings,
            "pitching_summary": pitching,
        }
        return report


def filter_report(
    report: dict[str, Any],
    inning: str | None = None,
    lineup_only: bool = False,
    start_inning: str | None = None,
    end_inning: str | None = None,
) -> dict[str, Any]:
    """Return a copy of *report* restricted to selected innings.

    The input is never mutated; the copy is made with a JSON round-trip.

    Args:
        report: Report dict as produced by ``build_report``.
        inning: When given, overrides the range with
            ``[value, value + 0.5]`` where ``value`` is the parsed inning.
        lineup_only: When true, ``game_contents`` is emptied and every
            ``pitching_summary`` list is reset; the other arguments are
            ignored.
        start_inning: Lower bound (parsed, default 0.0).
        end_inning: Upper bound (parsed, default 99.0).
    """
    result = json.loads(json.dumps(report, ensure_ascii=False))

    if lineup_only:
        result["game_contents"] = []
        result["pitching_summary"] = {
            label: []
            for label in ("승리투수", "패전투수", "홀드", "세이브", "블론세이브")
        }
        return result

    lower = parse_inning_value(start_inning, 0.0)
    upper = parse_inning_value(end_inning, 99.0)
    if inning is not None:
        lower = parse_inning_value(inning, 0.0)
        upper = lower + 0.5

    kept = []
    for half in result.get("game_contents", []):
        # A "bottom" half sorts half a step after the top of the same inning.
        offset = 0.5 if half.get("half") == "bottom" else 0.0
        position = float(half.get("inning") or 0) + offset
        if lower <= position and position <= upper:
            kept.append(half)
    result["game_contents"] = kept
    return result


def save_report(
    report: dict[str, Any],
    output_dir: Path,
    output_json: Path | None = None,
) -> Path:
    """Write *report* as pretty-printed UTF-8 JSON and return the file path.

    Args:
        report: Report dict; must contain ``"game_id"``.
        output_dir: Directory for the default file name; always created.
        output_json: Explicit target path. When omitted, the file is
            ``<output_dir>/<game_id>_report.json``.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    game_id = report["game_id"]

    if output_json:
        target = output_json
    else:
        target = output_dir / f"{game_id}_report.json"

    # An explicit target may live outside output_dir, so create its parent too.
    target.parent.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(report, ensure_ascii=False, indent=2)
    target.write_text(payload, encoding="utf-8")
    return target