# Listing metadata:
#   path: baseball-automation/crawler/report_builder.py
#   timestamp: 2026-05-02 16:24:42 +09:00
#   size: 271 lines, 9.7 KiB
#   language: Python

"""
crawler/report_builder.py — final JSON report generation.

Collects Naver API data, merges it with the relay parsing results, and
builds and saves a normalized game report as JSON.
"""
from __future__ import annotations
import json
from collections import defaultdict
from pathlib import Path
from typing import Any
from core.config_loader import max_inning
from crawler.naver_api import (
NaverApiClient,
build_iso_datetime,
clean_game_id,
derive_umpires,
extract_pitching_summary,
get_team_names,
infer_game_type,
)
from crawler.relay_parser import build_half_inning, parse_inning_value
from crawler.lineup_builder import build_lineup_summary
# ──────────────────────────────────────────────
# Inning data collection
# ──────────────────────────────────────────────
def collect_inning_data(
    api: NaverApiClient,
    game_id: str,
    start_inning_val: str | None = None,
    end_inning_val: str | None = None,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Fetch relay data inning by inning and structure it into half-innings.

    Innings are requested from 1 up to ``max_inning()``. Collection stops at
    the first inning whose fetch raises or whose ``textRelays`` list is empty.

    Args:
        api: Client whose ``fetch_relay(game_id, inning=...)`` is called.
        game_id: Game identifier forwarded to ``api.fetch_relay``.
        start_inning_val: Lower bound of the wanted range, converted by
            ``parse_inning_value`` (called with 0.0 as its second argument).
        end_inning_val: Upper bound of the wanted range, converted by
            ``parse_inning_value`` (called with 99.0 as its second argument).

    Returns:
        A ``(innings, raw_relays)`` pair. ``innings`` holds the result of
        ``build_half_inning`` for each non-empty half inside the range;
        ``raw_relays`` holds every relay fetched, whether or not its half
        falls inside the range.
    """
    half_innings: list[dict[str, Any]] = []
    all_relays: list[dict[str, Any]] = []
    lower_bound = parse_inning_value(start_inning_val, 0.0)
    upper_bound = parse_inning_value(end_inning_val, 99.0)

    for inning_no in range(1, max_inning() + 1):
        # Any fetch failure is treated as "no more innings" rather than an error.
        try:
            payload = api.fetch_relay(game_id, inning=inning_no)
        except Exception:
            break

        inning_relays = payload.get("textRelays", [])
        if not inning_relays:
            break

        # Bucket the relays by their homeOrAway value.
        by_side: dict[int, list[dict[str, Any]]] = defaultdict(list)
        for item in inning_relays:
            by_side[int(item.get("homeOrAway", -1))].append(item)
        all_relays.extend(inning_relays)

        # Side 0 sits at `inning`, side 1 at `inning + 0.5` on the range scale.
        for side, offset in ((0, 0.0), (1, 0.5)):
            side_relays = by_side.get(side, [])
            if not side_relays:
                continue
            position = inning_no + offset
            outside_range = position < lower_bound or position > upper_bound
            if outside_range:
                continue
            half_innings.append(build_half_inning(inning_no, side, side_relays))

    return half_innings, all_relays
# ──────────────────────────────────────────────
# Score timeline & blown saves
# ──────────────────────────────────────────────
def _collect_score_timeline(raw_relays: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return the score snapshots found in the relays, ordered by ``seqno``.

    Every ``textOptions`` entry that carries a non-empty ``currentGameState``
    yields one ``{"seqno", "home_score", "away_score"}`` dict.

    Entries without a ``seqno`` are skipped: they cannot be placed on the
    timeline, and sorting ``None`` against numbers raises ``TypeError``.
    Missing or null scores are counted as 0.

    Args:
        raw_relays: Relay dicts as collected from the relay API.

    Returns:
        The snapshots sorted in ascending ``seqno`` order.
    """
    timeline: list[dict[str, Any]] = []
    for relay in raw_relays:
        # `or []` also covers an explicit null value for the key.
        for option in relay.get("textOptions") or []:
            state = option.get("currentGameState") or {}
            seqno = option.get("seqno")
            if not state or seqno is None:
                continue
            timeline.append({
                "seqno": seqno,
                "home_score": int(state.get("homeScore") or 0),
                "away_score": int(state.get("awayScore") or 0),
            })
    timeline.sort(key=lambda item: item["seqno"])
    return timeline


def _collect_blown_saves(
    raw_relays: list[dict[str, Any]], away_name: str, home_name: str,
) -> list[str]:
    """Return the sorted, de-duplicated names of pitchers who gave up a lead.

    A pitcher is reported when all of the following hold:

    * they enter the game in the 7th inning or later,
    * their team is ahead at the moment they enter, and
    * their team is tied or behind at some later snapshot that is not past
      the entry of the next pitcher of the same team.

    The last condition keeps the blown save with the pitcher who was actually
    on the mound; relievers who had already been replaced while the lead was
    intact are not reported.

    Relays whose ``homeOrAway`` is neither 0 nor 1 are ignored, as are
    pitcher entries lacking a name or a ``seqno``.

    Args:
        raw_relays: Relay dicts as collected from the relay API.
        away_name: Away team name. Kept for interface compatibility; it does
            not influence the result.
        home_name: Home team name. Kept for interface compatibility; it does
            not influence the result.

    Returns:
        Alphabetically sorted pitcher names, each listed once.
    """
    timeline = _collect_score_timeline(raw_relays)

    entries: list[dict[str, Any]] = []
    for relay in raw_relays:
        if int(relay.get("inn", 0) or 0) < 7:
            continue
        side_raw = relay.get("homeOrAway")
        batting_side = int(side_raw) if side_raw is not None else -1
        if batting_side not in (0, 1):
            # Unknown side: the pitching team cannot be determined.
            continue
        # When side 0 bats the home team pitches, and vice versa.
        pitching_team = "home" if batting_side == 0 else "away"
        for option in relay.get("textOptions") or []:
            # Only type-2 options are inspected for a player change
            # (presumably substitution events — confirm against the API).
            if option.get("type") != 2:
                continue
            in_player = (option.get("playerChange") or {}).get("inPlayer") or {}
            if in_player.get("playerPos") != "투수":  # "투수" = pitcher
                continue
            name = in_player.get("playerName")
            seqno = option.get("seqno")
            if not name or seqno is None:
                continue
            state = option.get("currentGameState") or {}
            entries.append({
                "name": name,
                "team": pitching_team,
                "entry_seqno": seqno,
                "home_score": int(state.get("homeScore") or 0),
                "away_score": int(state.get("awayScore") or 0),
            })

    blown: set[str] = set()
    for entry in entries:
        if entry["team"] == "home":
            own_key, opp_key = "home_score", "away_score"
        else:
            own_key, opp_key = "away_score", "home_score"
        if entry[own_key] <= entry[opp_key]:
            # No lead to protect at the time of entry.
            continue

        # The stint ends when the next pitcher of the same team enters.
        stint_end = min(
            (
                other["entry_seqno"]
                for other in entries
                if other["team"] == entry["team"]
                and other["entry_seqno"] > entry["entry_seqno"]
            ),
            default=None,
        )
        for state in timeline:
            if state["seqno"] <= entry["entry_seqno"]:
                continue
            if stint_end is not None and state["seqno"] > stint_end:
                break  # timeline is sorted; everything later belongs to a successor
            if state[own_key] <= state[opp_key]:
                blown.add(entry["name"])
                break

    return sorted(blown)
# ──────────────────────────────────────────────
# Game info build
# ──────────────────────────────────────────────
def _build_game_info(
    game_info: dict[str, Any],
    record_data: dict[str, Any],
    review_meta: dict[str, Any],
) -> dict[str, Any]:
    """Assemble the ``game_info`` section of the report.

    Args:
        game_info: Game info payload; read for date, stadium, start time,
            season and team names, and passed to ``infer_game_type``.
        record_data: Record payload; passed to ``derive_umpires``.
        review_meta: Review metadata; read for ``END_TM`` and ``CROWD_CN``.

    Returns:
        A dict with the keys ``date``, ``stadium``, ``start_time``,
        ``end_time``, ``season``, ``game_type``, ``home_team``, ``away_team``,
        ``attendance`` and ``umpires``, in that order.
    """
    game_date = game_info.get("gameDate")
    # The end time is built from the game date plus the END_TM review field.
    finished_at = build_iso_datetime(game_date, review_meta.get("END_TM"))

    section: dict[str, Any] = {
        out_key: game_info.get(src_key)
        for out_key, src_key in (
            ("date", "gameDate"),
            ("stadium", "stadium"),
            ("start_time", "gameDateTime"),
        )
    }
    section["end_time"] = finished_at
    section["season"] = game_info.get("seasonYear")
    section["game_type"] = infer_game_type(game_info)
    section["home_team"] = game_info.get("homeTeamName")
    section["away_team"] = game_info.get("awayTeamName")
    section["attendance"] = review_meta.get("CROWD_CN")
    section["umpires"] = derive_umpires(record_data)
    return section
def _build_pitcher_section(
    record_data: dict[str, Any],
    raw_relays: list[dict[str, Any]],
    away_name: str,
    home_name: str,
) -> dict[str, list[str]]:
    """Build the pitching summary and attach the blown-save list to it.

    Args:
        record_data: Record payload handed to ``extract_pitching_summary``.
        raw_relays: Relay dicts handed to ``_collect_blown_saves``.
        away_name: Away team name, forwarded to ``_collect_blown_saves``.
        home_name: Home team name, forwarded to ``_collect_blown_saves``.

    Returns:
        The object returned by ``extract_pitching_summary`` with an added
        ``"블론세이브"`` (blown save) entry.
    """
    pitching = extract_pitching_summary(record_data)
    blown_saves = _collect_blown_saves(raw_relays, away_name, home_name)
    pitching["블론세이브"] = blown_saves
    return pitching
# ──────────────────────────────────────────────
# Report build & save
# ──────────────────────────────────────────────
def build_report(
    game_id: str,
    start_inning: str | None = None,
    end_inning: str | None = None,
) -> dict[str, Any]:
    """Build the full report for a game ID.

    Fetches relay, record, game info and preview data plus the KBO review
    metadata through ``NaverApiClient`` and returns a normalized, JSON-ready
    dict.

    Args:
        game_id: Game identifier; normalized with ``clean_game_id`` first.
        start_inning: Lower inning bound forwarded to ``collect_inning_data``.
        end_inning: Upper inning bound forwarded to ``collect_inning_data``.

    Returns:
        A dict with the keys ``game_id``, ``game_info``, ``lineups``,
        ``game_contents`` and ``pitching_summary``.
    """
    gid = clean_game_id(game_id)

    with NaverApiClient() as api:
        relay_data = api.fetch_relay(gid)
        record_data = api.fetch_record(gid)
        game_info = api.fetch_game_info(gid)
        preview_data = api.fetch_preview(gid)
        review_meta = api.fetch_kbo_review_meta(gid, game_info)

        lineups = build_lineup_summary(gid, game_info, relay_data, preview_data)
        half_innings, raw_relays = collect_inning_data(
            api,
            gid,
            start_inning_val=start_inning,
            end_inning_val=end_inning,
        )

        away_team_name = lineups["away_team"]["team_name"]
        home_team_name = lineups["home_team"]["team_name"]
        pitching = _build_pitcher_section(
            record_data, raw_relays, away_team_name, home_team_name,
        )

    report: dict[str, Any] = {"game_id": gid}
    report["game_info"] = _build_game_info(game_info, record_data, review_meta)
    report["lineups"] = lineups
    report["game_contents"] = half_innings
    report["pitching_summary"] = pitching
    return report
def filter_report(
report: dict[str, Any],
inning: str | None = None,
lineup_only: bool = False,
start_inning: str | None = None,
end_inning: str | None = None,
) -> dict[str, Any]:
"""리포트에서 특정 이닝만 필터링"""
filtered = json.loads(json.dumps(report, ensure_ascii=False))
if lineup_only:
filtered["game_contents"] = []
filtered["pitching_summary"] = {
"승리투수": [], "패전투수": [], "홀드": [], "세이브": [], "블론세이브": [],
}
return filtered
start_v = parse_inning_value(start_inning, 0.0)
end_v = parse_inning_value(end_inning, 99.0)
if inning is not None:
iv = parse_inning_value(inning, 0.0)
start_v = iv
end_v = iv + 0.5
filtered["game_contents"] = [
half
for half in filtered.get("game_contents", [])
if start_v <= (
float(half.get("inning") or 0)
+ (0.5 if half.get("half") == "bottom" else 0.0)
) <= end_v
]
return filtered
def save_report(
report: dict[str, Any],
output_dir: Path,
output_json: Path | None = None,
) -> Path:
"""리포트를 JSON 파일로 저장"""
output_dir.mkdir(parents=True, exist_ok=True)
game_id = report["game_id"]
json_path = output_json or (output_dir / f"{game_id}_report.json")
json_path.parent.mkdir(parents=True, exist_ok=True)
json_path.write_text(
json.dumps(report, ensure_ascii=False, indent=2),
encoding="utf-8",
)
return json_path