refactoring
This commit is contained in:
270
crawler/report_builder.py
Normal file
270
crawler/report_builder.py
Normal file
@@ -0,0 +1,270 @@
|
||||
"""
|
||||
crawler/report_builder.py — 최종 JSON 리포트 생성
|
||||
|
||||
네이버 API 데이터를 수집하고, relay 파싱 결과를 합쳐서
|
||||
정규화된 게임 리포트 JSON을 생성/저장합니다.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
from typing import Any
|
||||
|
||||
from core.config_loader import max_inning
|
||||
|
||||
from crawler.naver_api import (
|
||||
NaverApiClient,
|
||||
build_iso_datetime,
|
||||
clean_game_id,
|
||||
derive_umpires,
|
||||
extract_pitching_summary,
|
||||
get_team_names,
|
||||
infer_game_type,
|
||||
)
|
||||
from crawler.relay_parser import build_half_inning, parse_inning_value
|
||||
from crawler.lineup_builder import build_lineup_summary
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# 이닝 데이터 수집
|
||||
# ──────────────────────────────────────────────
|
||||
|
||||
def collect_inning_data(
    api: NaverApiClient,
    game_id: str,
    start_inning_val: str | None = None,
    end_inning_val: str | None = None,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Fetch relay data inning by inning and group it into half-innings.

    Innings ``1 .. max_inning()`` are requested in order through
    ``api.fetch_relay``.  Collection stops at the first inning whose fetch
    raises or that carries no ``textRelays``.

    Args:
        api: Client used to fetch the relay payload of each inning.
        game_id: Identifier passed through to ``api.fetch_relay``.
        start_inning_val: Lower bound of the half-inning window, parsed by
            ``parse_inning_value`` (default ``0.0`` when not given).
        end_inning_val: Upper bound of the window, parsed the same way
            (default ``99.0`` when not given).

    Returns:
        ``(innings, raw_relays)``.  ``innings`` holds one
        ``build_half_inning`` result per non-empty half-inning inside the
        window.  ``raw_relays`` holds every relay record that was fetched,
        regardless of the window.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    logger = logging.getLogger(__name__)

    innings: list[dict[str, Any]] = []
    raw_relays: list[dict[str, Any]] = []

    start_score = parse_inning_value(start_inning_val, 0.0)
    end_score = parse_inning_value(end_inning_val, 99.0)

    for inning in range(1, max_inning() + 1):
        try:
            relay_data = api.fetch_relay(game_id, inning=inning)
        except Exception:
            # Still best-effort (whatever was collected so far is returned),
            # but the truncation is no longer silent.
            logger.warning(
                "relay fetch failed for game %s, inning %d; stopping collection",
                game_id,
                inning,
                exc_info=True,
            )
            break

        # A null payload or a null "textRelays" value counts as "no data".
        relays = (relay_data or {}).get("textRelays") or []
        if not relays:
            break

        grouped: dict[int, list[dict[str, Any]]] = defaultdict(list)
        for relay in relays:
            side = relay.get("homeOrAway")
            # Records without a side go to bucket -1, which is never emitted.
            grouped[-1 if side is None else int(side)].append(relay)
            raw_relays.append(relay)

        # Side 1 is ordered after side 0 within the same inning (+0.5).
        # NOTE(review): presumably 0 = top half and 1 = bottom half - confirm.
        for home_or_away in (0, 1):
            half_relays = grouped.get(home_or_away, [])
            if not half_relays:
                continue
            current_score = inning + (0.5 if home_or_away == 1 else 0.0)
            if current_score < start_score or current_score > end_score:
                continue
            innings.append(build_half_inning(inning, home_or_away, half_relays))

    return innings, raw_relays
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# 점수 타임라인 & 블론세이브
|
||||
# ──────────────────────────────────────────────
|
||||
|
||||
def _collect_score_timeline(raw_relays: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return every score snapshot found in *raw_relays*, ordered by ``seqno``.

    Each ``textOptions`` entry with a non-empty ``currentGameState`` becomes
    one ``{"seqno", "home_score", "away_score"}`` record.

    Options without a ``seqno`` are skipped: they cannot be ordered, and a
    ``None`` key would make the sort raise ``TypeError``.  Missing or null
    scores are treated as 0.
    """
    timeline: list[dict[str, Any]] = []
    for relay in raw_relays:
        for option in relay.get("textOptions") or []:
            state = option.get("currentGameState") or {}
            seqno = option.get("seqno")
            if not state or seqno is None:
                continue
            timeline.append({
                "seqno": seqno,
                "home_score": int(state.get("homeScore") or 0),
                "away_score": int(state.get("awayScore") or 0),
            })
    timeline.sort(key=lambda item: item["seqno"])
    return timeline


def _collect_blown_saves(
    raw_relays: list[dict[str, Any]], away_name: str, home_name: str,
) -> list[str]:
    """Return the sorted, de-duplicated names of pitchers who blew a lead.

    A pitcher qualifies when he enters in the 7th inning or later (a
    ``type == 2`` option whose incoming player has position ``"투수"``) while
    his team is ahead, and his team stops being ahead (tied or behind)
    before the same team's next pitcher enters.

    The score snapshot attached to that next pitching change is still
    charged to the outgoing pitcher.  Later snapshots are not, so a reliever
    who handed over an intact lead is no longer blamed for a successor
    losing it.

    Args:
        raw_relays: Relay records as returned by the relay API.
        away_name: Away team name.  Currently unused; kept so the signature
            stays compatible with existing callers.
        home_name: Home team name.  Currently unused; kept for the same reason.

    Returns:
        Pitcher names in ascending order, each at most once.
    """
    timeline = _collect_score_timeline(raw_relays)

    entries: list[dict[str, Any]] = []
    for relay in raw_relays:
        if int(relay.get("inn", 0) or 0) < 7:
            continue
        side = relay.get("homeOrAway")
        # The relay side is the batting side; side 0 means the home team is
        # pitching, and anything else (including unknown) maps to the away team.
        pitcher_team = "home" if side is not None and int(side) == 0 else "away"

        for option in relay.get("textOptions") or []:
            if option.get("type") != 2:
                continue
            in_player = (option.get("playerChange") or {}).get("inPlayer") or {}
            if in_player.get("playerPos") != "투수":
                continue
            name = in_player.get("playerName")
            seqno = option.get("seqno")
            # Without a name or a sequence number the entry can neither be
            # reported nor placed on the timeline.
            if not name or seqno is None:
                continue
            state = option.get("currentGameState") or {}
            entries.append({
                "name": name,
                "team": pitcher_team,
                "entry_seqno": seqno,
                "home_score": int(state.get("homeScore") or 0),
                "away_score": int(state.get("awayScore") or 0),
            })

    # Relays are not guaranteed to arrive in chronological order.
    entries.sort(key=lambda item: item["entry_seqno"])

    blown: set[str] = set()
    for index, entry in enumerate(entries):
        if entry["team"] == "home":
            own_key, opp_key = "home_score", "away_score"
        else:
            own_key, opp_key = "away_score", "home_score"

        # Only pitchers who came in holding a lead can blow it.
        if entry[own_key] <= entry[opp_key]:
            continue

        # Responsibility ends when the same team's next pitcher enters.
        handoff = next(
            (
                later["entry_seqno"]
                for later in entries[index + 1:]
                if later["team"] == entry["team"]
            ),
            None,
        )

        for state in timeline:
            if state["seqno"] <= entry["entry_seqno"]:
                continue
            if handoff is not None and state["seqno"] > handoff:
                break
            if state[own_key] <= state[opp_key]:
                blown.add(entry["name"])
                break

    return sorted(blown)
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# 게임 정보 빌드
|
||||
# ──────────────────────────────────────────────
|
||||
|
||||
def _build_game_info(
    game_info: dict[str, Any],
    record_data: dict[str, Any],
    review_meta: dict[str, Any],
) -> dict[str, Any]:
    """Assemble the ``game_info`` section of the report.

    Args:
        game_info: Game metadata; read for date, stadium, start time,
            season and team names, and passed to ``infer_game_type``.
        record_data: Record payload handed to ``derive_umpires``.
        review_meta: Review metadata; supplies ``END_TM`` (combined with the
            game date through ``build_iso_datetime``) and ``CROWD_CN``.

    Returns:
        A dict with the keys ``date``, ``stadium``, ``start_time``,
        ``end_time``, ``season``, ``game_type``, ``home_team``,
        ``away_team``, ``attendance`` and ``umpires``, in that order.
    """
    game_date = game_info.get("gameDate")
    finished_at = build_iso_datetime(game_date, review_meta.get("END_TM"))

    section: dict[str, Any] = {"date": game_date}
    section["stadium"] = game_info.get("stadium")
    section["start_time"] = game_info.get("gameDateTime")
    section["end_time"] = finished_at
    section["season"] = game_info.get("seasonYear")
    section["game_type"] = infer_game_type(game_info)
    for out_key, source_key in (
        ("home_team", "homeTeamName"),
        ("away_team", "awayTeamName"),
    ):
        section[out_key] = game_info.get(source_key)
    section["attendance"] = review_meta.get("CROWD_CN")
    section["umpires"] = derive_umpires(record_data)
    return section
|
||||
|
||||
|
||||
def _build_pitcher_section(
    record_data: dict[str, Any],
    raw_relays: list[dict[str, Any]],
    away_name: str,
    home_name: str,
) -> dict[str, list[str]]:
    """Build the pitching summary and attach the blown-save list.

    The summary comes from ``extract_pitching_summary(record_data)``.  The
    result of ``_collect_blown_saves`` is stored in that same dict under the
    ``"블론세이브"`` key, and the dict is returned.
    """
    pitching = extract_pitching_summary(record_data)
    blown_saves = _collect_blown_saves(raw_relays, away_name, home_name)
    pitching["블론세이브"] = blown_saves
    return pitching
|
||||
|
||||
|
||||
# ──────────────────────────────────────────────
|
||||
# 리포트 빌드 & 저장
|
||||
# ──────────────────────────────────────────────
|
||||
|
||||
def build_report(
    game_id: str,
    start_inning: str | None = None,
    end_inning: str | None = None,
) -> dict[str, Any]:
    """Build the full normalised report dict for one game.

    The game id is first normalised with ``clean_game_id``.  A
    ``NaverApiClient`` then fetches the relay, record, game-info and preview
    payloads plus the KBO review metadata, and the pieces are combined.

    Args:
        game_id: Raw game identifier.
        start_inning: Optional lower bound forwarded to
            ``collect_inning_data``.
        end_inning: Optional upper bound forwarded to
            ``collect_inning_data``.

    Returns:
        A dict with the keys ``game_id``, ``game_info``, ``lineups``,
        ``game_contents`` and ``pitching_summary``.
    """
    game_id = clean_game_id(game_id)

    with NaverApiClient() as client:
        relay_payload = client.fetch_relay(game_id)
        record_payload = client.fetch_record(game_id)
        info_payload = client.fetch_game_info(game_id)
        preview_payload = client.fetch_preview(game_id)
        kbo_meta = client.fetch_kbo_review_meta(game_id, info_payload)

        lineups = build_lineup_summary(
            game_id, info_payload, relay_payload, preview_payload,
        )
        half_innings, all_relays = collect_inning_data(
            client,
            game_id,
            start_inning_val=start_inning,
            end_inning_val=end_inning,
        )

        away_team_name = lineups["away_team"]["team_name"]
        home_team_name = lineups["home_team"]["team_name"]
        pitching = _build_pitcher_section(
            record_payload, all_relays, away_team_name, home_team_name,
        )

        report: dict[str, Any] = {"game_id": game_id}
        report["game_info"] = _build_game_info(info_payload, record_payload, kbo_meta)
        report["lineups"] = lineups
        report["game_contents"] = half_innings
        report["pitching_summary"] = pitching
        return report
|
||||
|
||||
|
||||
def filter_report(
    report: dict[str, Any],
    inning: str | None = None,
    lineup_only: bool = False,
    start_inning: str | None = None,
    end_inning: str | None = None,
) -> dict[str, Any]:
    """Return a copy of *report* restricted to a window of half-innings.

    The copy is made with a JSON round trip, so *report* itself is never
    modified.

    Args:
        report: Report dict to filter.
        inning: When given, overrides the window with
            ``[value, value + 0.5]`` where ``value`` is
            ``parse_inning_value(inning, 0.0)``.
            NOTE(review): if that helper can return a ``x.5`` value, the
            window would also include the next inning's first half - confirm.
        lineup_only: When true, ``game_contents`` is emptied, every
            ``pitching_summary`` list is reset, and the window arguments are
            ignored.
        start_inning: Lower bound of the window (default ``0.0``).
        end_inning: Upper bound of the window (default ``99.0``).

    Returns:
        The filtered copy.
    """
    result = json.loads(json.dumps(report, ensure_ascii=False))

    if lineup_only:
        result["game_contents"] = []
        result["pitching_summary"] = {
            label: []
            for label in ("승리투수", "패전투수", "홀드", "세이브", "블론세이브")
        }
        return result

    lower = parse_inning_value(start_inning, 0.0)
    upper = parse_inning_value(end_inning, 99.0)
    if inning is not None:
        lower = parse_inning_value(inning, 0.0)
        upper = lower + 0.5

    kept = []
    for half in result.get("game_contents", []):
        # Position on the inning axis: a "bottom" half sits 0.5 after the
        # same inning's other half.
        position = float(half.get("inning") or 0)
        if half.get("half") == "bottom":
            position += 0.5
        if lower <= position <= upper:
            kept.append(half)

    result["game_contents"] = kept
    return result
|
||||
|
||||
|
||||
def save_report(
    report: dict[str, Any],
    output_dir: Path | str,
    output_json: Path | str | None = None,
) -> Path:
    """Write *report* to disk as indented UTF-8 JSON and return the file path.

    Args:
        report: JSON-serialisable report.  ``report["game_id"]`` is only
            needed when *output_json* is not given.
        output_dir: Directory for the default ``<game_id>_report.json``
            file.  It is always created, even when *output_json* points
            somewhere else.
        output_json: Explicit target file.  When given (and non-empty) it
            takes precedence over the default name inside *output_dir*.

    Returns:
        The path of the written file.

    Raises:
        KeyError: If *output_json* is not given and *report* has no
            ``"game_id"`` key.
    """
    # Accept plain strings as well as Path objects.
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)

    if output_json:
        json_path = Path(output_json)
    else:
        # The game id is only looked up when it is actually needed.
        json_path = output_dir / f"{report['game_id']}_report.json"

    json_path.parent.mkdir(parents=True, exist_ok=True)
    json_path.write_text(
        json.dumps(report, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
    return json_path
|
||||
Reference in New Issue
Block a user