""" crawler/naver_api.py — 네이버 스포츠 API HTTP 클라이언트 모든 네이버 API 호출을 캡슐화합니다. """ from __future__ import annotations import re from datetime import datetime from typing import Any import httpx from core.config_loader import ( crawler_headers, game_type_map, kbo_sr_id_candidates, result_labels, team_code_map, ) BASE_URL = "https://api-gw.sports.naver.com/schedule/games" KBO_URL = "https://www.koreabaseball.com/ws/Schedule.asmx/GetScoreBoardScroll" class NaverApiClient: """네이버 스포츠 API 클라이언트 httpx.Client를 래핑하여 게임 정보, relay, 라인업, 기록 등을 가져옵니다. with 문으로 사용하세요: with NaverApiClient() as api: relay = api.fetch_relay(game_id) """ def __init__(self, timeout: float = 20.0): self._client: httpx.Client | None = None self._timeout = timeout def __enter__(self) -> "NaverApiClient": self._client = httpx.Client(headers=crawler_headers(), timeout=self._timeout) return self def __exit__(self, *args: Any) -> None: if self._client: self._client.close() self._client = None @property def client(self) -> httpx.Client: if self._client is None: raise RuntimeError("NaverApiClient는 with 문 안에서 사용하세요.") return self._client def _get_json(self, url: str) -> dict[str, Any]: resp = self.client.get(url) resp.raise_for_status() return resp.json() # ────────────────────────────────────────── # 게임 정보 # ────────────────────────────────────────── def fetch_game_info(self, game_id: str) -> dict[str, Any]: """게임 기본 정보""" payload = self._get_json(f"{BASE_URL}/{game_id}") return payload["result"]["game"] def fetch_relay(self, game_id: str, inning: int | None = None) -> dict[str, Any]: """relay 데이터 (전체 또는 특정 이닝)""" url = f"{BASE_URL}/{game_id}/relay" if inning is not None: url += f"?inning={inning}" payload = self._get_json(url) return payload["result"]["textRelayData"] def fetch_record(self, game_id: str) -> dict[str, Any]: """기록 데이터 (투수/타자 기록)""" payload = self._get_json(f"{BASE_URL}/{game_id}/record?fields=all") return payload["result"]["recordData"] def fetch_preview(self, game_id: str) -> dict[str, Any]: """프리뷰 데이터 (예비 라인업 포함)""" payload = self._get_json(f"{BASE_URL}/{game_id}/preview") return payload["result"].get("previewData") or {} # ────────────────────────────────────────── # KBO 공식 사이트 데이터 # ────────────────────────────────────────── def fetch_kbo_review_meta( self, game_id: str, game_info: dict[str, Any], ) -> dict[str, Any]: """KBO 공식 사이트에서 종료시간/관중수 등 메타 정보 조회""" game_type = infer_game_type(game_info) candidates = kbo_sr_id_candidates().get(game_type, kbo_sr_id_candidates()["정규경기"]) kbo_game_id = to_kbo_game_id(game_id) for sr_id in candidates: resp = self.client.post( KBO_URL, data={ "leId": "1", "srId": sr_id, "seasonId": str(game_info.get("seasonYear") or ""), "gameId": kbo_game_id, }, ) resp.raise_for_status() payload = resp.json() if str(payload.get("code")) != "100": continue if not any(payload.get(key) for key in ("END_TM", "START_TM", "USE_TM", "CROWD_CN")): continue return payload return {} # ────────────────────────────────────────────── # 유틸리티 함수 (순수) # ────────────────────────────────────────────── def clean_game_id(game_id: str) -> str: """game_id에서 알파벳+숫자만 추출""" return "".join(re.findall(r"[A-Za-z0-9]", game_id)) def get_team_names( game_id: str, game_info: dict[str, Any] | None = None, ) -> tuple[str, str]: """game_id 또는 game_info에서 원정/홈 팀명 추출""" if game_info: return game_info["awayTeamName"], game_info["homeTeamName"] code_map = team_code_map() away_code = game_id[8:10] home_code = game_id[10:12] return code_map.get(away_code, away_code), code_map.get(home_code, home_code) def infer_game_type(game_info: dict[str, Any]) -> str: """게임 정보에서 경기유형 추론""" round_code = str(game_info.get("roundCode") or "").lower() round_name = str(game_info.get("roundName") or "").strip() if round_name: return round_name gt_map = game_type_map() for key, label in gt_map.items(): if key in round_code: return label return "정규경기" def to_kbo_game_id(game_id: str) -> str: """네이버 game_id → KBO 공식 game_id""" return f"{game_id[:12]}0" def build_iso_datetime(game_date: str | None, hhmm: str | None) -> str | None: """날짜 + 시:분 → ISO datetime 문자열""" if not game_date or not hhmm: return None time_text = hhmm.strip() if not time_text or ":" not in time_text: return None hour_text, minute_text = time_text.split(":", 1) try: dt = datetime.fromisoformat(f"{game_date}T{int(hour_text):02d}:{int(minute_text):02d}:00") except ValueError: return None return dt.isoformat() def derive_umpires(record_data: dict[str, Any]) -> dict[str, str | None]: """기록 데이터에서 심판 정보 추출""" umpire_record = next( (item for item in record_data.get("etcRecords", []) if item.get("how") == "심판"), None, ) names = umpire_record.get("result", "").split() if umpire_record else [] return { "chief": names[0] if len(names) > 0 else None, "first_base": names[1] if len(names) > 1 else None, "second_base": names[2] if len(names) > 2 else None, "third_base": names[3] if len(names) > 3 else None, } def extract_pitching_summary(record_data: dict[str, Any]) -> dict[str, list[str]]: """기록 데이터에서 투수 결과 요약 추출""" label_map = result_labels() summary: dict[str, list[str]] = {"승리투수": [], "패전투수": [], "홀드": [], "세이브": []} for pitcher in record_data.get("pitchingResult", []): label = label_map.get(pitcher.get("wls")) if label and label in summary: summary[label].append(pitcher["name"]) return summary