Files
baseball-automation/crawler/naver_api.py
2026-05-02 16:24:42 +09:00

198 lines
7.1 KiB
Python

"""
crawler/naver_api.py — 네이버 스포츠 API HTTP 클라이언트
모든 네이버 API 호출을 캡슐화합니다.
"""
from __future__ import annotations
import re
from datetime import datetime
from typing import Any
import httpx
from core.config_loader import (
crawler_headers,
game_type_map,
kbo_sr_id_candidates,
result_labels,
team_code_map,
)
# Base endpoint of the Naver Sports games API; NaverApiClient appends
# "/{game_id}" and sub-paths such as "/relay", "/record", "/preview" to it.
BASE_URL = "https://api-gw.sports.naver.com/schedule/games"
# KBO official-site web-service endpoint that fetch_kbo_review_meta POSTs to.
KBO_URL = "https://www.koreabaseball.com/ws/Schedule.asmx/GetScoreBoardScroll"
class NaverApiClient:
    """HTTP client for the Naver Sports game API.

    Wraps an ``httpx.Client`` to fetch game info, relay, record and preview
    data, plus metadata from the KBO official site. The underlying client
    only exists inside a ``with`` block, so use it like this::

        with NaverApiClient() as api:
            relay = api.fetch_relay(game_id)
    """

    def __init__(self, timeout: float = 20.0):
        """Remember the request timeout; the HTTP client is created on enter."""
        self._client: httpx.Client | None = None
        self._timeout = timeout

    def __enter__(self) -> "NaverApiClient":
        """Open the underlying ``httpx.Client`` and return this instance."""
        self._client = httpx.Client(headers=crawler_headers(), timeout=self._timeout)
        return self

    def __exit__(self, *args: Any) -> None:
        """Close the underlying client, if one is open, and forget it."""
        # Detach first so the instance is reset even if close() raises.
        open_client, self._client = self._client, None
        if open_client is not None:
            open_client.close()

    @property
    def client(self) -> httpx.Client:
        """Return the open ``httpx.Client``.

        Raises:
            RuntimeError: If accessed while no client is open (i.e. outside
                a ``with`` block).
        """
        if self._client is None:
            raise RuntimeError("NaverApiClient는 with 문 안에서 사용하세요.")
        return self._client

    def _get_json(self, url: str) -> dict[str, Any]:
        """GET *url*, call ``raise_for_status()``, and return the decoded JSON."""
        resp = self.client.get(url)
        resp.raise_for_status()
        return resp.json()

    # ──────────────────────────────────────────
    # Game information
    # ──────────────────────────────────────────

    def fetch_game_info(self, game_id: str) -> dict[str, Any]:
        """Return the basic game info (``result.game`` of the response)."""
        payload = self._get_json(f"{BASE_URL}/{game_id}")
        return payload["result"]["game"]

    def fetch_relay(self, game_id: str, inning: int | None = None) -> dict[str, Any]:
        """Return relay data (``result.textRelayData``).

        Args:
            game_id: Game identifier used in the request path.
            inning: When given, sent as the ``inning`` query parameter to
                request a single inning; otherwise no query is sent.
        """
        url = f"{BASE_URL}/{game_id}/relay"
        if inning is not None:
            url += f"?inning={inning}"
        payload = self._get_json(url)
        return payload["result"]["textRelayData"]

    def fetch_record(self, game_id: str) -> dict[str, Any]:
        """Return record data (``result.recordData``), requested with ``fields=all``."""
        payload = self._get_json(f"{BASE_URL}/{game_id}/record?fields=all")
        return payload["result"]["recordData"]

    def fetch_preview(self, game_id: str) -> dict[str, Any]:
        """Return preview data (``result.previewData``), or ``{}`` if absent/empty.

        Per the original note, the preview includes preliminary lineups.
        """
        payload = self._get_json(f"{BASE_URL}/{game_id}/preview")
        return payload["result"].get("previewData") or {}

    # ──────────────────────────────────────────
    # KBO official-site data
    # ──────────────────────────────────────────

    def fetch_kbo_review_meta(
        self, game_id: str, game_info: dict[str, Any],
    ) -> dict[str, Any]:
        """Look up meta information (end time, attendance, ...) on the KBO site.

        Each configured ``srId`` candidate for the inferred game type is tried
        in order. The first response whose ``code`` is ``"100"`` and that has
        a truthy value for at least one of ``END_TM``, ``START_TM``,
        ``USE_TM`` or ``CROWD_CN`` is returned.

        Args:
            game_id: Naver game id; converted with ``to_kbo_game_id``.
            game_info: Game info dict; ``seasonYear`` is sent as ``seasonId``
                and the dict is passed to ``infer_game_type``.

        Returns:
            The matching response payload, or ``{}`` when no candidate matches.

        Raises:
            KeyError: If the game type has no candidate entry and the
                ``"정규경기"`` fallback entry is missing as well.
        """
        game_type = infer_game_type(game_info)
        # Load the candidate table once and resolve the regular-season
        # fallback only when the game type has no entry of its own.
        sr_id_table = kbo_sr_id_candidates()
        if game_type in sr_id_table:
            candidates = sr_id_table[game_type]
        else:
            candidates = sr_id_table["정규경기"]
        kbo_game_id = to_kbo_game_id(game_id)
        season_id = str(game_info.get("seasonYear") or "")
        meta_keys = ("END_TM", "START_TM", "USE_TM", "CROWD_CN")
        for sr_id in candidates:
            resp = self.client.post(
                KBO_URL,
                data={
                    "leId": "1",
                    "srId": sr_id,
                    "seasonId": season_id,
                    "gameId": kbo_game_id,
                },
            )
            resp.raise_for_status()
            payload = resp.json()
            if str(payload.get("code")) != "100":
                continue
            # A "100" response with none of the meta fields set is treated as
            # a miss, so the next candidate is tried.
            if not any(payload.get(key) for key in meta_keys):
                continue
            return payload
        return {}
# ──────────────────────────────────────────────
# 유틸리티 함수 (순수)
# ──────────────────────────────────────────────
def clean_game_id(game_id: str) -> str:
    """Return *game_id* reduced to its ASCII letters and digits, in order."""
    kept_chars = (match.group(0) for match in re.finditer(r"[A-Za-z0-9]", game_id))
    return "".join(kept_chars)
def get_team_names(
    game_id: str, game_info: dict[str, Any] | None = None,
) -> tuple[str, str]:
    """Return the ``(away, home)`` team names for a game.

    When *game_info* is non-empty its ``awayTeamName`` / ``homeTeamName``
    values are used. Otherwise the two-character codes at ``game_id[8:10]``
    and ``game_id[10:12]`` are looked up in ``team_code_map()``, and a code
    with no mapping is returned unchanged.
    """
    if not game_info:
        names_by_code = team_code_map()
        away, home = game_id[8:10], game_id[10:12]
        return names_by_code.get(away, away), names_by_code.get(home, home)
    return game_info["awayTeamName"], game_info["homeTeamName"]
def infer_game_type(game_info: dict[str, Any]) -> str:
    """Infer the game-type label from a game info dict.

    A non-blank ``roundName`` wins and is returned stripped. Otherwise the
    lower-cased ``roundCode`` is scanned for the first ``game_type_map()`` key
    it contains, and that key's label is returned. ``"정규경기"`` is the
    default when nothing matches.
    """
    explicit_name = str(game_info.get("roundName") or "").strip()
    if explicit_name:
        return explicit_name
    code = str(game_info.get("roundCode") or "").lower()
    return next(
        (label for key, label in game_type_map().items() if key in code),
        "정규경기",
    )
def to_kbo_game_id(game_id: str) -> str:
    """Convert a Naver game id to the KBO official-site game id.

    The result is the first 12 characters of *game_id* followed by ``"0"``.
    """
    prefix = game_id[:12]
    return f"{prefix}0"
def build_iso_datetime(game_date: str | None, hhmm: str | None) -> str | None:
    """Combine a date string and an ``H:MM`` clock string into an ISO datetime.

    Returns ``None`` when either argument is empty, when the clock text has
    no ``":"``, or when the parts do not form a valid datetime (non-integer
    hour/minute, out-of-range values, unparsable date).
    """
    if not (game_date and hhmm):
        return None
    clock = hhmm.strip()
    hours, colon, minutes = clock.partition(":")
    if not colon:
        return None
    try:
        stamp = f"{game_date}T{int(hours):02d}:{int(minutes):02d}:00"
        parsed = datetime.fromisoformat(stamp)
    except ValueError:
        return None
    return parsed.isoformat()
def derive_umpires(record_data: dict[str, Any]) -> dict[str, str | None]:
    """Extract the umpire crew from record data.

    Finds the first ``etcRecords`` entry whose ``how`` is ``"심판"`` and splits
    its ``result`` text on whitespace. Names are assigned positionally to
    chief, first base, second base and third base; names beyond the fourth
    are ignored.

    Args:
        record_data: Record payload. A missing or null ``etcRecords``, and a
            missing or null ``result`` on the umpire entry, are treated as
            "no umpire information".

    Returns:
        A dict with keys ``chief``, ``first_base``, ``second_base`` and
        ``third_base``; each value is a name or ``None`` when unavailable.
    """
    # `or` also covers an explicit JSON null, which a .get() default would not.
    etc_records = record_data.get("etcRecords") or []
    umpire_record = next(
        (item for item in etc_records if item.get("how") == "심판"),
        None,
    )
    result_text = (umpire_record.get("result") or "") if umpire_record else ""
    names = result_text.split()
    positions = ("chief", "first_base", "second_base", "third_base")
    return {
        position: names[index] if index < len(names) else None
        for index, position in enumerate(positions)
    }
def extract_pitching_summary(record_data: dict[str, Any]) -> dict[str, list[str]]:
    """Summarise pitcher decisions from record data.

    Each ``pitchingResult`` entry's ``wls`` value is translated through
    ``result_labels()``. When the resulting label is one of the summary keys,
    the pitcher's ``name`` is appended to that key's list.

    Args:
        record_data: Record payload. A missing or null ``pitchingResult`` is
            treated as an empty list.

    Returns:
        A dict with keys ``승리투수``, ``패전투수``, ``홀드`` and ``세이브``, each
        mapping to the list of matching pitcher names in input order.

    Raises:
        KeyError: If a pitcher with a recognised label has no ``name``.
    """
    label_map = result_labels()
    summary: dict[str, list[str]] = {"승리투수": [], "패전투수": [], "홀드": [], "세이브": []}
    # `or []` also covers an explicit JSON null, which a .get() default would not.
    for pitcher in record_data.get("pitchingResult") or []:
        label = label_map.get(pitcher.get("wls"))
        if label and label in summary:
            summary[label].append(pitcher["name"])
    return summary