# Files
# baseball-automation/game_report.py
# 2026-05-02 11:12:13 +09:00
#
# 1091 lines
# 42 KiB
# Python

from __future__ import annotations
import argparse
import json
from collections import defaultdict
from datetime import datetime
from pathlib import Path
import re
from typing import Any
import httpx
# Game id used when --game-id is not given on the command line.
DEFAULT_GAME_ID = "20260414LTLG02026"
# Highest inning number requested from the relay endpoint.
MAX_INNING = 20

# Two-letter team codes (read out of the game id) -> display names.
TEAM_CODE_MAP = {
    "HH": "한화",
    "HT": "KIA",
    "KT": "KT",
    "LG": "LG",
    "LT": "롯데",
    "NC": "NC",
    "OB": "두산",
    "SK": "SSG",
    "SS": "삼성",
    "WO": "키움",
}

# Request headers sent with every call made through the shared HTTP client.
HEADERS = {
    "User-Agent": "Mozilla/5.0",
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "ko-KR,ko;q=0.9",
    "Origin": "https://m.sports.naver.com",
    "x-sports-backend": "kotlin",
}

# Text-option types that never become event text.
SKIP_OPTION_TYPES = {0, 8, 98, 99}
# Exact relay texts that are dropped from the report.
HIDDEN_EVENT_TEXTS = {
    "투수 투수판 이탈",
    "코칭스태프 마운드 방문",
    "포수 마운드 방문",
}
# Substrings that mark a relay text as a substitution / position change.
CHANGE_KEYWORDS = ("(으)로 교체", "수비위치 변경")
# Pitcher decision codes ("wls") -> labels used in the pitching summary.
RESULT_LABELS = {
    "W": "승리투수",
    "L": "패전투수",
    "H": "홀드",
    "S": "세이브",
}

# Round-code fragments -> game type labels (first matching fragment wins).
GAME_TYPE_MAP = {
    "kbo_r": "정규경기",
    "wildcard": "와일드카드",
    "wc": "와일드카드",
    "semi_playoff": "준플레이오프",
    "semi_po": "준플레이오프",
    "playoff": "플레이오프",
    "po": "플레이오프",
    "korean_series": "한국시리즈",
    "ks": "한국시리즈",
}

# Candidate "srId" values to try, in order, for each game type.
KBO_SR_ID_CANDIDATES = {
    "정규경기": list("012345789"),
    "와일드카드": list("301245789"),
    "준플레이오프": list("401235789"),
    "플레이오프": list("501234789"),
    "한국시리즈": list("701234589"),
}

# Review item -> pair of result labels associated with it.
REVIEW_RESULT_GROUPS = {
    "홈런타구 페어 파울": ("페어", "파울"),
    "외야타구 페어 파울": ("페어", "파울"),
    "포수/태그플레이 아웃/세이프": ("아웃", "세이프"),
    "야수의 포구": ("아웃", "세이프"),
    "몸에 맞는 공": ("인정", "불인정"),
    "파울": ("인정", "불인정"),
    "헛스윙": ("인정", "불인정"),
    "기타": ("인정", "불인정"),
}
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the report generator.

    Returns:
        The namespace produced by ``argparse`` with the attributes
        ``game_id``, ``output_dir``, ``start_inning``, ``end_inning``,
        ``lineup_only`` and ``output_json``.
    """
    cli = argparse.ArgumentParser(
        description="라인업, 이닝별 타석 로그, 투수 결과를 json/txt 파일로 저장합니다."
    )
    # (flag, add_argument keyword arguments), registered in this order.
    option_specs = (
        ("--game-id", {"default": DEFAULT_GAME_ID, "help": "예: 20250425NCSS02025"}),
        ("--output-dir", {"default": "output", "help": "저장할 폴더 경로"}),
        ("--start-inning", {"help": "시작 이닝 (예: 1, 1T(초), 1B(말))"}),
        ("--end-inning", {"help": "종료 이닝 (예: 9, 9B)"}),
        ("--lineup-only", {"action": "store_true", "help": "라인업만 포함한 리포트를 생성합니다."}),
        ("--output-json", {"help": "JSON 저장 경로를 직접 지정합니다."}),
    )
    for flag, settings in option_specs:
        cli.add_argument(flag, **settings)
    return cli.parse_args()
def request_json(client: httpx.Client, url: str) -> dict[str, Any]:
    """GET *url* through *client* and return the decoded JSON body.

    Whatever ``raise_for_status()`` raises for an error response propagates
    to the caller.
    """
    reply = client.get(url)
    reply.raise_for_status()
    decoded = reply.json()
    return decoded
def option_seqno(option: dict[str, Any]) -> int:
    """Return the option's sequence number as an int, or -1 when it has none.

    A missing ``"seqno"`` key and an explicit ``None`` value are both treated
    as "no sequence number", so the function can be used as a sort key over
    options that were not filtered beforehand.
    """
    seqno = option.get("seqno")
    if seqno is None:
        return -1
    return int(seqno)
def relay_seqno(relay: dict[str, Any]) -> int:
    """Return the smallest sequence number among the relay's text options.

    Options whose ``"seqno"`` is missing or ``None`` are ignored; -1 is
    returned when no option carries a sequence number.
    """
    numbered = (
        int(entry["seqno"])
        for entry in relay.get("textOptions", [])
        if entry.get("seqno") is not None
    )
    return min(numbered, default=-1)
def get_team_names(game_id: str, game_info: dict[str, Any] | None = None) -> tuple[str, str]:
    """Return ``(away_name, home_name)`` for a game.

    When *game_info* is given (and non-empty) its ``awayTeamName`` and
    ``homeTeamName`` values are used. Otherwise the two-letter codes at
    positions 8-9 (away) and 10-11 (home) of *game_id* are translated through
    ``TEAM_CODE_MAP``, falling back to the raw code for unknown teams.
    """
    if not game_info:
        away_code, home_code = game_id[8:10], game_id[10:12]
        away_name = TEAM_CODE_MAP.get(away_code, away_code)
        home_name = TEAM_CODE_MAP.get(home_code, home_code)
        return away_name, home_name
    return game_info["awayTeamName"], game_info["homeTeamName"]
def get_starting_pitcher(pitchers: list[dict[str, Any]]) -> dict[str, Any] | None:
    """Return the pitcher with the lowest ``seqno`` (missing counts as 999).

    Ties keep the earliest entry in the list. ``None`` is returned for an
    empty list.
    """
    starter = None
    for candidate in pitchers or ():
        # Strict "<" keeps the first of several equally low entries.
        if starter is None or candidate.get("seqno", 999) < starter.get("seqno", 999):
            starter = candidate
    return starter
def get_starting_batters(batters: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return one batter per batting-order slot, ordered by slot.

    For each ``batOrder`` value the entry with the lowest ``seqno`` is kept
    (missing values sort as 999). Entries without a ``batOrder`` are dropped.
    """
    def lineup_key(entry: dict[str, Any]) -> tuple[Any, Any]:
        return entry.get("batOrder", 999), entry.get("seqno", 999)

    first_by_slot: dict[int, dict[str, Any]] = {}
    for entry in sorted(batters, key=lineup_key):
        slot = entry.get("batOrder")
        if slot is not None:
            # setdefault keeps the earliest (lowest seqno) entry per slot.
            first_by_slot.setdefault(slot, entry)
    return [first_by_slot[slot] for slot in sorted(first_by_slot)]
def format_player_line(player: dict[str, Any]) -> str:
    """Format a player as ``"<name> (#<number>, <position>, <handedness>)"``.

    Missing or empty values fall back to ``"-"``, except the position, which
    falls back to ``"투수"``.
    """
    def first_truthy(*keys: str) -> Any:
        for key in keys:
            value = player.get(key)
            if value:
                return value
        return None

    name = first_truthy("name") or "-"
    number = first_truthy("backnum") or "-"
    position = first_truthy("posName", "pos") or "투수"
    handedness = first_truthy("hitType", "hittype") or "-"
    return "{} (#{}, {}, {})".format(name, number, position, handedness)
def build_lineup_team(team_name: str, lineup: dict[str, Any]) -> dict[str, Any]:
    """Build a team's lineup summary from relay lineup data.

    Args:
        team_name: Display name stored under ``"team_name"``.
        lineup: Mapping whose ``"pitcher"`` and ``"batter"`` lists are read.

    Returns:
        A dict with ``team_name``, ``starter_pitcher`` (``None`` when no
        pitcher is listed) and ``players`` (the starting batters in order).
    """
    pitcher = get_starting_pitcher(lineup.get("pitcher", []))
    batters = get_starting_batters(lineup.get("batter", []))

    pitcher_entry = None
    if pitcher:
        pitcher_entry = {
            "name": pitcher.get("name"),
            "position": "투수",
            "number": pitcher.get("backnum"),
        }

    player_entries = []
    for batter in batters:
        player_entries.append(
            {
                "bat_order": batter.get("batOrder"),
                "name": batter.get("name"),
                "position": batter.get("posName"),
                "number": batter.get("backnum"),
            }
        )

    return {
        "team_name": team_name,
        "starter_pitcher": pitcher_entry,
        "players": player_entries,
    }
def build_preview_lineup_team(team_name: str, preview_lineup: dict[str, Any] | None) -> dict[str, Any] | None:
    """Build a team's lineup summary from preview data.

    Args:
        team_name: Display name stored under ``"team_name"``.
        preview_lineup: Preview lineup mapping; its ``"fullLineUp"`` list is
            read. May be ``None``.

    Returns:
        A dict with ``team_name``, ``starter_pitcher`` and ``players``, or
        ``None`` when there is no usable preview lineup. An empty
        ``fullLineUp`` also yields ``None`` so that callers which fall back
        with ``preview or other_source`` actually reach the fallback instead
        of receiving an empty (but truthy) team.
    """
    if not preview_lineup:
        return None
    full_lineup = preview_lineup.get("fullLineUp") or []
    if not full_lineup:
        return None

    def bat_order(player: dict[str, Any]) -> int:
        # Missing, None and empty values all count as slot 0.
        return int(player.get("batorder", 0) or 0)

    # The starter is the first entry labelled as starting pitcher or sitting
    # in batting slot 0.
    starter_pitcher = next(
        (
            player
            for player in full_lineup
            if player.get("positionName") == "선발투수" or bat_order(player) == 0
        ),
        None,
    )
    batters = sorted(
        (player for player in full_lineup if bat_order(player) > 0),
        key=bat_order,
    )
    return {
        "team_name": team_name,
        "starter_pitcher": {
            "name": starter_pitcher.get("playerName"),
            "position": "투수",
            "number": starter_pitcher.get("backnum"),
        }
        if starter_pitcher
        else None,
        "players": [
            {
                "bat_order": bat_order(player),
                "name": player.get("playerName"),
                "position": player.get("positionName"),
                "number": player.get("backnum"),
            }
            for player in batters
        ],
    }
def build_lineup_summary(
    game_id: str,
    game_info: dict[str, Any],
    relay_data: dict[str, Any],
    preview_data: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the away/home lineup summaries for a game.

    The preview lineup is preferred for each team; when it produces nothing
    the team's relay lineup (``awayLineup`` / ``homeLineup``) is used instead.

    Returns:
        ``{"away_team": ..., "home_team": ...}``.
    """
    away_name, home_name = get_team_names(game_id, game_info)
    preview = preview_data or {}
    away_team = build_preview_lineup_team(away_name, preview.get("awayTeamLineUp"))
    home_team = build_preview_lineup_team(home_name, preview.get("homeTeamLineUp"))
    # relay_data is only indexed for a team whose preview lineup is missing.
    if not away_team:
        away_team = build_lineup_team(away_name, relay_data["awayLineup"])
    if not home_team:
        home_team = build_lineup_team(home_name, relay_data["homeLineup"])
    return {"away_team": away_team, "home_team": home_team}
def infer_game_type(game_info: dict[str, Any]) -> str:
    """Return the game type label for *game_info*.

    A non-blank ``roundName`` is returned as is (stripped). Otherwise the
    lower-cased ``roundCode`` is searched for the fragments of
    ``GAME_TYPE_MAP`` in declaration order, defaulting to ``"정규경기"``.
    """
    explicit_name = str(game_info.get("roundName") or "").strip()
    if explicit_name:
        return explicit_name
    code = str(game_info.get("roundCode") or "").lower()
    return next(
        (label for fragment, label in GAME_TYPE_MAP.items() if fragment in code),
        "정규경기",
    )
def derive_umpires(record_data: dict[str, Any]) -> dict[str, str | None]:
    """Extract the umpire crew from the game record.

    The first ``etcRecords`` item whose ``how`` is ``"심판"`` supplies a
    whitespace-separated name list; the first four names are assigned to
    ``chief``, ``first_base``, ``second_base`` and ``third_base``. Positions
    without a name are ``None``.
    """
    names: list[str] = []
    for item in record_data.get("etcRecords", []):
        if item.get("how") == "심판":
            names = item.get("result", "").split()
            break
    roles = ("chief", "first_base", "second_base", "third_base")
    # Pad with None so that every role receives a value.
    padded = (names + [None] * len(roles))[: len(roles)]
    return dict(zip(roles, padded))
def to_kbo_game_id(game_id: str) -> str:
    """Return the first 12 characters of *game_id* followed by ``"0"``."""
    prefix = game_id[:12]
    return prefix + "0"
def build_iso_datetime(game_date: str | None, hhmm: str | None) -> str | None:
    """Combine a date and an ``H:MM`` time into an ISO-8601 timestamp.

    Returns:
        ``datetime.isoformat()`` of the combined value, or ``None`` when
        either input is empty, the time has no ``":"``, or the parts do not
        form a valid date-time.
    """
    if not (game_date and hhmm):
        return None
    hour_part, separator, minute_part = hhmm.strip().partition(":")
    if not separator:
        return None
    try:
        stamp = "{}T{:02d}:{:02d}:00".format(game_date, int(hour_part), int(minute_part))
        moment = datetime.fromisoformat(stamp)
    except ValueError:
        # Non-numeric parts and out-of-range values both end up here.
        return None
    return moment.isoformat()
def fetch_kbo_review_meta(client: httpx.Client, game_id: str, game_info: dict[str, Any]) -> dict[str, Any]:
    """Fetch scoreboard metadata for the game from the KBO web service.

    The ``srId`` candidates for the inferred game type are tried in order.
    The first payload whose ``code`` is ``"100"`` and which carries at least
    one of ``END_TM``, ``START_TM``, ``USE_TM`` or ``CROWD_CN`` is returned.

    Returns:
        The accepted payload, or ``{}`` when no candidate qualifies.

    Raises:
        Whatever ``raise_for_status()`` raises for an error response.
    """
    endpoint = "https://www.koreabaseball.com/ws/Schedule.asmx/GetScoreBoardScroll"
    meta_keys = ("END_TM", "START_TM", "USE_TM", "CROWD_CN")

    game_type = infer_game_type(game_info)
    sr_ids = KBO_SR_ID_CANDIDATES.get(game_type, KBO_SR_ID_CANDIDATES["정규경기"])
    kbo_game_id = to_kbo_game_id(game_id)
    season = str(game_info.get("seasonYear") or "")

    for sr_id in sr_ids:
        form = {
            "leId": "1",
            "srId": sr_id,
            "seasonId": season,
            "gameId": kbo_game_id,
        }
        response = client.post(endpoint, data=form)
        response.raise_for_status()
        payload = response.json()
        accepted = str(payload.get("code")) == "100"
        if accepted and any(payload.get(key) for key in meta_keys):
            return payload
    return {}
def build_game_info(game_info: dict[str, Any], record_data: dict[str, Any], review_meta: dict[str, Any]) -> dict[str, Any]:
    """Assemble the ``game_info`` section of the report.

    Args:
        game_info: Game mapping supplying date, stadium, start time, season
            and team names.
        record_data: Record mapping from which the umpires are derived.
        review_meta: Scoreboard metadata supplying ``END_TM`` and ``CROWD_CN``.

    Returns:
        A dict with the keys ``date``, ``stadium``, ``start_time``,
        ``end_time``, ``season``, ``game_type``, ``home_team``, ``away_team``,
        ``attendance`` and ``umpires``.
    """
    game_date = game_info.get("gameDate")
    summary: dict[str, Any] = {}
    summary["date"] = game_date
    summary["stadium"] = game_info.get("stadium")
    summary["start_time"] = game_info.get("gameDateTime")
    summary["end_time"] = build_iso_datetime(game_date, review_meta.get("END_TM"))
    summary["season"] = game_info.get("seasonYear")
    summary["game_type"] = infer_game_type(game_info)
    summary["home_team"] = game_info.get("homeTeamName")
    summary["away_team"] = game_info.get("awayTeamName")
    summary["attendance"] = review_meta.get("CROWD_CN")
    summary["umpires"] = derive_umpires(record_data)
    return summary
def get_half_inning_title(relays: list[dict[str, Any]], inning: int, home_or_away: int) -> str:
    """Return the title of a half inning.

    The stripped text of the first type-0 text option found in *relays* is
    used. When no such option exists the title is synthesised as
    ``"<inning>회초"`` for ``home_or_away == 0`` and ``"<inning>회말"``
    otherwise.

    NOTE(review): the fallback labels were empty strings in both branches,
    which made top and bottom halves indistinguishable; the wording used here
    follows the "N회초/N회말" form matched elsewhere in this file -- confirm
    it is the intended text.
    """
    for relay in relays:
        for option in relay.get("textOptions", []):
            if option.get("type") == 0:
                return option.get("text", "").strip()
    half_label = "회초" if home_or_away == 0 else "회말"
    return f"{inning}{half_label}"
def get_batter_title(relay: dict[str, Any], options: list[dict[str, Any]]) -> str:
    """Return the batter title for a relay block, or ``""`` if none is usable.

    The first type-8 option's stripped text wins when it is non-empty.
    Otherwise the relay's own ``title`` is used, unless it is blank, contains
    ``"공격"`` or starts with ``"="``.
    """
    for option in options:
        if option.get("type") == 8:
            header = option.get("text", "").strip()
            if header:
                return header
            # Only the first type-8 option is ever considered.
            break
    fallback = (relay.get("title") or "").strip()
    unusable = not fallback or "공격" in fallback or fallback.startswith("=")
    return "" if unusable else fallback
def format_pitch_text(option: dict[str, Any]) -> str:
    """Render a pitch option as text, appending speed and pitch type.

    Returns:
        ``"<text> (<speed>km, <stuff>)"`` with whichever details are present,
        or just the stripped text when there are none.
    """
    label = option.get("text", "").strip()
    speed = str(option.get("speed") or "").strip()
    pitch_type = str(option.get("stuff") or "").strip()
    extras = [part for part in (f"{speed}km" if speed else "", pitch_type) if part]
    if not extras:
        return label
    return "{} ({})".format(label, ", ".join(extras))
def classify_pitch_result(text: str, code: str | None) -> str:
    """Classify a pitch into a short result code.

    Resolution order:
      1. Bunt wording in *text* (spaces ignored): ``"BS"`` for a bunt
         strike, ``"BF"`` for a bunt foul.
      2. The supplied *code*, unless it is empty or ``"V"``.
      3. Keywords in *text*: ``"볼"`` -> ``"B"``, ``"스트라이크"`` -> ``"T"``,
         ``"헛스윙"`` -> ``"S"``, ``"파울"`` -> ``"F"``, ``"타격"`` -> ``"H"``.

    Returns:
        The result code, or ``""`` when nothing matches.

    NOTE(review): the keyword for ``"B"`` was an empty string, which matches
    every text and made the remaining keywords unreachable; ``"볼"`` is the
    reconstructed keyword -- confirm against the upstream file.
    """
    normalized = text.replace(" ", "")
    if any(key in normalized for key in ("번트헛스윙", "헛스윙번트", "번트시도스트라이크")):
        return "BS"
    if "번트파울" in normalized:
        return "BF"
    if code and code != "V":
        return code
    keyword_codes = (
        ("볼", "B"),
        ("스트라이크", "T"),
        ("헛스윙", "S"),
        ("파울", "F"),
        ("타격", "H"),
    )
    for keyword, result in keyword_codes:
        if keyword in text:
            return result
    return ""
def classify_result_type(text: str) -> str:
    """Map a plate-appearance result text to a result type identifier.

    Rules are evaluated top to bottom and the first match wins. The
    ``"낫아웃"`` check ignores spaces; every other keyword is looked up in the
    text as written. Texts that match nothing are classified as ``"play"``.
    """
    if "낫아웃" in text.replace(" ", ""):
        return "strikeout_not_out"

    # Basic single/double/triple detection lives here; runner events may
    # extend these types later on.
    leading_rules = (
        (("고의사구",), "intentional_walk"),
        (("볼넷",), "walk"),
        (("삼진",), "strikeout"),
        (("몸에 맞는 볼", "몸에 맞는 공", "사구", "헤드샷"), "hit_by_pitch"),
        (("홈런",), "home_run"),
        (("3루타",), "triple"),
        (("2루타",), "double"),
        (("번트안타",), "bunt_hit"),
        (("1루타", "내야안타"), "single"),
    )
    trailing_rules = (
        (("야수선택",), "reach_on_fielder_choice"),
        (("땅볼로 출루", "땅볼출루"), "reach_on_grounder"),
        (("희생번트",), "sacrifice_bunt"),
        (("희생플라이",), "sacrifice_fly"),
        (("병살타",), "double_play"),
        (
            ("플라이 아웃", "땅볼 아웃", "인필드플라이 아웃", "라인드라이브 아웃", "직선타 아웃", "라인드라이브", "직선타"),
            "out",
        ),
    )

    def first_match(rules):
        for needles, label in rules:
            if any(needle in text for needle in needles):
                return label
        return None

    label = first_match(leading_rules)
    # Reaching on an error needs both keywords, so it sits between the tables.
    if label is None and "실책" in text and "출루" in text:
        label = "reach_on_error"
    if label is None:
        label = first_match(trailing_rules)
    return label or "play"
def parse_runner_event(text: str) -> dict[str, Any]:
    """Parse a base-runner relay text into a structured event.

    Returns:
        A dict with ``type``, ``runner``, ``fromBase``, ``toBase``,
        ``extra_advance``, ``text`` (the input) and ``action_label``.
        ``fromBase``/``toBase`` are ``None`` when no base is recognised;
        ``toBase`` is 4 when the text contains ``"홈인"``.
    """
    # --- event type: steals first, then the first matching keyword ---------
    if "도루" in text:
        event_type = "steal_fail" if "실패" in text else "steal"
    else:
        type_keywords = (
            ("홈인", "score"),
            ("포스아웃", "force_out"),
            ("견제사", "pickoff_out"),
            ("태그아웃", "tag_out"),
            ("실책", "error_advance"),
            ("폭투", "wild_pitch_advance"),
            ("포일", "passed_ball_advance"),
            ("진루", "advance"),
        )
        event_type = next(
            (name for keyword, name in type_keywords if keyword in text),
            "runner_event",
        )

    # --- bases: the first origin label wins, the last destination wins -----
    origin_labels = (("1루주자", 1), ("2루주자", 2), ("3루주자", 3), ("1루", 1), ("2루", 2), ("3루", 3))
    from_base = next((base for label, base in origin_labels if label in text), None)

    destination_labels = (("1루까지", 1), ("2루까지", 2), ("3루까지", 3))
    to_base = next(
        (base for label, base in reversed(destination_labels) if label in text),
        None,
    )
    if "홈인" in text:
        to_base = 4

    # --- runner name: left of " : " with the role prefixes removed ---------
    runner_name = text.split(" : ", 1)[0]
    for prefix in ("1루주자 ", "2루주자 ", "3루주자 ", "대주자 "):
        runner_name = runner_name.replace(prefix, "")
    runner_name = runner_name.strip()

    extra_advance = 0
    if "주자의 재치로" in text and from_base is not None and to_base is not None:
        extra_advance = max(0, to_base - from_base)

    # --- explicit label used for KBO manager site button mapping -----------
    compact = text.replace(" ", "")
    if "실책으로" in compact:
        action_label = "수비 실책"
    elif "도루" in compact:
        action_label = "도루시도 아웃" if "실패" in compact else "도루성공"
    else:
        label_keywords = (
            ("폭투", "폭투-진루성공"),
            ("포일", "포일-진루성공"),
            ("태그", "태그아웃"),
            ("포스", "포스아웃"),
            ("견제", "견제 아웃"),
        )
        action_label = next(
            (label for keyword, label in label_keywords if keyword in compact),
            None,
        )
        if action_label is None:
            walk_keywords = ("볼넷", "포볼", "고의사구", "몸에맞는", "사구")
            is_walk = any(keyword in compact for keyword in walk_keywords)
            action_label = "볼넷 진루" if is_walk else "일반 진루"

    return {
        "type": event_type,
        "runner": runner_name,
        "fromBase": from_base,
        "toBase": to_base,
        "extra_advance": extra_advance,
        "text": text,
        "action_label": action_label,
    }
def normalize_review_result_token(token: str, review_item: str) -> str | None:
    """Normalise a raw review result token for the given review item.

    The review item selects an ordered list of ``(keywords, result)`` rules;
    the first rule with a keyword contained in the stripped token wins.

    Returns:
        The normalised result, or ``None`` for a blank or unrecognised token.
    """
    token = token.strip()
    if not token:
        return None

    if review_item in {"홈런타구 페어 파울", "외야타구 페어 파울"}:
        rules = ((("페어",), "페어"), (("파울",), "파울"))
    elif review_item in {"포수/태그플레이 아웃/세이프", "야수의 포구"}:
        rules = ((("아웃",), "아웃"), (("세이프",), "세이프"))
    elif review_item == "헛스윙":
        # The no-swing rule must come first: "노스윙" also contains "스윙".
        rules = (
            (("불인정", "노스윙", "공포"), "노스윙"),
            (("스윙", "인정"), "스윙"),
        )
    else:
        # "불인정" contains "인정", so it has to be tested first as well.
        rules = ((("불인정",), "불인정"), (("인정",), "인정"))

    for needles, verdict in rules:
        if any(needle in token for needle in needles):
            return verdict
    return None
def infer_review_item(detail_text: str) -> str:
    """Infer the review item from the detail text of a video review.

    Keyword groups are tested in order and the first group with a keyword
    contained in *detail_text* decides; ``"기타"`` is the default.
    """
    keyword_groups = (
        (("체크스윙", "스윙"), "헛스윙"),
        (("홈런",), "홈런타구 페어 파울"),
        (("페어", "파울"), "외야타구 페어 파울"),
        (("태그", "견제", "도루", "아웃", "세이프", "타구 관련"), "포수/태그플레이 아웃/세이프"),
        (("포구", "노바운드", "바운드"), "야수의 포구"),
        (("몸에 맞",), "몸에 맞는 공"),
    )
    for keywords, item in keyword_groups:
        if any(keyword in detail_text for keyword in keywords):
            return item
    return "기타"
def parse_review_event(text: str) -> dict[str, Any]:
    """Parse a video-review / agreed-decision relay text.

    Returns:
        A dict with ``type`` (always ``"video_review"``), the original
        ``text``, ``requestInningLabel`` (inning number followed by the half
        marker found in the text, e.g. ``"3초"``, or ``None``),
        ``requestTeam``, ``reviewItem``, ``beforeResult``, ``finalResult``,
        ``isSuccess`` (``"성공"`` only when both results are known and
        differ, otherwise ``"실패"``) and ``timing``.

    NOTE(review): the half marker used to be dropped because both branches
    of the label expression were empty strings; appending the matched
    "초"/"말" is the reconstructed behaviour -- confirm the expected label
    format with the consumer of ``requestInningLabel``.
    """
    inning_match = re.search(r"(\d+)회(초|말)", text)
    request_team_match = re.search(r"([가-힣A-Za-z]+)요청\s*(?:비디오 판독|합의 판정)", text)
    detail_match = re.search(r"(?:비디오 판독|합의 판정):\s*(.+?)\s*([가-힣][가-힣\s]*)→([가-힣][가-힣\s]*)\s*$", text)

    # Without a "<detail> <before>→<after>" tail the whole text is the detail.
    detail_text = detail_match.group(1).strip() if detail_match else text
    review_item = infer_review_item(detail_text)
    before_result = normalize_review_result_token(detail_match.group(2), review_item) if detail_match else None
    after_result = normalize_review_result_token(detail_match.group(3), review_item) if detail_match else None

    request_inning_label = None
    if inning_match:
        request_inning_label = f"{inning_match.group(1)}{inning_match.group(2)}"

    overturned = bool(before_result and after_result and before_result != after_result)
    return {
        "type": "video_review",
        "text": text,
        "requestInningLabel": request_inning_label,
        "requestTeam": request_team_match.group(1) if request_team_match else None,
        "reviewItem": review_item,
        "beforeResult": before_result,
        "finalResult": after_result,
        "isSuccess": "성공" if overturned else "실패",
        "timing": "before_pitch" if "초구 전" in text else "after_pitch",
    }
def extract_change_actor(text: str) -> tuple[str | None, str | None, str]:
    """Split the subject of a change text into ``(role, bat_order, name)``.

    Only the part before ``" : "`` is inspected.

    Returns:
        ``("batter", "<n>", name)`` for an ``"<n>번타자 <name>"`` subject,
        ``(role, None, name)`` when the subject starts with a known role
        followed by a space, and ``(None, None, subject)`` otherwise.
    """
    subject = text.partition(" : ")[0].strip()

    if "번타자 " in subject:
        order_match = re.search(r"(\d+)번타자\s+(.+)$", subject)
        if order_match:
            order, name = order_match.groups()
            return "batter", order, name.strip()

    known_roles = (
        "대타", "대주자", "1루주자", "2루주자", "3루주자", "주자",
        "투수", "포수", "1루수", "2루수", "3루수", "유격수",
        "좌익수", "중견수", "우익수",
    )
    matched_role = next(
        (role for role in known_roles if subject.startswith(role + " ")),
        None,
    )
    if matched_role is None:
        return None, None, subject
    return matched_role, None, subject[len(matched_role):].strip()
def is_merged_pitcher_substitution(actor_role: str | None, in_role: str | None) -> bool:
    """Return True when a fielder (non-pitcher) is replaced by a pitcher."""
    if in_role != "투수":
        return False
    fielder_roles = ("포수", "1루수", "2루수", "3루수", "유격수", "좌익수", "중견수", "우익수")
    return actor_role in fielder_roles
def parse_change_event(text: str) -> dict[str, Any]:
    """Parse a substitution or position-change relay text.

    Returns:
        A dict with ``event_type`` ``"change"``, ``change_type``
        (``"position_change"``, ``"substitution"`` or
        ``"merged_pitcher_substitution"``), the original ``text``,
        ``actor_role``/``actor_name`` and, depending on the kind of change,
        ``bat_order``, ``player_name``, ``to_position``, ``out_player``,
        ``in_player``, ``in_role`` and ``pitcher_in_player``.

    A text without the ``" : "`` separator no longer raises ``IndexError``;
    its detail part is treated as empty.
    """
    is_position_change = "수비위치 변경" in text
    event: dict[str, Any] = {
        "event_type": "change",
        "change_type": "position_change" if is_position_change else "substitution",
        "text": text,
    }
    actor_role, batter_order, actor_name = extract_change_actor(text)
    event["actor_role"] = actor_role
    event["actor_name"] = actor_name
    if batter_order:
        event["bat_order"] = int(batter_order)

    # partition() yields "" for the detail when the separator is missing,
    # whereas split(...)[1] would raise.
    _, _, detail = text.partition(" : ")

    if is_position_change:
        event["player_name"] = actor_name
        event["to_position"] = detail.split("(으)로", 1)[0].strip()
        return event

    incoming = detail.split("(으)로 교체", 1)[0].strip()
    in_role, _, in_name = extract_change_actor(incoming)
    event["out_player"] = actor_name
    event["in_player"] = in_name
    event["in_role"] = in_role

    if is_merged_pitcher_substitution(actor_role, in_role):
        # A pitcher takes a fielder's place: the replaced fielder is recorded
        # as moving to designated hitter.
        event["change_type"] = "merged_pitcher_substitution"
        event["player_name"] = actor_name
        event["to_position"] = "지명타자"
        event["pitcher_in_player"] = in_name
        return event

    positional_roles = {"투수", "포수", "1루수", "2루수", "3루수", "유격수", "좌익수", "중견수", "우익수", "대타", "대주자"}
    if in_role in positional_roles:
        # Pinch hitters/runners have no fielding position.
        event["to_position"] = in_role if in_role not in {"대타", "대주자"} else None
    return event
def merge_runner_events(runner_events: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Collapse multiple events of the same runner into one event each.

    The first event seen for a runner is kept (and updated in place) while
    later ones are folded into it: the type is overwritten, the texts are
    joined with ``" / "`` and truthy ``toBase``/``extra_advance`` values
    replace the earlier ones. A later tag-out always forces the type to
    ``"tag_out"``. Events without a runner name are dropped.

    Returns:
        The merged events in order of each runner's first appearance.
    """
    by_runner: dict[str, dict[str, Any]] = {}
    for event in runner_events:
        runner = event.get("runner")
        if not runner:
            continue
        combined = by_runner.get(runner)
        if combined is None:
            by_runner[runner] = event
            continue

        combined["type"] = event.get("type", combined["type"])
        combined["text"] += f" / {event.get('text', '')}"
        for field in ("toBase", "extra_advance"):
            if event.get(field):
                combined[field] = event[field]
        if "태그아웃" in event.get("text", "") or event.get("type") == "tag_out":
            combined["type"] = "tag_out"
    return list(by_runner.values())
def build_relay_events(relay: dict[str, Any]) -> list[dict[str, Any]]:
"""Analyse one relay block and return its at-bat and substitution events.

The block's text options are sorted by sequence number and split into
segments wherever the pitch number resets to 1. Each segment contributes its
change events followed by an "at_bat" event when it produced any event text.

NOTE(review): this copy of the function has lost its leading indentation, so
the nesting of some statements (in particular the batter-name inference after
the result text and the toBase/extra_advance/else lines in the batter
base-running loop) cannot be read off the text -- confirm against the
original file before relying on the structure.
"""
options = sorted(relay.get("textOptions", []), key=option_seqno)
# 1. Split into segments (a new pitchNum 1 means the batter changed).
segments: list[list[dict[str, Any]]] = []
current_segment: list[dict[str, Any]] = []
for opt in options:
opt_type = opt.get("type")
# A first pitch arriving while the current segment already holds a pitch
# starts a new batter segment.
if opt_type == 1 and opt.get("pitchNum") == 1:
if any(o.get("type") == 1 for o in current_segment):
segments.append(current_segment)
current_segment = []
current_segment.append(opt)
if current_segment:
segments.append(current_segment)
# 2. Build the event objects for each segment.
results = []
relay_batter_title = get_batter_title(relay, options)
for i, seg_options in enumerate(segments):
seg_changes = []
seg_event_texts = []
seg_pitches = []
seg_runner_events = []
seg_review_events = []
seg_extra_events = []
seg_result_text = None
# Find the batter name specific to this segment.
seg_batter_name = None
# First check for a type 8 (batter title) option.
seg_batter_name = next((o.get("text", "").strip() for o in seg_options if o.get("type") == 8), None)
for opt in seg_options:
ot = opt.get("type")
txt = opt.get("text", "").strip()
# Blank texts, skipped option types and hidden texts produce nothing.
if not txt or ot in SKIP_OPTION_TYPES:
continue
if txt in HIDDEN_EVENT_TEXTS:
continue
# Substitutions / position changes are collected separately from event text.
if any(k in txt for k in CHANGE_KEYWORDS):
seg_changes.append(parse_change_event(txt))
continue
# Type 1: a pitch.
if ot == 1:
seg_event_texts.append(format_pitch_text(opt))
seg_pitches.append({
"pitchNo": opt.get("pitchNum"),
"pitchResult": classify_pitch_result(txt, opt.get("pitchResult")),
"pitchResultText": txt.replace(f"{opt.get('pitchNum')}", "", 1),
"speedKmh": int(opt["speed"]) if opt.get("speed") not in (None, "") else None,
"pitchType": opt.get("stuff"),
"runnerEvents": [],
})
continue
# Type 14: runner event, attached to the latest pitch when there is one.
if ot == 14:
if seg_pitches:
seg_pitches[-1]["runnerEvents"].append(parse_runner_event(txt))
else:
seg_runner_events.append(parse_runner_event(txt))
continue
# Type 24: runner event kept at segment level.
if ot == 24:
seg_runner_events.append(parse_runner_event(txt))
continue
# Everything else becomes event text and is classified by its wording.
seg_event_texts.append(txt)
if "비디오 판독" in txt or "합의 판정" in txt:
seg_review_events.append(parse_review_event(txt))
elif "체크스윙" in txt:
seg_extra_events.append({"type": "appeal_or_judgement", "text": txt})
elif any(r in txt for r in ["1루주자", "2루주자", "3루주자", "대주자", "도루", "홈인", "포스아웃"]) or ("진루" in txt and "출루" not in txt):
seg_runner_events.append(parse_runner_event(txt))
else:
seg_result_text = txt
# Infer the batter name from the result text (e.g. "<name> : <play description>").
if " : " in txt and seg_batter_name is None:
name_part = txt.split(" : ", 1)[0].strip()
if name_part and len(name_part) < 10: # skip overly long text
seg_batter_name = name_part
# Decide the final batter name.
if not seg_batter_name:
# The first segment uses the relay-wide title; later segments would be inferred
# from e.g. the preceding pinch-hitter info (for now only the title fallback exists).
seg_batter_name = relay_batter_title if i == 0 else ""
# Merge runner events.
for p in seg_pitches:
p["runnerEvents"] = merge_runner_events(p["runnerEvents"])
seg_merged_runner_events = merge_runner_events(seg_runner_events)
# Batter result object.
res_obj = None
if seg_result_text:
base_type = classify_result_type(seg_result_text)
res_obj = {"type": base_type, "text": seg_result_text}
# Check the batter's own subsequent base-running (out on the bases, advance on error, ...).
# The last whitespace-separated word of the batter title is compared with runner names.
b_name = seg_batter_name.split()[-1] if seg_batter_name else ""
final_runners = []
for r in seg_merged_runner_events:
if b_name and r.get("runner") == b_name:
# For base hits, extend the type according to the base-running outcome.
if base_type in {"single", "double", "triple"}:
r_type = r.get("type", "")
# 1) Put out on the bases.
if r_type in {"tag_out", "force_out", "steal_fail", "pickoff_out"}:
res_obj["type"] = f"{base_type}_runner_out"
# 2) Extra advance caused by an error.
elif r_type == "error_advance":
res_obj["type"] = f"{base_type}_error_advance"
if r.get("toBase"): res_obj["toBase"] = r["toBase"]
if r.get("extra_advance"): res_obj["extra_advance"] = r["extra_advance"]
else:
final_runners.append(r)
seg_merged_runner_events = final_runners
# Assemble the segment's results.
if seg_changes:
results.extend(seg_changes)
if seg_event_texts:
full_txt = f"{seg_batter_name} : " + ", ".join(seg_event_texts) if seg_batter_name else ", ".join(seg_event_texts)
results.append({
"event_type": "at_bat",
"batter": seg_batter_name,
"rawText": full_txt,
"pitches": seg_pitches,
"result": res_obj,
"runnerEvents": seg_merged_runner_events,
"reviewEvents": seg_review_events,
"extraEvents": seg_extra_events,
"changes": [] # handled separately inside the segment
})
return results
def build_half_inning(inning: int, home_or_away: int, relays: list[dict[str, Any]]) -> dict[str, Any]:
    """Build the report entry for one half inning.

    The relays are processed in sequence order and turned into events.
    Consecutive ``"at_bat"`` events are merged into the earlier one when the
    later event starts with a pitch number above 1 or names the same batter.

    Args:
        inning: Inning number.
        home_or_away: 0 for the top half, anything else for the bottom half.
        relays: Relay blocks belonging to this half inning.

    Returns:
        ``{"inning", "half" ("top"/"bottom"), "title", "events"}``.
    """
    title = get_half_inning_title(relays, inning, home_or_away)

    raw_events: list[dict[str, Any]] = []
    for relay in sorted(relays, key=relay_seqno):
        raw_events.extend(build_relay_events(relay))

    merged_events: list[dict[str, Any]] = []
    for event in raw_events:
        if not merged_events or event.get("event_type") != "at_bat":
            merged_events.append(event)
            continue
        prev = merged_events[-1]
        if prev.get("event_type") != "at_bat":
            merged_events.append(event)
            continue

        # Merge condition: the pitch count continues (first pitch number > 1)
        # or the batter is the same as in the previous event.
        current_pitches = event.get("pitches") or []
        # A pitch may carry pitchNo=None; treat that as 0 so the comparison
        # below cannot raise TypeError.
        first_pitch_no = (current_pitches[0].get("pitchNo") or 0) if current_pitches else 0
        is_same_batter = prev.get("batter") == event.get("batter")
        if not (first_pitch_no > 1 or is_same_batter):
            merged_events.append(event)
            continue

        prev["pitches"].extend(current_pitches)
        if event.get("result"):
            prev["result"] = event["result"]
        if event.get("rawText"):
            # Drop the duplicated "<name> : " prefix before appending.
            current_txt = event["rawText"]
            if " : " in current_txt:
                current_txt = current_txt.split(" : ", 1)[1]
            prev["rawText"] += " / " + current_txt
        prev["runnerEvents"].extend(event.get("runnerEvents") or [])
        prev["reviewEvents"].extend(event.get("reviewEvents") or [])
        prev["extraEvents"].extend(event.get("extraEvents") or [])

    return {
        "inning": inning,
        "half": "top" if home_or_away == 0 else "bottom",
        "title": title,
        "events": merged_events,
    }
def parse_inning_value(val: Any, default: float) -> float:
    """Convert an inning specifier into a sortable number.

    ``"<n>"`` and ``"<n>T"`` (top) map to ``n.0``, ``"<n>B"`` (bottom) maps
    to ``n + 0.5``; the suffix is case-insensitive. Any other text is parsed
    with ``float()``.

    Args:
        val: Specifier such as ``1``, ``"1T"`` or ``"9b"``; may be ``None``.
        default: Value returned for ``None``, blank or unparsable input.
    """
    if val is None:
        return default
    spec = str(val).upper().strip()
    if not spec:
        return default

    # "1T" / "1B" style: inning number with an optional top/bottom suffix.
    match = re.match(r"^(\d+)([TB]?)$", spec)
    if match:
        number = int(match.group(1))
        return number + 0.5 if match.group(2) == "B" else float(number)

    try:
        return float(spec)
    except ValueError:
        # Only a failed conversion is an expected outcome here; anything
        # else (e.g. KeyboardInterrupt) must keep propagating.
        return default
def collect_inning_data(client: httpx.Client, game_id: str, start_inning_val: str | None = None, end_inning_val: str | None = None) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Download the relay data inning by inning and build the half innings.

    Innings 1..``MAX_INNING`` are requested in order; the loop stops at the
    first request that fails or returns no relays. Half innings outside the
    ``[start_inning_val, end_inning_val]`` range are not built, but their raw
    relays are still collected.

    Returns:
        ``(half_innings, raw_relays)``.
    """
    lower_bound = parse_inning_value(start_inning_val, 0.0)
    upper_bound = parse_inning_value(end_inning_val, 99.0)
    half_innings: list[dict[str, Any]] = []
    raw_relays: list[dict[str, Any]] = []

    for inning in range(1, MAX_INNING + 1):
        url = f"https://api-gw.sports.naver.com/schedule/games/{game_id}/relay?inning={inning}"
        try:
            payload = request_json(client, url)
        except Exception:
            # Best effort: a failing request ends the download loop.
            break
        relays = payload.get("result", {}).get("textRelayData", {}).get("textRelays", [])
        if not relays:
            break

        by_side: dict[int, list[dict[str, Any]]] = defaultdict(list)
        for relay in relays:
            by_side[int(relay.get("homeOrAway", -1))].append(relay)
            raw_relays.append(relay)

        for side in (0, 1):
            side_relays = by_side.get(side, [])
            if not side_relays:
                continue
            # Numeric position of the half inning: top = n.0, bottom = n.5.
            position = inning + (0.5 if side == 1 else 0.0)
            if not (position < lower_bound or position > upper_bound):
                half_innings.append(build_half_inning(inning, side, side_relays))
    return half_innings, raw_relays
def extract_pitching_summary(record_data: dict[str, Any]) -> dict[str, list[str]]:
    """Group pitcher names by decision (win, loss, hold, save).

    Each ``pitchingResult`` entry whose ``wls`` code is known to
    ``RESULT_LABELS`` has its ``name`` appended to the matching list.
    """
    summary: dict[str, list[str]] = {
        label: [] for label in ("승리투수", "패전투수", "홀드", "세이브")
    }
    for pitcher in record_data.get("pitchingResult", []):
        label = RESULT_LABELS.get(pitcher.get("wls"))
        if label:
            summary[label].append(pitcher["name"])
    return summary
def collect_score_timeline(raw_relays: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Collect the score after each text option, ordered by sequence number.

    Only options that carry a non-empty ``currentGameState`` and a sequence
    number are included: an entry without ``seqno`` cannot be ordered and
    would make the sort fail. Missing or ``None`` scores count as 0.

    Returns:
        A list of ``{"seqno", "home_score", "away_score"}`` dicts sorted by
        ``seqno``.
    """
    timeline: list[dict[str, Any]] = []
    for relay in raw_relays:
        for option in relay.get("textOptions", []):
            state = option.get("currentGameState") or {}
            seqno = option.get("seqno")
            if not state or seqno is None:
                continue
            timeline.append(
                {
                    "seqno": seqno,
                    "home_score": int(state.get("homeScore") or 0),
                    "away_score": int(state.get("awayScore") or 0),
                }
            )
    timeline.sort(key=lambda item: item["seqno"])
    return timeline
def collect_blown_saves(raw_relays: list[dict[str, Any]], away_name: str, home_name: str) -> list[str]:
    """Return the sorted names of relievers charged with a blown save.

    A reliever qualifies when he enters in the 7th inning or later with his
    team ahead and the lead is gone (tied or behind) at some point while he
    is still the team's pitcher. His stint ends when the next pitcher of the
    same team enters; score changes after that are not charged to him.
    Previously the whole remainder of the game was scanned, so every earlier
    reliever who had held the lead was charged as well.

    Args:
        raw_relays: All relay blocks of the game.
        away_name: Away team display name.
        home_name: Home team display name.
    """
    timeline = collect_score_timeline(raw_relays)

    pitcher_entries: list[dict[str, Any]] = []
    for relay in raw_relays:
        inning = int(relay.get("inn", 0) or 0)
        if inning < 7:
            continue
        # The pitching team is the one that is not batting.
        batting_side = int(relay.get("homeOrAway", -1))
        pitcher_team = "home" if batting_side == 0 else "away"
        pitcher_team_name = home_name if pitcher_team == "home" else away_name
        for option in relay.get("textOptions", []):
            if option.get("type") != 2:
                continue
            player_change = option.get("playerChange") or {}
            in_player = player_change.get("inPlayer") or {}
            if in_player.get("playerPos") != "투수":
                continue
            entry_seqno = option.get("seqno")
            if entry_seqno is None:
                # Without a sequence number the entry cannot be placed on
                # the score timeline.
                continue
            state = option.get("currentGameState") or {}
            pitcher_entries.append(
                {
                    "name": in_player.get("playerName"),
                    "team": pitcher_team,
                    "team_name": pitcher_team_name,
                    "entry_seqno": entry_seqno,
                    "home_score": int(state.get("homeScore") or 0),
                    "away_score": int(state.get("awayScore") or 0),
                }
            )

    blown_save_pitchers: set[str] = set()
    for entry in pitcher_entries:
        if entry["team"] == "home":
            own_key, opp_key = "home_score", "away_score"
        else:
            own_key, opp_key = "away_score", "home_score"
        # Only pitchers who entered with a lead can blow it.
        if entry[own_key] <= entry[opp_key]:
            continue

        # The stint ends at the next pitching change of the same team.
        stint_end = min(
            (
                other["entry_seqno"]
                for other in pitcher_entries
                if other["team"] == entry["team"] and other["entry_seqno"] > entry["entry_seqno"]
            ),
            default=None,
        )
        for state in timeline:
            seqno = state["seqno"]
            if seqno is None or seqno <= entry["entry_seqno"]:
                continue
            if stint_end is not None and seqno >= stint_end:
                break
            if state[own_key] <= state[opp_key]:
                blown_save_pitchers.add(entry["name"])
                break
    return sorted(blown_save_pitchers)
def build_pitcher_section(record_data: dict[str, Any], raw_relays: list[dict[str, Any]], away_name: str, home_name: str) -> dict[str, list[str]]:
    """Return the pitching summary extended with the ``"블론세이브"`` list."""
    return {
        **extract_pitching_summary(record_data),
        "블론세이브": collect_blown_saves(raw_relays, away_name, home_name),
    }
def render_lineup_text(lineup_summary: dict[str, Any]) -> list[str]:
    """Render the lineup section as text lines.

    The away team is listed before the home team, separated by one blank
    line; there is no trailing blank line.
    """
    lines = ["[라인업]"]
    for team_key in ("away_team", "home_team"):
        team = lineup_summary[team_key]
        lines.append("[{}]".format(team["team_name"]))
        pitcher = team["starter_pitcher"]
        if pitcher:
            lines.append("선발투수: {name} (#{number}, {position})".format_map(pitcher))
        lines.extend(
            "{bat_order}번: {name} (#{number}, {position})".format_map(player)
            for player in team["players"]
        )
        lines.append("")
    # Drop the separator that follows the last team.
    if lines[-1] == "":
        lines.pop()
    return lines
def render_innings_text(innings: list[dict[str, Any]]) -> list[str]:
    """Render the per-inning at-bat log as text lines.

    Each half inning contributes its bracketed title and one line per event
    (``rawText`` for at-bats, ``text`` for anything else). Half innings are
    separated by a blank line; there is no trailing blank line.
    """
    lines = ["[이닝별 타석 로그]"]
    for half in innings:
        lines.append("[{}]".format(half["title"]))
        lines.extend(
            event["rawText"] if event["event_type"] == "at_bat" else event["text"]
            for event in half["events"]
        )
        lines.append("")
    if lines[-1] == "":
        lines.pop()
    return lines
def render_pitcher_text(pitcher_section: dict[str, list[str]]) -> list[str]:
    """Render the pitcher results as ``"<label>: <names>"`` lines.

    Names are comma-separated; a label without names is shown as ``"-"``.
    """
    labels = ("승리투수", "패전투수", "홀드", "세이브", "블론세이브")
    body = []
    for label in labels:
        names = pitcher_section.get(label, [])
        shown = ", ".join(names) if names else "-"
        body.append(f"{label}: {shown}")
    return ["[투수 결과]", *body]
def render_text(report: dict[str, Any]) -> str:
    """Render the whole report as plain text.

    The lineup, inning log and pitcher sections are joined line by line and
    separated from each other by a blank line.
    """
    lineup_block = "\n".join(render_lineup_text(report["lineups"]))
    innings_block = "\n".join(render_innings_text(report["game_contents"]))
    pitcher_block = "\n".join(render_pitcher_text(report["pitching_summary"]))
    return "\n\n".join((lineup_block, innings_block, pitcher_block))
def save_outputs(report: dict[str, Any], output_dir: Path, output_json: Path | None = None) -> Path:
    """Write the report as pretty-printed UTF-8 JSON and return its path.

    Args:
        report: Report to serialise; ``report["game_id"]`` names the default
            file.
        output_dir: Directory that is created and used for the default file
            ``<game_id>_report.json``.
        output_json: Explicit target path; takes precedence when given.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    default_target = output_dir / "{}_report.json".format(report["game_id"])
    target = output_json if output_json else default_target
    target.parent.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps(report, ensure_ascii=False, indent=2)
    target.write_text(serialized, encoding="utf-8")
    return target
def filter_report(report: dict[str, Any], inning: str | None = None, lineup_only: bool = False, start_inning: str | None = None, end_inning: str | None = None) -> dict[str, Any]:
    """Return a filtered deep copy of *report* (the input is left untouched).

    Args:
        report: Full report; must be JSON-serialisable.
        inning: When given, keep the range from this inning value to that
            value plus 0.5 and ignore *start_inning*/*end_inning*.
        lineup_only: When true, empty the game contents and the pitching
            summary and return immediately.
        start_inning: Lower bound of the kept range (default 0.0).
        end_inning: Upper bound of the kept range (default 99.0).
    """
    # A JSON round trip yields an independent copy of the report.
    filtered = json.loads(json.dumps(report, ensure_ascii=False))

    if lineup_only:
        filtered["game_contents"] = []
        filtered["pitching_summary"] = {
            label: []
            for label in ("승리투수", "패전투수", "홀드", "세이브", "블론세이브")
        }
        return filtered

    if inning is not None:
        lower = parse_inning_value(inning, 0.0)
        upper = lower + 0.5
    else:
        lower = parse_inning_value(start_inning, 0.0)
        upper = parse_inning_value(end_inning, 99.0)

    def position(half_inning: dict[str, Any]) -> float:
        # Top halves sit at n.0, bottom halves at n.5.
        offset = 0.5 if half_inning.get("half") == "bottom" else 0.0
        return float(half_inning.get("inning") or 0) + offset

    filtered["game_contents"] = [
        half_inning
        for half_inning in filtered.get("game_contents", [])
        if lower <= position(half_inning) <= upper
    ]
    return filtered
def build_report(game_id: str, start_inning: str | None = None, end_inning: str | None = None) -> dict[str, Any]:
    """Download all data for a game and assemble the report.

    Args:
        game_id: Game identifier; every character that is not an ASCII
            letter or digit is removed before use.
        start_inning: Optional lower bound for the inning log.
        end_inning: Optional upper bound for the inning log.

    Returns:
        A dict with ``game_id``, ``game_info``, ``lineups``,
        ``game_contents`` and ``pitching_summary``.
    """
    # Sanitise the id: keep only what the API accepts (letters and digits).
    game_id = re.sub(r"[^A-Za-z0-9]", "", game_id)
    game_url = f"https://api-gw.sports.naver.com/schedule/games/{game_id}"

    with httpx.Client(headers=HEADERS, timeout=20.0) as client:
        relay_payload = request_json(client, f"{game_url}/relay")
        record_payload = request_json(client, f"{game_url}/record?fields=all")
        game_payload = request_json(client, game_url)
        preview_payload = request_json(client, f"{game_url}/preview")

        relay_data = relay_payload["result"]["textRelayData"]
        record_data = record_payload["result"]["recordData"]
        game_info = game_payload["result"]["game"]
        preview_data = preview_payload["result"].get("previewData") or {}

        review_meta = fetch_kbo_review_meta(client, game_id, game_info)
        lineup_summary = build_lineup_summary(game_id, game_info, relay_data, preview_data)
        innings, raw_relays = collect_inning_data(
            client,
            game_id,
            start_inning_val=start_inning,
            end_inning_val=end_inning,
        )

    # Everything below works on already downloaded data only.
    away_name = lineup_summary["away_team"]["team_name"]
    home_name = lineup_summary["home_team"]["team_name"]
    pitcher_section = build_pitcher_section(record_data, raw_relays, away_name, home_name)
    return {
        "game_id": game_id,
        "game_info": build_game_info(game_info, record_data, review_meta),
        "lineups": lineup_summary,
        "game_contents": innings,
        "pitching_summary": pitcher_section,
    }
def main() -> None:
    """Command-line entry point: build, filter and save the report."""
    args = parse_args()
    # Sanitise the game id (keep ASCII letters and digits only).
    cleaned_id = re.sub(r"[^A-Za-z0-9]", "", args.game_id)
    report = build_report(
        cleaned_id,
        start_inning=args.start_inning,
        end_inning=args.end_inning,
    )
    filtered_report = filter_report(
        report,
        inning=None,
        lineup_only=args.lineup_only,
        start_inning=args.start_inning,
        end_inning=args.end_inning,
    )
    json_target = Path(args.output_json) if args.output_json else None
    save_outputs(filtered_report, Path(args.output_dir), output_json=json_target)


if __name__ == "__main__":
    main()