"""Build a JSON report for one KBO game: lineups, per-inning at-bat log, pitcher results.

Data is fetched from the Naver sports gateway (relay / record / game / preview
endpoints) and from the KBO scoreboard endpoint, then normalised into plain
dicts and written to disk.
"""
from __future__ import annotations

import argparse
import json
import re
from collections import defaultdict
from datetime import datetime
from pathlib import Path
from typing import Any

import httpx

DEFAULT_GAME_ID = "20260414LTLG02026"
MAX_INNING = 20

TEAM_CODE_MAP = {
    "HH": "한화",
    "HT": "KIA",
    "KT": "KT",
    "LG": "LG",
    "LT": "롯데",
    "NC": "NC",
    "OB": "두산",
    "SK": "SSG",
    "SS": "삼성",
    "WO": "키움",
}

HEADERS = {
    "User-Agent": "Mozilla/5.0",
    "Accept": "application/json, text/plain, */*",
    "Accept-Language": "ko-KR,ko;q=0.9",
    "Origin": "https://m.sports.naver.com",
    "x-sports-backend": "kotlin",
}

# Relay option types that never contribute to the at-bat log.
SKIP_OPTION_TYPES = {0, 8, 98, 99}
# Relay texts that are dropped from the log entirely.
HIDDEN_EVENT_TEXTS = {"투수 투수판 이탈"}
# Substrings that mark a relay text as a substitution / position change.
CHANGE_KEYWORDS = ("(으)로 교체", "수비위치 변경")
RESULT_LABELS = {"W": "승리투수", "L": "패전투수", "H": "홀드", "S": "세이브"}
HIDDEN_EVENT_TEXTS.update({"코칭스태프 마운드 방문", "포수 마운드 방문"})

GAME_TYPE_MAP = {
    "kbo_r": "정규경기",
    "wildcard": "와일드카드",
    "wc": "와일드카드",
    "semi_playoff": "준플레이오프",
    "semi_po": "준플레이오프",
    "playoff": "플레이오프",
    "po": "플레이오프",
    "korean_series": "한국시리즈",
    "ks": "한국시리즈",
}

# Order in which `srId` values are tried against the KBO scoreboard endpoint.
KBO_SR_ID_CANDIDATES = {
    "정규경기": ["0", "1", "2", "3", "4", "5", "7", "8", "9"],
    "와일드카드": ["3", "0", "1", "2", "4", "5", "7", "8", "9"],
    "준플레이오프": ["4", "0", "1", "2", "3", "5", "7", "8", "9"],
    "플레이오프": ["5", "0", "1", "2", "3", "4", "7", "8", "9"],
    "한국시리즈": ["7", "0", "1", "2", "3", "4", "5", "8", "9"],
}

REVIEW_RESULT_GROUPS = {
    "홈런타구 페어 파울": ("페어", "파울"),
    "외야타구 페어 파울": ("페어", "파울"),
    "포수/태그플레이 아웃/세이프": ("아웃", "세이프"),
    "야수의 포구": ("아웃", "세이프"),
    "몸에 맞는 공": ("인정", "불인정"),
    "파울": ("인정", "불인정"),
    "헛스윙": ("인정", "불인정"),
    "기타": ("인정", "불인정"),
}


def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the report generator."""
    parser = argparse.ArgumentParser(
        description="라인업, 이닝별 타석 로그, 투수 결과를 json/txt 파일로 저장합니다."
    )
    parser.add_argument("--game-id", default=DEFAULT_GAME_ID, help="예: 20250425NCSS02025")
    parser.add_argument("--output-dir", default="output", help="저장할 폴더 경로")
    parser.add_argument("--start-inning", help="시작 이닝 (예: 1, 1T(초), 1B(말))")
    parser.add_argument("--end-inning", help="종료 이닝 (예: 9, 9B)")
    parser.add_argument("--lineup-only", action="store_true", help="라인업만 포함한 리포트를 생성합니다.")
    parser.add_argument("--output-json", help="JSON 저장 경로를 직접 지정합니다.")
    return parser.parse_args()


def request_json(client: httpx.Client, url: str) -> dict[str, Any]:
    """GET *url* and return the decoded JSON body.

    Raises whatever ``raise_for_status`` raises for a non-success response.
    """
    response = client.get(url)
    response.raise_for_status()
    return response.json()


def option_seqno(option: dict[str, Any]) -> int:
    """Return the option's ``seqno`` as an int, or -1 when the key is absent."""
    return int(option.get("seqno", -1))


def relay_seqno(relay: dict[str, Any]) -> int:
    """Return the smallest ``seqno`` among the relay's text options, or -1 if none has one."""
    seqnos = [
        option_seqno(option)
        for option in relay.get("textOptions", [])
        if option.get("seqno") is not None
    ]
    return min(seqnos) if seqnos else -1


def get_team_names(game_id: str, game_info: dict[str, Any] | None = None) -> tuple[str, str]:
    """Return ``(away_name, home_name)``.

    Names come from *game_info* when given; otherwise they are decoded from the
    two-letter team codes at positions 8-9 (away) and 10-11 (home) of *game_id*.
    """
    if game_info:
        return game_info["awayTeamName"], game_info["homeTeamName"]
    away_code = game_id[8:10]
    home_code = game_id[10:12]
    return TEAM_CODE_MAP.get(away_code, away_code), TEAM_CODE_MAP.get(home_code, home_code)


def get_starting_pitcher(pitchers: list[dict[str, Any]]) -> dict[str, Any] | None:
    """Return the pitcher with the lowest ``seqno`` (missing counts as 999), or None."""
    if not pitchers:
        return None
    return min(pitchers, key=lambda pitcher: pitcher.get("seqno", 999))


def get_starting_batters(batters: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return one batter per batting order: the one with the lowest ``seqno`` in each slot."""
    starters_by_order: dict[int, dict[str, Any]] = {}
    ordered = sorted(batters, key=lambda item: (item.get("batOrder", 999), item.get("seqno", 999)))
    for batter in ordered:
        bat_order = batter.get("batOrder")
        if bat_order is None or bat_order in starters_by_order:
            continue
        starters_by_order[bat_order] = batter
    return [starters_by_order[order] for order in sorted(starters_by_order)]


def format_player_line(player: dict[str, Any]) -> str:
    """Format a player as ``name (#number, position, handedness)`` with ``-`` placeholders."""
    number = player.get("backnum") or "-"
    handedness = player.get("hitType") or player.get("hittype") or "-"
    position = player.get("posName") or player.get("pos") or "투수"
    name = player.get("name") or "-"
    return f"{name} (#{number}, {position}, {handedness})"


def build_lineup_team(team_name: str, lineup: dict[str, Any]) -> dict[str, Any]:
    """Build a team's starting lineup from the relay payload's lineup section."""
    starter_pitcher = get_starting_pitcher(lineup.get("pitcher", []))
    starting_batters = get_starting_batters(lineup.get("batter", []))
    pitcher_entry = None
    if starter_pitcher:
        pitcher_entry = {
            "name": starter_pitcher.get("name"),
            "position": "투수",
            "number": starter_pitcher.get("backnum"),
        }
    return {
        "team_name": team_name,
        "starter_pitcher": pitcher_entry,
        "players": [
            {
                "bat_order": batter.get("batOrder"),
                "name": batter.get("name"),
                "position": batter.get("posName"),
                "number": batter.get("backnum"),
            }
            for batter in starting_batters
        ],
    }


def _preview_bat_order(player: dict[str, Any]) -> int:
    """Return the preview player's ``batorder`` as an int, treating missing/empty as 0."""
    return int(player.get("batorder", 0) or 0)


def build_preview_lineup_team(team_name: str, preview_lineup: dict[str, Any] | None) -> dict[str, Any] | None:
    """Build a team's starting lineup from the preview payload, or None when it is empty.

    The starting pitcher is the first entry whose ``positionName`` is "선발투수"
    or whose batting order is 0; batters are the entries with a positive order.
    """
    if not preview_lineup:
        return None
    full_lineup = preview_lineup.get("fullLineUp") or []
    starter_pitcher = next(
        (
            player
            for player in full_lineup
            if player.get("positionName") == "선발투수" or _preview_bat_order(player) == 0
        ),
        None,
    )
    batters = sorted(
        (player for player in full_lineup if _preview_bat_order(player) > 0),
        key=_preview_bat_order,
    )
    pitcher_entry = None
    if starter_pitcher:
        pitcher_entry = {
            "name": starter_pitcher.get("playerName"),
            "position": "투수",
            "number": starter_pitcher.get("backnum"),
        }
    return {
        "team_name": team_name,
        "starter_pitcher": pitcher_entry,
        "players": [
            {
                "bat_order": int(player.get("batorder")),
                "name": player.get("playerName"),
                "position": player.get("positionName"),
                "number": player.get("backnum"),
            }
            for player in batters
        ],
    }


def build_lineup_summary(
    game_id: str,
    game_info: dict[str, Any],
    relay_data: dict[str, Any],
    preview_data: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Return both teams' lineups, preferring preview data and falling back to relay data."""
    away_name, home_name = get_team_names(game_id, game_info)
    preview = preview_data or {}
    away_preview = build_preview_lineup_team(away_name, preview.get("awayTeamLineUp"))
    home_preview = build_preview_lineup_team(home_name, preview.get("homeTeamLineUp"))
    return {
        "away_team": away_preview or build_lineup_team(away_name, relay_data["awayLineup"]),
        "home_team": home_preview or build_lineup_team(home_name, relay_data["homeLineup"]),
    }


def infer_game_type(game_info: dict[str, Any]) -> str:
    """Return the game type label.

    ``roundName`` wins when present; otherwise the first ``GAME_TYPE_MAP`` key
    contained in the lower-cased ``roundCode`` decides; default is "정규경기".
    """
    round_code = str(game_info.get("roundCode") or "").lower()
    round_name = str(game_info.get("roundName") or "").strip()
    if round_name:
        return round_name
    for key, label in GAME_TYPE_MAP.items():
        if key in round_code:
            return label
    return "정규경기"


def derive_umpires(record_data: dict[str, Any]) -> dict[str, str | None]:
    """Split the "심판" etc-record into chief / first / second / third base umpire names.

    Positions without a name (or a missing/empty record) are reported as None.
    """
    umpire_record = next(
        (item for item in record_data.get("etcRecords", []) if item.get("how") == "심판"),
        None,
    )
    # `result` may be present but null; treat that like an empty string.
    names = (umpire_record.get("result") or "").split() if umpire_record else []
    roles = ("chief", "first_base", "second_base", "third_base")
    return {role: names[index] if index < len(names) else None for index, role in enumerate(roles)}


def to_kbo_game_id(game_id: str) -> str:
    """Return the first 12 characters of *game_id* followed by "0"."""
    return f"{game_id[:12]}0"


def build_iso_datetime(game_date: str | None, hhmm: str | None) -> str | None:
    """Combine an ISO date and an ``H:MM`` time into an ISO datetime string.

    Returns None when either part is missing or cannot be parsed.
    """
    if not game_date or not hhmm:
        return None
    time_text = hhmm.strip()
    if not time_text or ":" not in time_text:
        return None
    hour_text, minute_text = time_text.split(":", 1)
    try:
        dt = datetime.fromisoformat(f"{game_date}T{int(hour_text):02d}:{int(minute_text):02d}:00")
    except ValueError:
        return None
    return dt.isoformat()


def fetch_kbo_review_meta(client: httpx.Client, game_id: str, game_info: dict[str, Any]) -> dict[str, Any]:
    """Query the KBO scoreboard endpoint and return the first usable payload.

    ``srId`` candidates are tried in the order configured for the game type. A
    payload is usable when its ``code`` is "100" and at least one of END_TM,
    START_TM, USE_TM or CROWD_CN is set. Returns ``{}`` when none qualifies.
    """
    game_type = infer_game_type(game_info)
    candidates = KBO_SR_ID_CANDIDATES.get(game_type, KBO_SR_ID_CANDIDATES["정규경기"])
    kbo_game_id = to_kbo_game_id(game_id)
    for sr_id in candidates:
        response = client.post(
            "https://www.koreabaseball.com/ws/Schedule.asmx/GetScoreBoardScroll",
            data={
                "leId": "1",
                "srId": sr_id,
                "seasonId": str(game_info.get("seasonYear") or ""),
                "gameId": kbo_game_id,
            },
        )
        response.raise_for_status()
        payload = response.json()
        if str(payload.get("code")) != "100":
            continue
        if not any(payload.get(key) for key in ("END_TM", "START_TM", "USE_TM", "CROWD_CN")):
            continue
        return payload
    return {}


def build_game_info(game_info: dict[str, Any], record_data: dict[str, Any], review_meta: dict[str, Any]) -> dict[str, Any]:
    """Assemble the report's ``game_info`` section from the three source payloads."""
    end_time = build_iso_datetime(game_info.get("gameDate"), review_meta.get("END_TM"))
    return {
        "date": game_info.get("gameDate"),
        "stadium": game_info.get("stadium"),
        "start_time": game_info.get("gameDateTime"),
        "end_time": end_time,
        "season": game_info.get("seasonYear"),
        "game_type": infer_game_type(game_info),
        "home_team": game_info.get("homeTeamName"),
        "away_team": game_info.get("awayTeamName"),
        "attendance": review_meta.get("CROWD_CN"),
        "umpires": derive_umpires(record_data),
    }


def get_half_inning_title(relays: list[dict[str, Any]], inning: int, home_or_away: int) -> str:
    """Return the text of the first type-0 option, or a generated "<inning>회초/말" title."""
    for relay in relays:
        for option in relay.get("textOptions", []):
            if option.get("type") == 0:
                return option.get("text", "").strip()
    half_label = "초" if home_or_away == 0 else "말"
    return f"{inning}회{half_label}"


def get_batter_title(relay: dict[str, Any], options: list[dict[str, Any]]) -> str:
    """Return the batter title of a relay block.

    Prefers the first type-8 option text; falls back to the relay ``title``
    unless that title contains "공격" or starts with "="; otherwise "".
    """
    batter_title = next(
        (option.get("text", "").strip() for option in options if option.get("type") == 8),
        "",
    )
    if batter_title:
        return batter_title
    title = (relay.get("title") or "").strip()
    if title and "공격" not in title and not title.startswith("="):
        return title
    return ""


def format_pitch_text(option: dict[str, Any]) -> str:
    """Return the pitch text, suffixed with "(<speed>km, <stuff>)" when either is known."""
    text = option.get("text", "").strip()
    speed = str(option.get("speed") or "").strip()
    stuff = str(option.get("stuff") or "").strip()
    details = []
    if speed:
        details.append(f"{speed}km")
    if stuff:
        details.append(stuff)
    return f"{text} ({', '.join(details)})" if details else text


def classify_pitch_result(text: str, code: str | None) -> str:
    """Map a pitch description to a short result code.

    Bunt keywords in the text take priority (BS / BF). Otherwise a non-empty
    *code* other than "V" is returned unchanged. Otherwise the text is matched
    against plain keywords (B, T, S, F, H). Returns "" when nothing matches.
    """
    normalized = text.replace(" ", "")
    if any(key in normalized for key in ("번트헛스윙", "헛스윙번트", "번트시도스트라이크")):
        return "BS"  # bunt strike
    if "번트파울" in normalized:
        return "BF"  # bunt foul
    if code and code != "V":
        return code
    # Bunt variants were already handled above, so only plain keywords remain.
    mapping = {
        "볼": "B",
        "스트라이크": "T",
        "헛스윙": "S",
        "파울": "F",
        "타격": "H",
    }
    for key, value in mapping.items():
        if key in text:
            return value
    return ""


def classify_result_type(text: str) -> str:
    """Classify an at-bat result text by keyword; the first matching rule wins.

    Returns "play" when no keyword matches.
    """
    clean_text = text.replace(" ", "")
    if "낫아웃" in clean_text:
        return "strikeout_not_out"
    if "고의사구" in text:
        return "intentional_walk"
    if "볼넷" in text:
        return "walk"
    if "삼진" in text:
        return "strikeout"
    if any(k in text for k in ["몸에 맞는 볼", "몸에 맞는 공", "사구", "헤드샷"]):
        return "hit_by_pitch"
    if "홈런" in text:
        return "home_run"
    # Basic single/double/triple detection; build_relay_events may extend the
    # type with a suffix based on the batter's own runner events.
    if "3루타" in text:
        return "triple"
    if "2루타" in text:
        return "double"
    if "번트안타" in text:
        return "bunt_hit"
    if "1루타" in text or "내야안타" in text:
        return "single"
    if "실책" in text and "출루" in text:
        return "reach_on_error"
    if "야수선택" in text:
        return "reach_on_fielder_choice"
    if "땅볼로 출루" in text or "땅볼출루" in text:
        return "reach_on_grounder"
    if "희생번트" in text:
        return "sacrifice_bunt"
    if "희생플라이" in text:
        return "sacrifice_fly"
    if "병살타" in text:
        return "double_play"
    if any(
        k in text
        for k in ["플라이 아웃", "땅볼 아웃", "인필드플라이 아웃", "라인드라이브 아웃", "직선타 아웃", "라인드라이브", "직선타"]
    ):
        return "out"
    return "play"


def parse_runner_event(text: str) -> dict[str, Any]:
    """Parse a base-runner relay text into a structured event.

    The result carries the event ``type``, the ``runner`` name (text before
    " : " with role prefixes removed), ``fromBase`` / ``toBase`` (home is 4),
    ``extra_advance``, the original ``text`` and an ``action_label``.
    """
    event_type = "runner_event"
    if "도루" in text:
        event_type = "steal_fail" if "실패" in text else "steal"
    elif "홈인" in text:
        event_type = "score"
    elif "포스아웃" in text:
        event_type = "force_out"
    elif "견제사" in text:
        event_type = "pickoff_out"
    elif "태그아웃" in text:
        event_type = "tag_out"
    elif "실책" in text:
        event_type = "error_advance"
    elif "폭투" in text:
        event_type = "wild_pitch_advance"
    elif "포일" in text:
        event_type = "passed_ball_advance"
    elif "진루" in text:
        event_type = "advance"

    from_base = None
    to_base = None
    # The explicit "N루주자" labels are tried before the bare "N루" ones; the
    # first label found in the text fixes the starting base.
    for label, base in (("1루주자", 1), ("2루주자", 2), ("3루주자", 3), ("1루", 1), ("2루", 2), ("3루", 3)):
        if label in text and from_base is None:
            from_base = base
    for label, base in (("1루까지", 1), ("2루까지", 2), ("3루까지", 3)):
        if label in text:
            to_base = base
    if "홈인" in text:
        to_base = 4

    runner_name = (
        text.split(" : ", 1)[0]
        .replace("1루주자 ", "")
        .replace("2루주자 ", "")
        .replace("3루주자 ", "")
        .replace("대주자 ", "")
        .strip()
    )

    extra_advance = 0
    if "주자의 재치로" in text and from_base is not None and to_base is not None:
        extra_advance = max(0, to_base - from_base)

    # Explicit label used for button mapping on the KBO manager site.
    clean_text = text.replace(" ", "")
    if "실책으로" in clean_text:
        action_label = "수비 실책"
    elif "도루" in clean_text:
        action_label = "도루성공" if "실패" not in clean_text else "도루시도 아웃"
    elif "폭투" in clean_text:
        action_label = "폭투-진루성공"
    elif "포일" in clean_text:
        action_label = "포일-진루성공"
    elif "태그" in clean_text:
        action_label = "태그아웃"
    elif "포스" in clean_text:
        action_label = "포스아웃"
    elif "견제" in clean_text:
        action_label = "견제 아웃"
    elif any(k in clean_text for k in ["볼넷", "포볼", "고의사구", "몸에맞는", "사구"]):
        action_label = "볼넷 진루"
    else:
        action_label = "일반 진루"

    return {
        "type": event_type,
        "runner": runner_name,
        "fromBase": from_base,
        "toBase": to_base,
        "extra_advance": extra_advance,
        "text": text,
        "action_label": action_label,
    }


def normalize_review_result_token(token: str, review_item: str) -> str | None:
    """Normalise one side of a review result ("before" or "after") for *review_item*.

    Returns None when the token is empty or contains no recognised keyword.
    """
    token = token.strip()
    if not token:
        return None
    if review_item in {"홈런타구 페어 파울", "외야타구 페어 파울"}:
        if "페어" in token:
            return "페어"
        if "파울" in token:
            return "파울"
    elif review_item in {"포수/태그플레이 아웃/세이프", "야수의 포구"}:
        if "아웃" in token:
            return "아웃"
        if "세이프" in token:
            return "세이프"
    elif review_item == "헛스윙":
        # "노스윙" must be checked first because it also contains "스윙".
        if "불인정" in token or "노스윙" in token or "공포" in token:
            return "노스윙"
        if "스윙" in token or "인정" in token:
            return "스윙"
    else:
        # "불인정" must be checked first because it also contains "인정".
        if "불인정" in token:
            return "불인정"
        if "인정" in token:
            return "인정"
    return None


def infer_review_item(detail_text: str) -> str:
    """Pick the review category for a review detail text; the first keyword rule wins."""
    if any(k in detail_text for k in ("체크스윙", "스윙")):
        return "헛스윙"
    if "홈런" in detail_text:
        return "홈런타구 페어 파울"
    if any(k in detail_text for k in ("페어", "파울")):
        return "외야타구 페어 파울"
    if any(k in detail_text for k in ("태그", "견제", "도루", "아웃", "세이프", "타구 관련")):
        return "포수/태그플레이 아웃/세이프"
    if any(k in detail_text for k in ("포구", "노바운드", "바운드")):
        return "야수의 포구"
    if "몸에 맞" in detail_text:
        return "몸에 맞는 공"
    return "기타"


def parse_review_event(text: str) -> dict[str, Any]:
    """Parse a video-review ("비디오 판독" / "합의 판정") relay text.

    ``isSuccess`` is "성공" only when both the before and after results were
    recognised and differ; otherwise "실패".
    """
    inning_match = re.search(r"(\d+)회(초|말)", text)
    request_team_match = re.search(r"([가-힣A-Za-z]+)요청\s*(?:비디오 판독|합의 판정)", text)
    detail_match = re.search(
        r"(?:비디오 판독|합의 판정):\s*(.+?)\s*([가-힣][가-힣\s]*)→([가-힣][가-힣\s]*)\s*$",
        text,
    )
    detail_text = detail_match.group(1).strip() if detail_match else text
    review_item = infer_review_item(detail_text)
    before_result = None
    after_result = None
    if detail_match:
        before_result = normalize_review_result_token(detail_match.group(2), review_item)
        after_result = normalize_review_result_token(detail_match.group(3), review_item)
    timing = "before_pitch" if "초구 전" in text else "after_pitch"
    overturned = bool(before_result and after_result and before_result != after_result)
    return {
        "type": "video_review",
        "text": text,
        "requestInningLabel": f"{inning_match.group(1)}{inning_match.group(2)}" if inning_match else None,
        "requestTeam": request_team_match.group(1) if request_team_match else None,
        "reviewItem": review_item,
        "beforeResult": before_result,
        "finalResult": after_result,
        "isSuccess": "성공" if overturned else "실패",
        "timing": timing,
    }


def extract_change_actor(text: str) -> tuple[str | None, str | None, str]:
    """Split the part of *text* before " : " into ``(role, batting_order, name)``.

    "<n>번타자 <name>" yields role "batter" with the order as a string. A known
    role prefix yields that role with no order. Otherwise role and order are
    None and the whole left-hand side is returned as the name.
    """
    lhs = text.split(" : ", 1)[0].strip()
    if "번타자 " in lhs:
        order_match = re.search(r"(\d+)번타자\s+(.+)$", lhs)
        if order_match:
            return "batter", order_match.group(1), order_match.group(2).strip()
    roles = (
        "대타", "대주자", "1루주자", "2루주자", "3루주자", "주자", "투수", "포수",
        "1루수", "2루수", "3루수", "유격수", "좌익수", "중견수", "우익수",
    )
    for role in roles:
        if lhs.startswith(role + " "):
            return role, None, lhs[len(role):].strip()
    return None, None, lhs


def is_merged_pitcher_substitution(actor_role: str | None, in_role: str | None) -> bool:
    """Return True when a fielder (catcher, infielder or outfielder) is replaced by a pitcher."""
    field_roles = {"포수", "1루수", "2루수", "3루수", "유격수", "좌익수", "중견수", "우익수"}
    return actor_role in field_roles and in_role == "투수"


def parse_change_event(text: str) -> dict[str, Any]:
    """Parse a substitution or position-change relay text into a change event.

    Text without the " : " separator is tolerated: the replacement side is then
    treated as empty instead of raising.
    """
    is_position_change = "수비위치 변경" in text
    event: dict[str, Any] = {
        "event_type": "change",
        "change_type": "position_change" if is_position_change else "substitution",
        "text": text,
    }
    actor_role, batter_order, actor_name = extract_change_actor(text)
    event["actor_role"] = actor_role
    event["actor_name"] = actor_name
    if batter_order:
        event["bat_order"] = int(batter_order)

    # partition() yields "" for the target when the separator is missing.
    _, _, target = text.partition(" : ")

    if is_position_change:
        event["player_name"] = actor_name
        event["to_position"] = target.split("(으)로", 1)[0].strip()
        return event

    incoming = target.split("(으)로 교체", 1)[0].strip()
    in_role, _, in_name = extract_change_actor(incoming)
    event["out_player"] = actor_name
    event["in_player"] = in_name
    event["in_role"] = in_role

    if is_merged_pitcher_substitution(actor_role, in_role):
        event["change_type"] = "merged_pitcher_substitution"
        event["player_name"] = actor_name
        event["to_position"] = "지명타자"
        event["pitcher_in_player"] = in_name
        return event

    positional_roles = {
        "투수", "포수", "1루수", "2루수", "3루수", "유격수", "좌익수", "중견수", "우익수", "대타", "대주자",
    }
    if in_role in positional_roles:
        # Pinch hitters / pinch runners have no defensive position.
        event["to_position"] = in_role if in_role not in {"대타", "대주자"} else None
    return event


def merge_runner_events(runner_events: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Collapse several events of the same runner into one, keeping first-seen order.

    Later events overwrite ``type`` / ``toBase`` / ``extra_advance`` and append
    their text; a tag-out always forces the type to "tag_out". Events without a
    runner name are dropped. The input dicts are not modified.
    """
    merged: dict[str, dict[str, Any]] = {}
    for runner_event in runner_events:
        name = runner_event.get("runner")
        if not name:
            continue
        if name not in merged:
            # Copy so that merging later events does not mutate the caller's dict.
            merged[name] = dict(runner_event)
            continue
        target = merged[name]
        target["type"] = runner_event.get("type", target["type"])
        target["text"] += f" / {runner_event.get('text', '')}"
        if runner_event.get("toBase"):
            target["toBase"] = runner_event["toBase"]
        if runner_event.get("extra_advance"):
            target["extra_advance"] = runner_event["extra_advance"]
        if "태그아웃" in runner_event.get("text", "") or runner_event.get("type") == "tag_out":
            target["type"] = "tag_out"
    return list(merged.values())


def _split_relay_segments(options: list[dict[str, Any]]) -> list[list[dict[str, Any]]]:
    """Split ordered options into segments, starting a new one at each repeated first pitch."""
    segments: list[list[dict[str, Any]]] = []
    current_segment: list[dict[str, Any]] = []
    for opt in options:
        # A pitch numbered 1 while the current segment already holds a pitch
        # means the batter changed, so a new segment starts here.
        is_first_pitch = opt.get("type") == 1 and opt.get("pitchNum") == 1
        if is_first_pitch and any(o.get("type") == 1 for o in current_segment):
            segments.append(current_segment)
            current_segment = []
        current_segment.append(opt)
    if current_segment:
        segments.append(current_segment)
    return segments


def build_relay_events(relay: dict[str, Any]) -> list[dict[str, Any]]:
    """Turn one relay block into a list of change and at-bat events.

    The block's options are sorted by ``seqno`` and split into segments
    whenever the pitch number resets to 1; each segment yields its change
    events followed by at most one at-bat event.
    """
    options = sorted(relay.get("textOptions", []), key=option_seqno)
    segments = _split_relay_segments(options)

    results = []
    relay_batter_title = get_batter_title(relay, options)
    for i, seg_options in enumerate(segments):
        seg_changes = []
        seg_event_texts = []
        seg_pitches = []
        seg_runner_events = []
        seg_review_events = []
        seg_extra_events = []
        seg_result_text = None

        # Batter name for this segment: prefer a type-8 (batter title) option.
        seg_batter_name = next(
            ((o.get("text") or "").strip() for o in seg_options if o.get("type") == 8),
            None,
        )

        for opt in seg_options:
            ot = opt.get("type")
            txt = (opt.get("text") or "").strip()
            if not txt or ot in SKIP_OPTION_TYPES:
                continue
            if txt in HIDDEN_EVENT_TEXTS:
                continue
            if any(k in txt for k in CHANGE_KEYWORDS):
                seg_changes.append(parse_change_event(txt))
                continue
            if ot == 1:
                seg_event_texts.append(format_pitch_text(opt))
                seg_pitches.append({
                    "pitchNo": opt.get("pitchNum"),
                    "pitchResult": classify_pitch_result(txt, opt.get("pitchResult")),
                    "pitchResultText": txt.replace(f"{opt.get('pitchNum')}구 ", "", 1),
                    "speedKmh": int(opt["speed"]) if opt.get("speed") not in (None, "") else None,
                    "pitchType": opt.get("stuff"),
                    "runnerEvents": [],
                })
                continue
            if ot == 14:
                # Attached to the latest pitch when there is one.
                if seg_pitches:
                    seg_pitches[-1]["runnerEvents"].append(parse_runner_event(txt))
                else:
                    seg_runner_events.append(parse_runner_event(txt))
                continue
            if ot == 24:
                seg_runner_events.append(parse_runner_event(txt))
                continue

            seg_event_texts.append(txt)
            if "비디오 판독" in txt or "합의 판정" in txt:
                seg_review_events.append(parse_review_event(txt))
            elif "체크스윙" in txt:
                seg_extra_events.append({"type": "appeal_or_judgement", "text": txt})
            elif any(r in txt for r in ["1루주자", "2루주자", "3루주자", "대주자", "도루", "홈인", "포스아웃"]) or (
                "진루" in txt and "출루" not in txt
            ):
                seg_runner_events.append(parse_runner_event(txt))
            else:
                seg_result_text = txt
                # Infer the batter name from the result text,
                # e.g. "고종욱 : 좌익수 플라이 아웃".
                # NOTE(review): nesting reconstructed from a whitespace-mangled
                # source; assumed to apply to result texts only — confirm.
                if " : " in txt and seg_batter_name is None:
                    name_part = txt.split(" : ", 1)[0].strip()
                    if name_part and len(name_part) < 10:  # skip overly long sentences
                        seg_batter_name = name_part

        # Final batter name: only the first segment may fall back to the
        # relay-level title; later segments stay unnamed.
        if not seg_batter_name:
            seg_batter_name = relay_batter_title if i == 0 else ""

        for pitch in seg_pitches:
            pitch["runnerEvents"] = merge_runner_events(pitch["runnerEvents"])
        seg_merged_runner_events = merge_runner_events(seg_runner_events)

        res_obj = None
        if seg_result_text:
            base_type = classify_result_type(seg_result_text)
            res_obj = {"type": base_type, "text": seg_result_text}

            # Fold the batter's own subsequent running events (out on the
            # bases, advance on an error, ...) into the result object.
            b_name = seg_batter_name.split()[-1] if seg_batter_name else ""
            final_runners = []
            for r in seg_merged_runner_events:
                if not (b_name and r.get("runner") == b_name):
                    final_runners.append(r)
                    continue
                if base_type in {"single", "double", "triple"}:
                    r_type = r.get("type", "")
                    if r_type in {"tag_out", "force_out", "steal_fail", "pickoff_out"}:
                        res_obj["type"] = f"{base_type}_runner_out"
                    elif r_type == "error_advance":
                        res_obj["type"] = f"{base_type}_error_advance"
                # NOTE(review): nesting reconstructed; assumed to apply to every
                # batter-owned runner event, not only to hits — confirm.
                if r.get("toBase"):
                    res_obj["toBase"] = r["toBase"]
                if r.get("extra_advance"):
                    res_obj["extra_advance"] = r["extra_advance"]
            seg_merged_runner_events = final_runners

        if seg_changes:
            results.extend(seg_changes)
        if seg_event_texts:
            joined = ", ".join(seg_event_texts)
            full_txt = f"{seg_batter_name} : " + joined if seg_batter_name else joined
            results.append({
                "event_type": "at_bat",
                "batter": seg_batter_name,
                "rawText": full_txt,
                "pitches": seg_pitches,
                "result": res_obj,
                "runnerEvents": seg_merged_runner_events,
                "reviewEvents": seg_review_events,
                "extraEvents": seg_extra_events,
                "changes": [],  # changes are emitted as separate events
            })
    return results


def build_half_inning(inning: int, home_or_away: int, relays: list[dict[str, Any]]) -> dict[str, Any]:
    """Build one half-inning: title plus the ordered, merged event list.

    Consecutive at-bat events are merged when the later one starts with a pitch
    number above 1 or names the same batter.
    """
    title = get_half_inning_title(relays, inning, home_or_away)
    raw_events: list[dict[str, Any]] = []
    for relay in sorted(relays, key=relay_seqno):
        raw_events.extend(build_relay_events(relay))

    merged_events: list[dict[str, Any]] = []
    for event in raw_events:
        prev = merged_events[-1] if merged_events else None
        if prev is None or event.get("event_type") != "at_bat" or prev.get("event_type") != "at_bat":
            merged_events.append(event)
            continue

        current_pitches = event.get("pitches") or []
        # `pitchNo` can be present but null; treat it as 0.
        first_pitch_no = (current_pitches[0].get("pitchNo") or 0) if current_pitches else 0
        is_same_batter = prev.get("batter") == event.get("batter")
        if not (first_pitch_no > 1 or is_same_batter):
            merged_events.append(event)
            continue

        prev["pitches"].extend(current_pitches)
        if event.get("result"):
            prev["result"] = event["result"]
        segment_text = event.get("rawText")
        if segment_text:
            # Drop only the leading "name : " prefix. Cutting at the first
            # " : " would discard pitch text when the event has no prefix.
            batter = event.get("batter")
            prefix = f"{batter} : " if batter else ""
            if prefix and segment_text.startswith(prefix):
                segment_text = segment_text[len(prefix):]
            prev["rawText"] += " / " + segment_text
        prev["runnerEvents"].extend(event.get("runnerEvents") or [])
        prev["reviewEvents"].extend(event.get("reviewEvents") or [])
        prev["extraEvents"].extend(event.get("extraEvents") or [])

    return {
        "inning": inning,
        "half": "top" if home_or_away == 0 else "bottom",
        "title": title,
        "events": merged_events,
    }


def parse_inning_value(val: Any, default: float, *, bare_is_bottom: bool = False) -> float:
    """Convert an inning spec to a sortable number (top = n, bottom = n + 0.5).

    Accepts "<n>", "<n>T", "<n>B" (case-insensitive) or any float literal.
    Returns *default* for None, empty or unparsable input.

    With ``bare_is_bottom=True`` a spec without a T/B suffix resolves to the
    bottom half. Use it for upper bounds so that "9" covers the whole 9th.
    """
    if val is None:
        return default
    spec = str(val).upper().strip()
    if not spec:
        return default
    match = re.match(r"^(\d+)([TB]?)$", spec)
    if not match:
        try:
            return float(spec)
        except ValueError:
            return default
    number = int(match.group(1))
    suffix = match.group(2)
    if suffix == "B" or (not suffix and bare_is_bottom):
        return number + 0.5
    return float(number)


def collect_inning_data(
    client: httpx.Client,
    game_id: str,
    start_inning_val: str | None = None,
    end_inning_val: str | None = None,
) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
    """Fetch relay data inning by inning.

    Returns ``(innings, raw_relays)``. ``innings`` holds the half-innings
    inside the requested range; ``raw_relays`` holds every fetched relay
    regardless of the range. Fetching stops at the first inning that fails to
    load or has no relays, or after ``MAX_INNING``.
    """
    innings: list[dict[str, Any]] = []
    raw_relays: list[dict[str, Any]] = []
    start_score = parse_inning_value(start_inning_val, 0.0)
    # An end inning without a T/B suffix includes its bottom half.
    end_score = parse_inning_value(end_inning_val, 99.0, bare_is_bottom=True)

    for inning in range(1, MAX_INNING + 1):
        url = f"https://api-gw.sports.naver.com/schedule/games/{game_id}/relay?inning={inning}"
        try:
            payload = request_json(client, url)
        except (httpx.HTTPError, ValueError):
            # Best effort: a failed request or undecodable body ends the scan.
            break
        result = payload.get("result") or {}
        relays = (result.get("textRelayData") or {}).get("textRelays") or []
        if not relays:
            break

        grouped: dict[int, list[dict[str, Any]]] = defaultdict(list)
        for relay in relays:
            grouped[int(relay.get("homeOrAway", -1))].append(relay)
            raw_relays.append(relay)

        for home_or_away in (0, 1):
            half_relays = grouped.get(home_or_away, [])
            if not half_relays:
                continue
            # Numeric position of this half-inning (top = .0, bottom = .5).
            current_score = inning + (0.5 if home_or_away == 1 else 0.0)
            if current_score < start_score or current_score > end_score:
                continue
            innings.append(build_half_inning(inning, home_or_away, half_relays))
    return innings, raw_relays


def extract_pitching_summary(record_data: dict[str, Any]) -> dict[str, list[str]]:
    """Group pitcher names by decision (win / loss / hold / save) using ``wls``."""
    summary: dict[str, list[str]] = {"승리투수": [], "패전투수": [], "홀드": [], "세이브": []}
    for pitcher in record_data.get("pitchingResult", []):
        label = RESULT_LABELS.get(pitcher.get("wls"))
        if not label:
            continue
        summary[label].append(pitcher["name"])
    return summary


def collect_score_timeline(raw_relays: list[dict[str, Any]]) -> list[dict[str, Any]]:
    """Return the score after every option that carries a game state, ordered by ``seqno``.

    Options without a game state or without a ``seqno`` are skipped; missing or
    null scores count as 0.
    """
    timeline: list[dict[str, Any]] = []
    for relay in raw_relays:
        for option in relay.get("textOptions", []):
            state = option.get("currentGameState") or {}
            seqno = option.get("seqno")
            if not state or seqno is None:
                continue
            timeline.append(
                {
                    "seqno": int(seqno),
                    "home_score": int(state.get("homeScore") or 0),
                    "away_score": int(state.get("awayScore") or 0),
                }
            )
    timeline.sort(key=lambda item: item["seqno"])
    return timeline


def collect_blown_saves(raw_relays: list[dict[str, Any]], away_name: str, home_name: str) -> list[str]:
    """Return the sorted names of pitchers who entered with a lead and lost it.

    Heuristic: only pitchers entering in the 7th inning or later are tracked.
    A pitcher is flagged when his team led at his entry and stopped leading at
    some point before the next tracked pitcher of the same team entered. Leads
    lost after he was relieved are not charged to him.
    """
    timeline = collect_score_timeline(raw_relays)
    pitcher_entries: list[dict[str, Any]] = []
    for relay in raw_relays:
        inning = int(relay.get("inn", 0) or 0)
        if inning < 7:
            continue
        batting_side = int(relay.get("homeOrAway", -1))
        # Side 0 batting is treated as the home team pitching.
        pitcher_team = "home" if batting_side == 0 else "away"
        pitcher_team_name = home_name if pitcher_team == "home" else away_name
        for option in relay.get("textOptions", []):
            if option.get("type") != 2:
                continue
            player_change = option.get("playerChange") or {}
            in_player = player_change.get("inPlayer") or {}
            if in_player.get("playerPos") != "투수":
                continue
            seqno = option.get("seqno")
            if seqno is None:
                continue
            state = option.get("currentGameState") or {}
            pitcher_entries.append(
                {
                    "name": in_player.get("playerName"),
                    "team": pitcher_team,
                    "team_name": pitcher_team_name,
                    "entry_seqno": int(seqno),
                    "home_score": int(state.get("homeScore") or 0),
                    "away_score": int(state.get("awayScore") or 0),
                }
            )
    pitcher_entries.sort(key=lambda entry: entry["entry_seqno"])

    blown_save_pitchers: set[str] = set()
    for index, entry in enumerate(pitcher_entries):
        own_key, opp_key = ("home_score", "away_score") if entry["team"] == "home" else ("away_score", "home_score")
        if entry[own_key] <= entry[opp_key]:
            continue  # did not enter with a lead
        # His stint ends when the next pitcher of the same team enters.
        exit_seqno = next(
            (later["entry_seqno"] for later in pitcher_entries[index + 1:] if later["team"] == entry["team"]),
            None,
        )
        for state in timeline:
            if state["seqno"] <= entry["entry_seqno"]:
                continue
            if exit_seqno is not None and state["seqno"] >= exit_seqno:
                break
            if state[own_key] <= state[opp_key]:
                blown_save_pitchers.add(entry["name"])
                break
    return sorted(blown_save_pitchers)


def build_pitcher_section(
    record_data: dict[str, Any],
    raw_relays: list[dict[str, Any]],
    away_name: str,
    home_name: str,
) -> dict[str, list[str]]:
    """Return the pitching decisions plus the computed blown-save list."""
    summary = extract_pitching_summary(record_data)
    summary["블론세이브"] = collect_blown_saves(raw_relays, away_name, home_name)
    return summary


def render_lineup_text(lineup_summary: dict[str, Any]) -> list[str]:
    """Render the lineup section as text lines (away team first)."""
    lines = ["[라인업]"]
    for team_key in ("away_team", "home_team"):
        team = lineup_summary[team_key]
        lines.append(f"[{team['team_name']}]")
        if team["starter_pitcher"]:
            pitcher = team["starter_pitcher"]
            lines.append(f"선발투수: {pitcher['name']} (#{pitcher['number']}, {pitcher['position']})")
        for player in team["players"]:
            lines.append(f"{player['bat_order']}번: {player['name']} (#{player['number']}, {player['position']})")
        lines.append("")
    if lines[-1] == "":
        lines.pop()
    return lines


def render_innings_text(innings: list[dict[str, Any]]) -> list[str]:
    """Render the per-inning event log as text lines."""
    lines = ["[이닝별 타석 로그]"]
    for half_inning in innings:
        lines.append(f"[{half_inning['title']}]")
        for event in half_inning["events"]:
            if event["event_type"] == "at_bat":
                lines.append(event["rawText"])
            else:
                lines.append(event["text"])
        lines.append("")
    if lines[-1] == "":
        lines.pop()
    return lines


def render_pitcher_text(pitcher_section: dict[str, list[str]]) -> list[str]:
    """Render the pitcher result section, using "-" for empty categories."""
    lines = ["[투수 결과]"]
    for label in ("승리투수", "패전투수", "홀드", "세이브", "블론세이브"):
        names = pitcher_section.get(label, [])
        lines.append(f"{label}: {', '.join(names) if names else '-'}")
    return lines


def render_text(report: dict[str, Any]) -> str:
    """Render the whole report as plain text, sections separated by blank lines."""
    sections = [
        render_lineup_text(report["lineups"]),
        render_innings_text(report["game_contents"]),
        render_pitcher_text(report["pitching_summary"]),
    ]
    return "\n\n".join("\n".join(section) for section in sections)


def save_outputs(report: dict[str, Any], output_dir: Path, output_json: Path | None = None) -> Path:
    """Write the report as UTF-8 JSON and return the written path.

    The file goes to *output_json* when given, else to
    ``<output_dir>/<game_id>_report.json``. Missing directories are created.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    game_id = report["game_id"]
    json_path = output_json or (output_dir / f"{game_id}_report.json")
    json_path.parent.mkdir(parents=True, exist_ok=True)
    json_path.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8")
    return json_path


def filter_report(
    report: dict[str, Any],
    inning: str | None = None,
    lineup_only: bool = False,
    start_inning: str | None = None,
    end_inning: str | None = None,
) -> dict[str, Any]:
    """Return a filtered deep copy of *report*.

    ``lineup_only`` empties the game log and pitcher results. Otherwise the
    game log is limited to ``start_inning``..``end_inning``, or to *inning*
    alone when given ("3" = both halves, "3T" / "3B" = that half only).
    """
    # A JSON round trip gives an independent copy of the report.
    filtered = json.loads(json.dumps(report, ensure_ascii=False))
    if lineup_only:
        filtered["game_contents"] = []
        filtered["pitching_summary"] = {
            "승리투수": [],
            "패전투수": [],
            "홀드": [],
            "세이브": [],
            "블론세이브": [],
        }
        return filtered

    start_v = parse_inning_value(start_inning, 0.0)
    # An end inning without a T/B suffix includes its bottom half.
    end_v = parse_inning_value(end_inning, 99.0, bare_is_bottom=True)
    if inning is not None:
        start_v = parse_inning_value(inning, 0.0)
        # Resolving the upper bound from the same spec keeps "3B" from
        # leaking into the top of the 4th.
        end_v = parse_inning_value(inning, 0.0, bare_is_bottom=True)

    def half_position(half_inning: dict[str, Any]) -> float:
        offset = 0.5 if half_inning.get("half") == "bottom" else 0.0
        return float(half_inning.get("inning") or 0) + offset

    filtered["game_contents"] = [
        half_inning
        for half_inning in filtered.get("game_contents", [])
        if start_v <= half_position(half_inning) <= end_v
    ]
    return filtered


def build_report(game_id: str, start_inning: str | None = None, end_inning: str | None = None) -> dict[str, Any]:
    """Fetch all payloads for *game_id* and assemble the report dict."""
    # Sanitise the id: keep ASCII letters and digits only (drops Hangul,
    # whitespace, hyphens, ...).
    game_id = "".join(re.findall(r"[A-Za-z0-9]", game_id))
    base_url = f"https://api-gw.sports.naver.com/schedule/games/{game_id}"

    with httpx.Client(headers=HEADERS, timeout=20.0) as client:
        relay_payload = request_json(client, f"{base_url}/relay")
        record_payload = request_json(client, f"{base_url}/record?fields=all")
        game_payload = request_json(client, base_url)
        preview_payload = request_json(client, f"{base_url}/preview")

        relay_data = relay_payload["result"]["textRelayData"]
        record_data = record_payload["result"]["recordData"]
        game_info = game_payload["result"]["game"]
        preview_data = preview_payload["result"].get("previewData") or {}

        review_meta = fetch_kbo_review_meta(client, game_id, game_info)
        lineup_summary = build_lineup_summary(game_id, game_info, relay_data, preview_data)
        innings, raw_relays = collect_inning_data(
            client,
            game_id,
            start_inning_val=start_inning,
            end_inning_val=end_inning,
        )

    pitcher_section = build_pitcher_section(
        record_data,
        raw_relays,
        lineup_summary["away_team"]["team_name"],
        lineup_summary["home_team"]["team_name"],
    )
    return {
        "game_id": game_id,
        "game_info": build_game_info(game_info, record_data, review_meta),
        "lineups": lineup_summary,
        "game_contents": innings,
        "pitching_summary": pitcher_section,
    }


def main() -> None:
    """CLI entry point: build the report, apply the filters and write the JSON file."""
    args = parse_args()
    # Keep letters and digits only so the id embedded in the output file name
    # matches what is sent to the API.
    cleaned_id = "".join(re.findall(r"[A-Za-z0-9]", args.game_id))
    report = build_report(cleaned_id, start_inning=args.start_inning, end_inning=args.end_inning)
    filtered_report = filter_report(
        report,
        inning=None,
        lineup_only=args.lineup_only,
        start_inning=args.start_inning,
        end_inning=args.end_inning,
    )
    save_outputs(
        filtered_report,
        Path(args.output_dir),
        output_json=Path(args.output_json) if args.output_json else None,
    )


if __name__ == "__main__":
    main()