86 lines
3.1 KiB
Python
86 lines
3.1 KiB
Python
"""
|
|
core/change_parser.py — 선수 교체 이벤트 파싱
|
|
|
|
교체 텍스트에서 선수명, 포지션, 교체 유형 등을 추출합니다.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import re
|
|
from typing import Any
|
|
|
|
from core.config_loader import position_to_defense_no
|
|
|
|
|
|
def extract_change_actor(text: str) -> tuple[str | None, int | None, str]:
    """Extract the role, batting order and name from the actor side of a change text.

    Only the part of ``text`` before the first ``" : "`` separator is inspected.

    Examples:
        '5번타자 문보경' → ('batter', 5, '문보경')
        '투수 임찬규' → ('투수', None, '임찬규')

    Args:
        text: Raw change text. ``None`` or an empty string is treated as ``""``.

    Returns:
        A ``(role, bat_order, name)`` tuple:

        * ``role`` is ``"batter"`` for "N번타자" entries, the matched Korean
          role label for the other known roles, or ``None`` when no known
          role prefix is found.
        * ``bat_order`` is the batting-order number for "N번타자" entries and
          ``None`` otherwise.
        * ``name`` is the remaining text; when no role is recognized it is
          the whole stripped left-hand side.
    """
    lhs = (text or "").partition(" : ")[0].strip()

    batter_match = re.search(r"(\d+)번타자\s+(.+)$", lhs)
    if batter_match:
        return "batter", int(batter_match.group(1)), batter_match.group(2).strip()

    # Order matters: the specific "N루주자" labels are tried before the
    # generic "주자" label.
    roles = (
        "대타", "대주자",
        "1루주자", "2루주자", "3루주자", "주자",
        "투수", "포수", "1루수", "2루수", "3루수",
        "유격수", "좌익수", "중견수", "우익수",
    )
    for role in roles:
        if not lhs.startswith(role):
            continue
        remainder = lhs[len(role):]
        # Accept any whitespace after the role (tab, NBSP, ...), matching the
        # ``\s+`` used by the batter pattern; a bare ASCII space was the only
        # separator recognized before.
        if remainder[:1].isspace():
            return role, None, remainder.strip()

    return None, None, lhs
|
|
|
|
|
|
def is_merged_pitcher_substitution(actor_role: str | None, in_role: str | None) -> bool:
    """Tell whether a change replaces a fielder with a pitcher.

    The original note describes this case as the "merged" substitution
    that involves a switch to designated hitter.

    Args:
        actor_role: Role label of the player on the left-hand (outgoing) side.
        in_role: Role label of the player on the right-hand (incoming) side.

    Returns:
        ``True`` only when ``actor_role`` is one of the eight non-pitcher
        fielding positions and ``in_role`` is ``"투수"``.
    """
    non_pitcher_fielders = frozenset(
        ("포수", "1루수", "2루수", "3루수", "유격수", "좌익수", "중견수", "우익수")
    )
    if actor_role not in non_pitcher_fielders:
        return False
    return in_role == "투수"
|
|
|
|
|
|
def normalize_change_event(change_event: dict[str, Any]) -> dict[str, Any]:
    """Normalize a change event by parsing its ``text`` field.

    The text is expected to look like ``"<actor> : <target>"``. The actor
    side is parsed with ``extract_change_actor``; the target side is read
    either as a new position (position change) or as the incoming player
    (substitution).

    Args:
        change_event: Event dict. Its ``"text"`` value is parsed; a missing
            or falsy value is treated as ``""``.

    Returns:
        ``change_event`` itself, unchanged, when it already carries a truthy
        ``"actor_name"`` or ``"player_name"``. Otherwise a shallow copy with
        these keys added:

        * always: ``change_type``, ``actor_role``, ``actor_name``, and
          ``bat_order`` when the actor is an "N번타자" entry;
        * ``position_change``: ``player_name``, ``to_position``;
        * ``substitution``: ``out_player``, ``in_player``, ``in_role``, plus
          ``to_position`` when the incoming role is a known position;
        * ``merged_pitcher_substitution``: the substitution keys plus
          ``player_name``, ``to_position`` (``"지명타자"``) and
          ``pitcher_in_player``.
    """
    # Events that were already normalized are passed through as-is.
    if change_event.get("actor_name") or change_event.get("player_name"):
        return change_event

    text = change_event.get("text") or ""
    # Target side: everything after the first " : ", or "" when it is absent.
    # Computed once; both branches below need it.
    rhs = text.partition(" : ")[2]

    normalized = dict(change_event)
    normalized["change_type"] = (
        "position_change" if "수비위치 변경" in text else "substitution"
    )
    actor_role, bat_order, actor_name = extract_change_actor(text)
    normalized["actor_role"] = actor_role
    normalized["actor_name"] = actor_name
    if bat_order is not None:
        normalized["bat_order"] = bat_order

    if normalized["change_type"] == "position_change":
        normalized["player_name"] = actor_name
        # The new position is whatever precedes the "(으)로" particle.
        normalized["to_position"] = rhs.partition("(으)로")[0].strip()
        return normalized

    # Substitution: the incoming player precedes "(으)로 교체".
    incoming = rhs.partition("(으)로 교체")[0].strip()
    in_role, _, in_name = extract_change_actor(incoming)
    normalized["out_player"] = actor_name
    normalized["in_player"] = in_name
    normalized["in_role"] = in_role

    if is_merged_pitcher_substitution(actor_role, in_role):
        normalized["change_type"] = "merged_pitcher_substitution"
        normalized["player_name"] = actor_name
        normalized["to_position"] = "지명타자"
        normalized["pitcher_in_player"] = in_name
        return normalized

    # Looked up only here: the merged-pitcher path above never uses it.
    # NOTE(review): assumes position_to_defense_no() returns a container of
    # position labels that supports ``in`` — confirm in core.config_loader.
    if in_role in position_to_defense_no():
        normalized["to_position"] = in_role
    return normalized
|