# /var/www/html/bot/scripts/kepco_history_fill.py
from __future__ import annotations
import re
from typing import Any, Dict, List, Optional

# ---------- Internal utilities ----------
def _normalize(s: Optional[str]) -> str:
    """Return *s* with its whitespace canonicalised.

    Carriage returns are removed, runs of spaces/tabs collapse to a single
    space, spaces surrounding a newline are dropped, and the result is
    stripped. A falsy input (``None`` or ``""``) yields ``""``.
    """
    if not s:
        return ""
    cleaned = s.replace("\r", "")
    # Order matters: collapse horizontal whitespace first so that the second
    # rule only ever has plain spaces to trim around each newline.
    substitutions = (
        (r"[ \t]+", " "),
        (r"[ ]*\n[ ]*", "\n"),
    )
    for pattern, replacement in substitutions:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned.strip()

def _pick(pat: str, s: str) -> Optional[str]:
    """Return the stripped first capture group of the first match of *pat*.

    *pat* is searched for anywhere in *s* and must define capture group 1.
    Returns ``None`` when the pattern does not match.
    """
    found = re.search(pat, s)
    if found is None:
        return None
    return found.group(1).strip()

def _extract_department_from_header(merged_text: str) -> Optional[str]:
    """Return the line sandwiched between two ``임원 현황`` header lines.

    The stripped content of that single line is returned, or ``None`` when
    the text contains no such header pair.
    """
    header = re.search(r"임원\s*현황\s*\n([^\n]+)\n임원\s*현황", merged_text)
    if header is None:
        return None
    return header.group(1).strip()

def _split_position_sections(merged_text: str) -> List[str]:
    """Split the text into the sections that begin with ``직위``.

    The text is cut at every newline that is immediately followed by ``직위``
    and a whitespace character. Pieces that do not start with ``직위`` (for
    example any text preceding the first section) are discarded.
    """
    pieces = re.split(r"\n(?=직위\s)", merged_text)
    return list(filter(lambda piece: piece.startswith("직위"), pieces))

def _parse_start_in_section(section_text: str) -> Optional[str]:
    """Return the term start found in one section, or ``None``.

    Three layouts are tried in order, and the first one that matches wins:

    1. ``임기 (시작일) <start> (종료일) <end>``
    2. ``임기[:] <start> ~ <end>`` (the separator may also be ``-``, ``–``
       or ``—``)
    3. ``임기 [시작[일]][:] <start>`` - the rest of the line is the start.

    Whitespace runs inside the returned value are collapsed to one space.
    """
    # A complete year-month-day token. In layout 2 it is tried before the
    # generic capture so that a hyphenated date such as "2020-01-01" is kept
    # whole; the generic capture stops at the first dash and would return
    # only "2020".
    date_like = r"\d{4}\s*[.\-/]\s*\d{1,2}\s*[.\-/]\s*\d{1,2}\.?"
    layouts = (
        r"임기\s*\(시작일\)\s*([^\n(]+?)\s*\(종료일\)\s*([^\n]+?)(?:\n|$)",
        r"임기\s*[:：]?\s*(" + date_like + r"|[^\n~\-–—]+?)\s*[~\-–—]\s*[^\n]+",
        r"임기\s*(?:시작(?:일)?)?\s*[:：]?\s*([^\n]+)",
    )
    for layout in layouts:
        m = re.search(layout, section_text)
        if m:
            return re.sub(r"\s+", " ", m.group(1).strip())
    return None

def _find_start_by_name_and_pos(merged_text: str, name: str, pos_hint: Optional[str]) -> Optional[str]:
    """Return the term start of the first section matching *name*.

    A section matches when it contains ``성명`` followed by *name* (then
    whitespace or end of text). When both *pos_hint* and the section's own
    ``직위`` value are non-empty after normalisation, the hint must also be
    a substring of that value. Sections whose start cannot be parsed are
    skipped. Returns ``None`` when no section qualifies.
    """
    candidates = _split_position_sections(merged_text)
    name_re = re.compile(rf"성명\s*[:：]?\s*{re.escape(name)}(\s|$)")
    wanted_pos = _normalize(pos_hint or "")
    for section in candidates:
        if name_re.search(section) is None:
            continue
        section_pos = _normalize(_pick(r"직위\s*([^\n]+?)\s*성명", section) or "")
        # The position only disqualifies a section when both sides are known.
        if wanted_pos and section_pos and wanted_pos not in section_pos:
            continue
        found = _parse_start_in_section(section)
        if found:
            return found
    return None

def _is_blank_date(s: Optional[str]) -> bool:
    """Report whether *s* is a placeholder rather than an actual date.

    Blank means: ``None``, empty or whitespace-only; a lone dash (``-``,
    ``–`` or ``—``); or any text containing ``미정`` or ``무기한``. All
    whitespace is removed before the comparison.
    """
    if not s:
        return True
    compact = re.sub(r"\s+", "", s)
    if not compact:
        # Whitespace-only input carries no date either.
        return True
    if compact in {"-", "–", "—"}:
        return True
    return re.search(r"(미정|무기한)", compact) is not None

# Matches "공석", optionally wrapped in parentheses and surrounded by whitespace.
VACANCY_PAT = re.compile(r"^\s*\(?공석\)?\s*$")


def _is_vacant_name(name: Optional[str]) -> bool:
    """Report whether *name* is a vacancy marker instead of a person's name.

    After stripping, ``공석`` (with or without parentheses), ``결원`` and
    ``-`` count as vacant. A falsy *name* is not considered vacant.
    """
    if not name:
        return False
    candidate = str(name).strip()
    return bool(VACANCY_PAT.match(candidate)) or candidate in {"결원", "-"}

# ---------- Public function: parse current personnel ----------
def parse_people(merged_text: str, department_hint: Optional[str] = None) -> List[Dict[str, Any]]:
    """Extract the list of people from text merged out of a PDF.

    Args:
        merged_text: Text of the document with its pages merged together.
        department_hint: Department recorded on every entry. When falsy, the
            department is read from the document header instead and may be
            ``None`` if no header is found.

    Returns:
        A list of dicts with the keys ``department``, ``name``, ``task``,
        ``position``, ``gender``, ``start``, ``end`` and ``career`` (a list
        of strings). Scalar fields that cannot be found are ``None`` and a
        missing career is an empty list. Sections without a name, and
        sections that are table headers, are left out.
    """
    department = department_hint or _extract_department_from_header(merged_text)
    # Ignore everything before the first "직위 " label, then cut the rest into
    # one section per line that starts with "직위".
    start_idx = merged_text.find("직위 ")
    body = merged_text[start_idx:] if start_idx >= 0 else merged_text
    sections = [sec for sec in re.split(r"\n(?=직위\s)", body) if re.match(r"^직위\s", sec)]
    out: List[Dict[str, Any]] = []

    for raw in sections:
        sec = _normalize(raw)
        # Skip the header/body of "before change / after change" tables.
        if sec.startswith("직위 변경 전") or ("변경사유" in sec and not re.search(r"(임기|직책|성별)\s", sec)):
            continue

        position = _pick(r"직위\s*([^\n]+?)\s*성명", sec)
        name     = _pick(r"성명\s*([^\n]+?)(?=\s*(?:직책|성별|임기|\n))", sec)

        # Reject table-header rows and nameless sections before doing the
        # remaining (more expensive) field extraction.
        if (position or "").replace(" ", "") in {"변경전", "직위"}:
            continue
        if (name or "").replace(" ", "") in {"변경후", "성명"}:
            continue
        if not name:
            continue

        task   = _pick(r"직책\s*([^\n]+?)(?=\s*(?:성별|임기|\n))", sec)
        gender = _pick(r"성별\s*([남여])", sec)

        # Only the "(시작일) <start> (종료일) <end>" layout is recognised here.
        m = re.search(r"임기\s*\(시작일\)\s*([^\n(]+?)\s*\(종료일\)\s*([^\n]+?)(?:\n|$)", sec)
        start_raw = m.group(1).strip() if m else None
        end_raw   = m.group(2).strip() if m else None
        start = None if _is_blank_date(start_raw) else re.sub(r"\s+", " ", start_raw or "")
        end   = None if _is_blank_date(end_raw)   else re.sub(r"\s+", " ", end_raw   or "")

        # The career block runs until the next known field label or the end
        # of the section. `sec` has been stripped, so the end of the section
        # has no trailing newline: it has to be matched by `\s*$` on its own,
        # not by `\n\s*$`, otherwise a career that is the last field in the
        # section is never captured.
        career_block = _pick(
            r"주요경력\s*([\s\S]*?)(?=\n\s*(?:선임절차|선임절차규정|당연직여부|직위\s|기준일|제출일|기관 공시 담당자)|\s*$)",
            sec
        )
        career: List[str] = []
        if career_block:
            career = [ln.strip() for ln in re.split(r"\n+", career_block) if ln.strip()]

        out.append({
            "department": department,
            "name": name,
            "task": task,
            "position": position,
            "gender": gender,
            "start": start,
            "end": end,
            "career": career,  # list of str; original note reads "jsonb" - presumably the target column type
        })
    return out
