import time
from typing import Optional, Dict, Any, List
import requests

from app.services.supabase_service import (
    upsert_kepco_id,
)

# Import the PDF parser helpers (download / row extraction)
from app.crawlers.kepco_org_parser import (
    _download_pdf as download_pdf,
    _extract_org_rows as extract_org_rows,
)

# Origin of the ALIO disclosure site; every endpoint below is built from it.
BASE = "https://alio.go.kr"

# HTML page that is fetched first to prime the session / cookies.
PRIME_URL = f"{BASE}/item/itemOrganList.do"
# Organisation metadata (apbaType and so on).
ORG_API = f"{BASE}/item/itemOrganListSusi.json"
# Report list (ad-hoc disclosures / executive status).
LIST_API = f"{BASE}/item/itemReportListSusi.json"
# Despite the ".json" suffix this actually returns the PDF binary.
PDF_URL = f"{BASE}/download/pdf.json"

UA = "GovBot/1.0 (+https://work.jjickjjicks.com)"

# Institution ids (apbaId) used when a caller does not pass its own list.
DEFAULT_APBA_IDS = [
    "C0247",
    "C0042",
    "C0043",
    "C0066",
    "C0082",
    "C0220",
    "C0259",
    "C0248",
    "C0306",
    "C0305",
    "C0236",
]

def _sess() -> requests.Session:
    """Build a ``requests.Session`` carrying the crawler's default headers.

    The session is returned open; closing it is left to the caller.
    """
    default_headers = {
        "User-Agent": UA,
        "Accept": "*/*",
        "Referer": "https://alio.go.kr/",
        "Accept-Language": "ko-KR,ko;q=0.9,en;q=0.8",
    }
    session = requests.Session()
    session.headers.update(default_headers)
    return session

def _prime_session(s: requests.Session, apba_id: str, report_root: str = "20305", page_no: int = 1) -> None:
    """GET the organisation list HTML page through ``s`` before calling the JSON APIs.

    The request goes to ``PRIME_URL`` with ``apbaId``, ``reportFormRootNo``
    and ``pageNo`` as query parameters. The final URL, status code and body
    length are printed, then ``raise_for_status()`` is called, so an HTTP
    error status surfaces as an exception.
    """
    query = {
        "apbaId": apba_id,
        "reportFormRootNo": report_root,
        "pageNo": page_no,
    }
    resp = s.get(PRIME_URL, params=query, timeout=20)
    body_len = len(resp.text)
    print(f"[ALIO][PRIME][GET] {resp.url} -> {resp.status_code}, len={body_len}")
    resp.raise_for_status()

def _post_json(s: requests.Session, url: str, payload: Dict[str, Any]) -> Optional[Dict[str, Any]]:
    """POST ``payload`` as JSON to ``url`` and return the decoded JSON object.

    Args:
        s: Session used to send the request.
        url: Endpoint to POST to.
        payload: Request body, sent via ``json=`` with an explicit JSON
            content type.

    Returns:
        The decoded body when it is a JSON object (``dict``). ``None`` when
        the body is not valid JSON or decodes to anything other than an
        object (list, string, number, null).

    Raises:
        requests.HTTPError: via ``raise_for_status()`` on an HTTP error status.
    """
    r = s.post(url, json=payload, headers={"Content-Type": "application/json"}, timeout=20)
    print(f"[ALIO][POST] {url} payload_keys={list(payload.keys())}")
    print(f"[ALIO][POST] {url} -> {r.status_code}")
    r.raise_for_status()
    try:
        data = r.json()
    except ValueError as e:
        # JSON decoding failures are ValueError subclasses; anything else is
        # a genuine bug and should not be hidden behind a "no data" result.
        print(f"[ALIO][POST][JSONERR] {url} err: {e}")
        return None
    if not isinstance(data, dict):
        # Callers immediately use .get() on the result, so a non-object body
        # is treated the same way as an undecodable one.
        print(f"[ALIO][POST][JSONERR] {url} err: unexpected JSON type {type(data).__name__}")
        return None
    return data

def _first_or_none(v):
    """Return the first element of a non-empty list; otherwise return ``v`` as is.

    Note that, despite the name, an empty list is returned unchanged (not
    ``None``), and non-list values (including tuples) pass straight through.
    """
    if not isinstance(v, list):
        return v
    if not v:
        return v
    return v[0]

def _resolve_organ_info(org: Dict[str, Any], apba_id: str) -> Dict[str, Any]:
    """Extract ``apbaNa`` / ``apbaType`` for ``apba_id`` from an ORG_API response."""
    data = org.get("data")
    if not isinstance(data, dict):
        data = {}

    # The lists may sit under "data" or at the top level of the response.
    organ_list = data.get("organList") or org.get("organList") or []
    apba_list = data.get("apbaList") or org.get("apbaList") or []

    organ_info: Dict[str, Any] = {"apbaId": apba_id}
    # Search both lists (organList first) so that a non-empty organList
    # without a match does not hide a matching apbaList entry.
    match = next(
        (
            entry
            for entry in (*organ_list, *apba_list)
            if isinstance(entry, dict) and str(entry.get("apbaId")) == apba_id
        ),
        None,
    )
    if match is not None:
        organ_info["apbaNa"] = match.get("apbaNa")
        organ_info["apbaType"] = match.get("apbaType")
    else:
        # No list entry for this id: fall back to whatever "organInfo" carries.
        organ_info.update(data.get("organInfo") or org.get("organInfo") or {})
    return organ_info


def fetch_latest_for_org(
    apba_id: str,
    *,
    throttle: float = 0.5,
    report_root: str = "20305",
) -> Optional[Dict[str, Any]]:
    """Look up the top-most report listed for one institution.

    Steps:
        1. Visit the HTML page (session priming), then sleep ``throttle`` seconds.
        2. ORG_API: resolve the institution's ``apbaType`` / ``apbaNa``.
        3. LIST_API: request page 1 of the report list and keep only the
           first entry.

    Args:
        apba_id: Institution id sent as ``apbaId``.
        throttle: Seconds to sleep after the priming request.
        report_root: Value sent as ``reportFormRootNo`` on every request.

    Returns:
        A dict with ``apbaId``, ``disclosureNo``, ``idate`` (dots replaced by
        dashes and cut to 10 characters, or ``None``), ``title`` and
        ``organInfo``. ``None`` when either API returns nothing usable, the
        list is empty, or the first entry has no disclosure number.

    Raises:
        Exceptions raised by the HTTP helpers (for example on an HTTP error
        status) are not caught here.
    """
    # The context manager closes the session on every exit path.
    with _sess() as s:
        _prime_session(s, apba_id, report_root)
        time.sleep(throttle)

        # 2) Organisation metadata
        org_payload = {
            "apbaType": [],
            "jidtDptm": [],
            "area": [],
            "apbaId": apba_id,
            "reportFormRootNo": report_root,
        }
        org = _post_json(s, ORG_API, org_payload)
        if not org or org.get("status") == "error":
            print(f"[ALIO][ORG][MISS] apbaId={apba_id}")
            return None

        organ_info = _resolve_organ_info(org, apba_id)
        apba_type = organ_info.get("apbaType")
        apba_na = organ_info.get("apbaNa")
        print(f"[ALIO][ORG] apbaId={apba_id} -> apbaType={apba_type}, apbaNa={apba_na}")

        # 3) Report list (first entry only)
        list_payload = {
            "pageNo": 1,
            "apbaId": apba_id,
            "apbaType": apba_type,
            "reportFormRootNo": report_root,
            "search_word": "",
            "search_flag": "title",
            "bid_type": "",
            "enfc_istt": "",
        }
        lst = _post_json(s, LIST_API, list_payload)
        if not lst or lst.get("status") == "error":
            return None

        # Results may be nested under "data" or sit at the top level.
        data = lst.get("data") or lst
        results = data.get("result") or []
        if not results:
            return None

        top = results[0]
        disclosure_no = str(top.get("disclosureNo") or "").strip()
        raw_idate = top.get("idate")
        # "YYYY.MM.DD..." -> "YYYY-MM-DD"
        idate = str(raw_idate).replace(".", "-")[:10] if raw_idate else None
        title = top.get("title") or "임원현황"

        print(f"[ALIO][LIST][TOP] apbaId={apba_id} -> disclosureNo={disclosure_no}, idate={idate}, title='{title}'")

        if not disclosure_no:
            return None

        return {
            "apbaId": apba_id,
            "disclosureNo": disclosure_no,
            "idate": idate,
            "title": title,
            "organInfo": organ_info,
        }

def run_ingest(apba_ids: Optional[List[str]] = None, throttle: float = 0.5) -> int:
    """Upsert the latest executive-status disclosure of each institution into kepco_id.

    For every id the latest disclosure is fetched and passed to
    ``upsert_kepco_id``. Missing values get fallbacks so the row is always
    populated: the department falls back to the institution id and the
    title to "임원현황". Institutions without a latest disclosure are logged
    and skipped.

    Args:
        apba_ids: Institution ids to process; ``None`` selects
            ``DEFAULT_APBA_IDS``.
        throttle: Seconds passed to the fetch step and slept after each upsert.

    Returns:
        The number of institutions for which an upsert was issued.
    """
    targets = DEFAULT_APBA_IDS if apba_ids is None else apba_ids
    upserted = 0

    for org_id in targets:
        latest = fetch_latest_for_org(org_id, throttle=throttle)
        if not latest:
            print(f"[KEPCO] {org_id}: no latest disclosure")
            continue

        organ_meta = latest.get("organInfo") or {}
        dept = (organ_meta.get("apbaNa") or "").strip() or org_id
        disc_no = latest.get("disclosureNo")
        row_id = f"{disc_no}-{dept}"

        # Intended to be idempotent (original note: on_conflict="department,disclosure_no").
        upsert_kepco_id(
            compound_id=row_id,
            department=dept,
            disclosure_no=disc_no,
            title=latest.get("title") or "임원현황",
            tag=None,
            posted_at=latest.get("idate"),  # 'YYYY-MM-DD'
            pdf_url=f"{PDF_URL}?disclosureNo={disc_no}",
        )

        print(f"[KEPCO] UPSERT ok: id={row_id} dept={dept} disc={disc_no}")
        upserted += 1
        time.sleep(throttle)

    return upserted

# -------------------- Preview (console output only, nothing is saved) --------------------

def preview_latest_officers(apba_id: str, *, throttle: float = 0.5) -> int:
    """Download and parse the institution's latest PDF and print its rows.

    Only position, name, duty, gender, term start and term end are printed
    for each parsed row; nothing is written to the database.

    Returns:
        The number of parsed rows, or 0 when no disclosure was found or the
        download / parse step raised (the error is printed, not re-raised).
    """
    latest = fetch_latest_for_org(apba_id, throttle=throttle)
    if not latest:
        print(f"[PREVIEW] {apba_id}: latest disclosure not found")
        return 0

    organ_meta = latest.get("organInfo") or {}
    dept_name = (organ_meta.get("apbaNa") or "").strip() or apba_id
    disc_no = latest.get("disclosureNo")
    posted = latest.get("idate")
    pdf_link = f"{PDF_URL}?disclosureNo={disc_no}"

    print(f"\n기관: {dept_name}  공시:{disc_no}  등록일:{posted}")
    print(f"PDF: {pdf_link}")

    try:
        raw_pdf = download_pdf(pdf_link, timeout=30.0)
    except Exception as e:
        print(f"[PREVIEW][PDF][ERR] {e}")
        return 0

    try:
        officers = extract_org_rows(raw_pdf)
    except Exception as e:
        print(f"[PREVIEW][PARSE][ERR] {e}")
        return 0

    total = len(officers)
    print(f"파싱 행 수: {total}")
    print("-" * 80)

    # (padded label, row key) pairs, printed in this order for every row.
    labelled_fields = (
        ("직위        ", "position"),
        ("성명        ", "name"),
        ("직책        ", "task"),
        ("성별        ", "gender"),
        ("임기시작일  ", "start"),
        ("임기종료일  ", "end"),
    )
    for idx, officer in enumerate(officers, start=1):
        print(f"[{idx}]")
        for label, key in labelled_fields:
            print(f"{label}: {officer.get(key, '')}")
        print()

    return total

# Names exported by ``from <this module> import *``.
__all__ = [
    "fetch_latest_for_org",
    "run_ingest",
    "preview_latest_officers",
    "DEFAULT_APBA_IDS",
]
