import os
import logging
import re
from typing import List, Dict
import requests
from bs4 import BeautifulSoup

from app.services.supabase_service import get_client

LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
logging.basicConfig(level=LOG_LEVEL, format="[%(levelname)s] %(message)s")
logger = logging.getLogger("moef_org_sync")

TABLE_NAME = "moef_org"
LIST_URL = ("https://www.moef.go.kr/mi/orgnzt/org.do"
            "?bbsId=MOSFBBS_000000000097&menuNo=9040100")
SEL_LINKS = 'a[href*="orgId="]'
HEADERS = {"User-Agent": "GovBot/1.0 (+https://work.jjickjjicks.com)"}

def _clean(s: str | None) -> str:
    if not s:
        return ""
    # Normalize NBSP to a plain space and collapse runs of whitespace
    return " ".join(s.replace("\u00A0", " ").split())

def wipe_moef_org() -> None:
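    """Best-effort wipe of the org table before a full re-sync; failures only warn."""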
    sb = get_client()
    try:
        sb.table(TABLE_NAME).delete().neq("name", "__never__").execute()
        try:
            sb.table(TABLE_NAME).delete().is_("name", "null").execute()
        except Exception:
            pass
        logger.info("%s wiped.", TABLE_NAME)
    except Exception as e:
        logger.warning("wipe %s failed (continuing): %s", TABLE_NAME, e)

def fetch_org_links() -> List[Dict]:
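    """Collect unique org-detail links from the MOEF org chart listing page.

    Returns dicts shaped like {"link": "/mi/orgnzt/org.do?orgId=...", "title": ...}
    (the link shape here is illustrative; hrefs are kept as found on the page).
    """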
    r = requests.get(LIST_URL, headers=HEADERS, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")
    seen = set()
    out: List[Dict] = []
    for a in soup.select(SEL_LINKS):
        href = a.get("href", "").strip()
        if not href or "orgId=" not in href:
            continue
        title = a.get("title", "")
        # Keep absolute URLs and root-relative paths as-is; prefix anything else with "/"
        if href.startswith(("/", "http")):
            link = href
        else:
            link = "/" + href
        if link in seen:
            continue
        seen.add(link)
        out.append({"link": link, "title": title})
    logger.info("collected %d org links", len(out))
    return out

# (Core) Strip only the position from the name, handling various trailing patterns
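# Illustrative examples (hypothetical inputs, not real scraped rows):
#   strip_position_from_name("홍길동 사무관", "사무관")   -> "홍길동"
#   strip_position_from_name("홍길동(사무관)", "사무관")  -> "홍길동"
#   strip_position_from_name("홍길동사무관", "사무관")    -> "홍길동"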
def strip_position_from_name(name_raw: str, position_raw: str) -> str:
    n = _clean(name_raw)
    p = _clean(position_raw)
    if not n or not p:
        return n

    original = n

    # "Core token" of the position: the first chunk before a parenthesis, space, or separator
    base = re.split(r"[ \t(\[/·\-]", p)[0].strip()
    candidates = [c for c in {p, base} if c]  # try both the full position and its core token

    for cand in candidates:
        # 1) Remove an exact trailing match (with or without a leading space)
        for suf in (cand, " " + cand):
            if n.endswith(suf):
                n = n[: -len(suf)].rstrip(" -·/–—")
                n = _clean(n)

        # 2) Remove parenthesized suffixes: both "... (사무관)" and "... 사무관(...)" forms
        #   - ... ( cand )
        n = re.sub(rf"\s*\(\s*{re.escape(cand)}\s*\)\s*$", "", n).rstrip(" -·/–—")
        #   - ... cand(...)
        n = re.sub(rf"\s*{re.escape(cand)}\s*\([^)]+\)\s*$", "", n).rstrip(" -·/–—")

        # 3) Position glued directly to the name, e.g. "홍길동사무관"
        if n.endswith(cand):
            n = n[: -len(cand)].rstrip(" -·/–—")
            n = _clean(n)

    n = _clean(n)
    return n if n else original  # fall back to the original if stripping emptied the name

def parse_org_detail(link: str) -> List[Dict]:
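    """Scrape one org detail page and return its staff rows as dicts."""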
    url = link if link.startswith("http") else "https://www.moef.go.kr" + link
    r = requests.get(url, headers=HEADERS, timeout=30)
    r.raise_for_status()
    soup = BeautifulSoup(r.text, "html.parser")

    rows: List[Dict] = []
    for tr in soup.select("table tr"):
        if tr.find("th"):
            continue
        tds = tr.find_all("td")
        if len(tds) < 5:
            continue

        department     = _clean(tds[0].get_text(strip=True))
        name_raw       = _clean(tds[1].get_text(strip=True))
        position_raw   = _clean(tds[2].get_text(strip=True))
        phone          = _clean(tds[3].get_text(strip=True))
        responsibility = _clean(tds[4].get_text(strip=True))

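        # Prefer the tel: link's number, when present, over the cell's visible text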
        tel_a = tds[3].select_one('a[href^="tel:"]')
        if tel_a and tel_a.has_attr("href"):
            phone = tel_a["href"].replace("tel:", "").strip()

        # ✅ Clean the name only (position is stored as-is)
        name = strip_position_from_name(name_raw, position_raw)
        position = position_raw

        if not (name or position or department or responsibility or phone):
            continue

        rows.append({
            "department": department,
            "name": name,
            "position": position,
            "phone": phone,
            "task": responsibility
        })
    return rows

def bulk_insert(rows: List[Dict], chunk: int = 500) -> int:
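    """Insert rows through the Supabase client in chunks of `chunk` rows."""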
    sb = get_client()
    total = 0
    for i in range(0, len(rows), chunk):
        part = rows[i:i + chunk]
        if not part:
            continue
        sb.table(TABLE_NAME).insert(part).execute()
        total += len(part)
    return total

def run_once():
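    """Full sync: wipe the table, scrape every org page, then bulk-insert all rows."""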
    wipe_moef_org()
    links = fetch_org_links()
    if not links:
        logger.info("no org links found.")
        return {"source": "MOEF_ORG", "inserted": 0}
    all_rows: List[Dict] = []
    for i, item in enumerate(links, 1):
        link = item["link"]
        rows = parse_org_detail(link)
        logger.info("(%d/%d) %s -> %d rows", i, len(links), link, len(rows))
        all_rows.extend(rows)
    if not all_rows:
        logger.info("no rows parsed.")
        return {"source": "MOEF_ORG", "inserted": 0}
    inserted = bulk_insert(all_rows)
    logger.info("%s sync done: inserted=%d", TABLE_NAME, inserted)
    return {"source": "MOEF_ORG", "inserted": inserted}

if __name__ == "__main__":
    print(run_once())
