import re
import time
from typing import List, Dict
import requests
from bs4 import BeautifulSoup

from app.services.supabase_service import get_client, logger

# Origin of the MOTIE web site.
BASE = "https://www.motie.go.kr"
# Staff-search listing endpoint; pagination is selected with ?pageIndex=<n>.
LIST_URL = BASE + "/kor/25/empSearch"
# Headers sent with every request made by this module.
HEADERS = {"User-Agent": "Mozilla/5.0"}

def _fetch_last_page(sess: requests.Session) -> int:
    """Return the number of the last page of the staff listing.

    Requests the listing without a page index and reads the page number from
    the ``onclick`` attribute of the ``a.direction.last`` pagination link,
    which is expected to look like ``empSearch.list(<n>)``.

    Args:
        sess: Session used to issue the HTTP request.

    Returns:
        The page number found in the link, or 1 when the link is missing or
        its ``onclick`` value does not match the expected pattern.

    Raises:
        requests.HTTPError: If the listing request returns an error status.
    """
    response = sess.get(LIST_URL, headers=HEADERS, timeout=30)
    response.raise_for_status()

    document = BeautifulSoup(response.text, "html.parser")
    last_link = document.select_one("a.direction.last")
    if not last_link:
        # No "last" pagination control: treat the listing as a single page.
        return 1

    handler = last_link.get("onclick", "")
    match = re.search(r"empSearch\.list\((\d+)\)", handler)
    if match:
        return int(match.group(1))
    return 1

def _parse_page(sess: requests.Session, page: int) -> List[Dict]:
    """Fetch one listing page and turn its table rows into record dicts.

    Each ``<tr>`` with at least four ``<td>`` cells is read as: name,
    position, department, zero or more task cells, and a final phone cell.

    Args:
        sess: Session used to issue the HTTP request.
        page: Page number passed as the ``pageIndex`` query parameter.

    Returns:
        A list of dicts with the keys ``name``, ``position``, ``department``,
        ``task`` and ``phone``. Rows that are entirely empty are omitted, as
        are rows whose department is exactly '파견' and whose task text
        contains '기획재정부'.

    Raises:
        requests.HTTPError: If the page request returns an error status.
    """
    response = sess.get(f"{LIST_URL}?pageIndex={page}", headers=HEADERS, timeout=30)
    response.raise_for_status()
    document = BeautifulSoup(response.text, "html.parser")

    records: List[Dict] = []
    for table_row in document.select("table tbody tr"):
        cells = table_row.find_all("td")
        if len(cells) < 4:
            # Not a data row (too few columns to hold the expected fields).
            continue

        name, position, department = (
            cell.get_text(strip=True) for cell in cells[:3]
        )

        # Phone number: prefer a tel: link, otherwise search the cell text.
        phone_cell = cells[-1]
        tel_link = phone_cell.select_one('a[href^="tel:"]')
        if tel_link and tel_link.get("href"):
            phone = tel_link.get("href").replace("tel:", "").strip()
        else:
            cell_text = phone_cell.get_text(" ", strip=True)
            found = re.search(r"\d{2,4}-\d{3,4}-\d{4}", cell_text)
            if found:
                phone = found.group(0)
            else:
                # No recognisable number: keep the raw cell text.
                phone = cell_text

        # Task description: every cell between the department and the phone
        # cell. For a four-cell row this slice is empty, so task is "".
        task_parts = (cell.get_text(" ", strip=True) for cell in cells[3:-1])
        task = " ".join(task_parts).strip()

        # Drop the phone number from the task text if it is repeated there.
        if phone and task:
            task = task.replace(phone, "").strip()

        # Upload exclusion rule: department is '파견' and the task mentions
        # '기획재정부'.
        if department == "파견" and "기획재정부" in task:
            logger.debug(f"[motie_org] skip(row): dept=파견, task contains '기획재정부' :: {name} / {position}")
            continue

        # Ignore rows in which every field came out empty.
        if not (name or position or department or task or phone):
            continue

        record = {
            "name": name,
            "position": position,
            "department": department,
            "task": task,
            "phone": phone,
        }
        records.append(record)

    return records

def run_once() -> int:
    """Crawl the whole MOTIE staff directory and reload the ``motie_org`` table.

    Every listing page is fetched and parsed first. The table is then wiped
    and the crawled rows are written back in batches.

    If the crawl produces no rows at all, the table is left untouched so that
    a site layout change or an empty response cannot erase the existing data.

    Returns:
        The number of rows written; 0 when the crawl produced no rows.

    Raises:
        requests.HTTPError: If any listing page returns an error status.
        Exception: Whatever the Supabase client raises when a batch write
            fails. A failed wipe is only logged, not raised.
    """
    supabase = get_client()

    with requests.Session() as sess:
        last = _fetch_last_page(sess)
        all_rows: List[Dict] = []
        for p in range(1, last + 1):
            all_rows.extend(_parse_page(sess, p))
            # Short pause between page requests.
            time.sleep(0.2)

    if not all_rows:
        # Nothing was scraped: wiping now would leave the table empty.
        logger.warning(
            f"[motie_org] crawl returned no rows (pages={last}); "
            "keeping existing table contents"
        )
        return 0

    # Delete everything, then insert the fresh rows.
    # NOTE(review): the neq filter presumably exists only because the client
    # needs some filter on delete and no row is named "__keep__" - confirm.
    try:
        supabase.table("motie_org").delete().neq("name", "__keep__").execute()
    except Exception as e:
        # Best effort: a failed wipe is logged and the write still proceeds.
        logger.warning(f"[motie_org] wipe failed: {e}")

    inserted = 0
    batch = 500
    # Slices taken at these offsets are never empty, so no emptiness check.
    for i in range(0, len(all_rows), batch):
        chunk = all_rows[i:i + batch]
        supabase.table("motie_org").upsert(chunk).execute()
        inserted += len(chunk)

    logger.info(f"[motie_org] run_once completed: rows={inserted}, pages={last}")
    return inserted

# Run a single full crawl when this file is executed as a script.
if __name__ == "__main__":
    run_once()
