# app/sync2/pipeline.py
from __future__ import annotations
import re
from typing import Any, Dict, List, Tuple
from datetime import datetime, timezone, date

from app.services.supabase_service import get_client

sb = get_client()

# ---------- 작은 헬퍼 ----------
def _now_iso():
    return datetime.now(timezone.utc).isoformat()

def _norm_name(s: str) -> str:
    return re.sub(r"\s+", "", (s or "")).upper()

def _dept_tokens(s: str) -> set:
    return set([t for t in re.split(r"[\/\s>\-·]", s or "") if t])

def _jaccard(a: set, b: set) -> float:
    if not a and not b:
        return 0.0
    return len(a & b) / max(1, len(a | b))

def _pos_similarity(a: str, b: str) -> float:
    A = re.sub(r"\s+", "", a or "")
    B = re.sub(r"\s+", "", b or "")
    if not A or not B:
        return 0.0
    if A == B:
        return 1.0
    keys = ["사무관", "서기관", "주무관", "국장", "과장", "실장"]
    return 0.6 if any((k in A and k in B) for k in keys) else 0.0

# ---------- 배치 ----------
def start_batch(batch_id: str):
    sb.table("ingest_batches").insert({
        "batch_id": batch_id, "status": "RUNNING"
    }).execute()

def finish_batch(batch_id: str, status="COMPLETED", source_summary: Dict[str,int] | None = None):
    sb.table("ingest_batches").update({
        "finished_at": _now_iso(), "status": status,
        "source_summary": source_summary or {}
    }).eq("batch_id", batch_id).execute()

# ---------- 스테이징 적재 ----------
def ingest_staging(batch_id: str, rows: List[Dict[str, Any]]):
    payload = []
    for r in rows:
        payload.append({
            "batch_id": batch_id,
            "source": r["source"],
            "name": (r.get("name") or "").strip(),
            "department": r.get("department"),
            "position": r.get("position"),
            "phone": r.get("phone"),
            "task": r.get("task"),       # ← 추가
            "raw": r.get("raw"),
        })
    if payload:
        sb.table("staging_gov_staff").insert(payload).execute()

# ---------- 후보 생성 ----------
def build_candidates(batch_id: str):
    st = sb.table("staging_gov_staff").select("*").eq("batch_id", batch_id).execute().data or []
    persons = sb.table("person_registry").select("person_id,canonical_name").execute().data or []
    current = sb.table("gov_staff_current").select("*").execute().data or []

    current_by_person: Dict[str, List[dict]] = {}
    for r in current:
        current_by_person.setdefault(r["person_id"], []).append(r)

    cands = []
    for s in st:
        s_name_norm = _norm_name(s["name"])
        s_phone = (s.get("phone") or "").strip()
        s_dept  = _dept_tokens(s.get("department") or "")
        s_pos   = s.get("position") or ""

        for p in persons:
            # ★ 이름 동일하지 않으면 후보 제외
            if _norm_name(p["canonical_name"]) != s_name_norm:
                continue

            score = 0.0
            # 전화번호 완전 일치 → 사실상 동일인
            if s_phone and any((s_phone == (x.get("phone") or "")) for x in current_by_person.get(p["person_id"], [])):
                score = 1.0
            else:
                # 같은 이름 내에서 부서/직위 유사도로 가벼운 가점 (충돌시 수동판단)
                best_dept = 0.0
                best_pos  = 0.0
                for x in current_by_person.get(p["person_id"], []):
                    best_dept = max(best_dept, _jaccard(s_dept, _dept_tokens(x.get("department") or "")))
                    best_pos  = max(best_pos, _pos_similarity(s_pos, x.get("position")))
                score = round(min(0.9, (best_dept * 0.5) + (best_pos * 0.4)), 3)

            if score > 0.0:
                cands.append({
                    "batch_id": batch_id,
                    "staging_id": s["staging_id"],
                    "candidate_person_id": p["person_id"],
                    "score": score,
                    "features": {},
                })

    if cands:
        sb.table("match_candidates").insert(cands).execute()

# ---------- 자동 결정 ----------
def auto_decide(batch_id: str, hi: float = 0.85, margin: float = 0.10):
    rows = sb.table("match_candidates").select("*")\
        .eq("batch_id", batch_id).order("staging_id").order("score", desc=True)\
        .execute().data or []

    # staging별 후보 묶기
    by_staging: Dict[int, List[dict]] = {}
    for r in rows:
        by_staging.setdefault(r["staging_id"], []).append(r)

    # 이번 배치의 전체 staging id 목록
    st_rows = sb.table("staging_gov_staff").select("staging_id").eq("batch_id", batch_id).execute().data or []
    all_st_ids = [r["staging_id"] for r in st_rows]

    decisions = []

    # 후보 있는 staging 처리
    for staging_id, cands in by_staging.items():
        top = cands[0]
        top_score = float(top["score"])
        second = float(cands[1]["score"]) if len(cands) > 1 else 0.0

        if top_score >= hi and (top_score - second) >= margin:
            decisions.append({
                "batch_id": batch_id,
                "staging_id": staging_id,
                "person_id": top["candidate_person_id"],
                "decision": "AUTO",
                "score": top_score
            })
        else:
            decisions.append({
                "batch_id": batch_id,
                "staging_id": staging_id,
                "person_id": None,
                "decision": "PENDING",
                "score": top_score
            })

    # 후보가 "아예 없는" staging 은 자동 NEW로 간주
    staged_with_cands = set(by_staging.keys())
    for sid in all_st_ids:
        if sid not in staged_with_cands:
            decisions.append({
                "batch_id": batch_id,
                "staging_id": sid,
                "person_id": None,
                "decision": "AUTO",   # 후보 없음 → 자동 신규
                "score": 1.0
            })

    if decisions:
        # 중복 upsert 보호
        for d in decisions:
            sb.table("match_decisions").upsert(d, on_conflict="batch_id,staging_id").execute()

# ---------- 부트스트랩(최초 1회) ----------
def seed_batch_as_new(batch_id: str):
    st = sb.table("staging_gov_staff").select("*").eq("batch_id", batch_id).execute().data or []
    decs = []
    for s in st:
        r = sb.table("person_registry").insert({"canonical_name": s["name"]}).execute().data
        pid = r[0]["person_id"]
        decs.append({
            "batch_id": batch_id,
            "staging_id": s["staging_id"],
            "person_id": pid,
            "decision": "AUTO",
            "score": 1.0,
            "rationale": "bootstrap"
        })
    if decs:
        sb.table("match_decisions").insert(decs).execute()

# ---------- 결정 적용 ----------
NAME_CHANGED_AS_NEW = True  # ← 이름이 다르면 퇴사+신규로 처리

def apply_decisions(batch_id: str):
    # staging/decision 로드
    st_rows = sb.table("staging_gov_staff").select("*").eq("batch_id", batch_id).execute().data or []
    st_by_id = {r["staging_id"]: r for r in st_rows}
    decs = sb.table("match_decisions").select("*").eq("batch_id", batch_id).execute().data or []

    curr_all = sb.table("gov_staff_current").select("*").execute().data or []
    curr_key = {(r["person_id"], r["source"]): r for r in curr_all}

    def fill_full(x: dict | None) -> dict:
        # 표시 필드 6개를 모두 채워서 반환
        x = x or {}
        return {
            "source":     x.get("source")     or "-",
            "department": x.get("department") or "-",
            "position":   x.get("position")   or "-",
            "name":       x.get("name")       or "-",
            "phone":      x.get("phone")      or "-",
            "task":       x.get("task")       or "-",
        }

    events = []
    upserts = []
    seen = set()  # (person_id, source) 이번 배치에서 본 대상

    for d in decs:
        if d["decision"] not in ("AUTO", "MANUAL"):
            continue
        s = st_by_id.get(d["staging_id"])
        if not s:
            continue

        # person_id 확정(없으면 신규 생성)
        pid = d.get("person_id")
        if not pid:
            r = sb.table("person_registry").insert({"canonical_name": s["name"]}).execute().data
            pid = r[0]["person_id"]

        # 사람 기록 갱신
        sb.table("person_registry").update({"last_seen_at": _now_iso()}).eq("person_id", pid).execute()

        key = (pid, s["source"])
        seen.add(key)

        prev = curr_key.get(key)
        next_row = {
            "person_id": pid,
            "source": s["source"],
            "name": s.get("name"),
            "department": s.get("department"),
            "position": s.get("position"),
            "phone": s.get("phone"),
            "task": s.get("task"),
            "is_active": True,
            "last_seen_batch": batch_id,
            "updated_at": _now_iso(),
        }

        # 변경 내용 라벨 계산(복수)
        kinds: list[str] = []
        if prev is None:
            kinds = ["NEW"]
        else:
            if (prev.get("department") or "") != (next_row.get("department") or ""):
                kinds.append("MOVE")
            if (prev.get("position") or "") != (next_row.get("position") or ""):
                kinds.append("CHANGE_POSITION")
            if (prev.get("phone") or "") != (next_row.get("phone") or ""):
                kinds.append("CHANGE_PHONE")
            if (prev.get("task") or "") != (next_row.get("task") or ""):
                kinds.append("CHANGE_TASK")
            if (prev.get("name") or "") != (next_row.get("name") or ""):
                kinds.append("CHANGE_NAME")

        events.append({
            "batch_id": batch_id,
            "person_id": pid,
            "source": s["source"],
            "event_type": ",".join(kinds) if kinds else "NO_CHANGE",
            "prev": fill_full(prev),      # 6필드 모두 채움
            "next": fill_full(next_row),  # 6필드 모두 채움
        })

        upserts.append(next_row)

    # 미포함 → 퇴사
    for (pid, src), prev in curr_key.items():
        if prev.get("is_active") and (pid, src) not in seen:
            events.append({
                "batch_id": batch_id,
                "person_id": pid,
                "source": src,
                "event_type": "DEPARTED",
                "prev": fill_full(prev),
                "next": {  # 표시는 (퇴사)로 하되, 6필드도 채워두기 원하면 fill_full({})로 바꿔도 됨
                    "source": "-", "department": "-", "position": "-",
                    "name": "-",   "phone": "-",      "task": "-"
                },
            })
            upserts.append({**prev, "is_active": False, "updated_at": _now_iso()})

    if events:
        sb.table("gov_staff_events").insert(events).execute()

    # current 반영
    for row in upserts:
        sb.table("gov_staff_current").upsert(row, on_conflict="person_id,source").execute()

# ---------- (테스트용) 기존 테이블에서 MOTIE 뽑아오기 ----------
def load_motie_from_existing(limit: int = 5000) -> List[Dict[str, Any]]:
    # 1) gov_staff (source='MOTIE')
    try:
        data = sb.table("gov_staff").select("name,department,position,phone").eq("source", "MOTIE").limit(limit).execute().data
        if data:
            return [{"source": "MOTIE", **r, "raw": r} for r in data]
    except Exception:
        pass

    # 2) motie_org_people
    try:
        data = sb.table("motie_org_people").select("*").limit(limit).execute().data
        if data:
            rows = []
            for r in data:
                rows.append({
                    "source": "MOTIE",
                    "name": r.get("name"),
                    "department": r.get("department"),
                    "position": r.get("position"),
                    "phone": r.get("phone"),
                    "raw": r
                })
            return rows
    except Exception:
        pass

    # 3) motie_org
    try:
        data = sb.table("motie_org").select("*").limit(limit).execute().data
        if data:
            rows = []
            for r in data:
                rows.append({
                    "source": "MOTIE",
                    "name": r.get("name"),
                    "department": r.get("department"),
                    "position": r.get("position"),
                    "phone": r.get("phone"),
                    "raw": r
                })
            return rows
    except Exception:
        pass

    return []