# app/web/pages/admin/crawlers.py
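"""Admin UI and launch endpoints for crawler runs.

Routes:
    GET  /admin/crawlers            - paginated, filterable list of crawler_run rows with KPIs
    GET  /admin/crawlers/{run_id}   - run detail plus target-specific extras (MOEF / ALIO)
    POST /admin/crawlers/run        - launch a crawler as a detached subprocess (form POST)
    POST /api/admin/crawlers/run    - same launch, returning JSON for Ajax callers
"""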
from __future__ import annotations
from datetime import datetime, timezone
from zoneinfo import ZoneInfo
from typing import Any, Dict, List, Optional

import os
import sys
import subprocess
from pathlib import Path

from fastapi import APIRouter, Request, Query, Form, status
from fastapi.responses import HTMLResponse, RedirectResponse, JSONResponse
from app.web.deps import templates, sb

router = APIRouter()

STATUSES = ["running", "collected", "passed", "failed", "aborted"]
KST = ZoneInfo("Asia/Seoul")

# --- Shared utilities ---
def _to_kst_str(ts: Optional[str]) -> Optional[str]:
    if not ts:
        return None
    try:
        s = ts.replace("Z", "+00:00")
        dt = datetime.fromisoformat(s)
        if dt.tzinfo is None:
            dt = dt.replace(tzinfo=timezone.utc)
        return dt.astimezone(KST).strftime("%Y-%m-%d %H:%M:%S")
    except Exception:
        return ts

def _sec_to_str(sec: Optional[float]) -> str:
    if sec is None:
        return "-"
    sec = int(sec)
    h, rem = divmod(sec, 3600)
    m, s = divmod(rem, 60)
    return f"{h:02d}:{m:02d}:{s:02d}" if h else f"{m:02d}:{s:02d}"

def _duration_seconds(start_iso: Optional[str], end_iso: Optional[str]) -> Optional[float]:
    if not start_iso or not end_iso:
        return None
    try:
        s = datetime.fromisoformat(start_iso.replace("Z", "+00:00"))
        e = datetime.fromisoformat(end_iso.replace("Z", "+00:00"))
        if s.tzinfo is None:
            s = s.replace(tzinfo=timezone.utc)
        if e.tzinfo is None:
            e = e.replace(tzinfo=timezone.utc)
        return max(0.0, (e - s).total_seconds())
    except Exception:
        return None

def _apply_filters(qb, status: Optional[str], target: Optional[str], q: Optional[str]):
    """Apply the shared status/target/keyword filters to a crawler_run query builder."""
    if status:
        qb = qb.eq("status", status)
    if target:
        qb = qb.eq("target", target)
    if q:
        # Keyword search across id/target/fail_reason. Assumes postgrest-py's `or_`
        # filter is available; it renders as a PostgREST `or=(...)` parameter with
        # `*` as the ilike wildcard. (Mutating qb.session.params instead would leak
        # the filter into unrelated queries on the shared client.)
        qb = qb.or_(f"id.ilike.*{q}*,target.ilike.*{q}*,fail_reason.ilike.*{q}*")
    return qb

def _fetch_targets() -> List[str]:
    try:
        res = sb.table("crawler_run").select("target").order("started_at", desc=True).limit(1000).execute()
        rows = res.data or []
        seen, out = set(), []
        for r in rows:
            t = (r.get("target") or "").strip()
            if t and t not in seen:
                seen.add(t)
                out.append(t)
        return out
    except Exception:
        return []

def _safe_q(fn, default):
    """Run a query thunk and return `default` if it raises (missing table, network error, etc.)."""
    try:
        return fn()
    except Exception:
        return default

# --- Interpreter path: use an absolute Python binary to avoid "[Errno 2] No such file or directory" when spawning crawlers ---
PY_BIN = os.environ.get("PYTHON_BIN") or sys.executable or "/usr/bin/python3"

# Manual-run targets: display labels and launch commands
CRAWLER_LABELS: Dict[str, str] = {
    "na_industry_committee": "국회 산업 관련 상임위",
    "motie_org_pipeline":    "산업부 조직",
    "me_org_pipeline":       "환경부 조직",
    "moef_org_pipeline":     "기재부 조직",
    "motie_id":              "산업부 인사동정",
    "moef_id":               "기재부 인사동정",
    "me_id":                 "환경부 인사동정",
    "group_n8n":             "전력그룹사(ALIO)",
}
CRAWLER_COMMANDS: Dict[str, List[str]] = {
    "na_industry_committee": [PY_BIN, "-m", "app.crawler.na_industry_committee"],
    "motie_org_pipeline":    [PY_BIN, "-m", "app.crawler.motie_org_pipeline"],
    "me_org_pipeline":       [PY_BIN, "-m", "app.crawler.me_org_pipeline"],
    "moef_org_pipeline":     [PY_BIN, "-m", "app.crawler.moef_org_pipeline"],
    "motie_id":              [PY_BIN, "-m", "app.crawler.motie_n8n"],
    "moef_id":               [PY_BIN, "-m", "app.crawler.moef_n8n"],
    "me_id":                 [PY_BIN, "-m", "app.crawler.me_n8n"],
    "group_n8n":             [PY_BIN, "-m", "app.crawler.group_n8n"],
}
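
# To expose another crawler in the admin UI, register the same key in both dicts
# above, e.g. (hypothetical module name):
#   CRAWLER_LABELS["foo_pipeline"]   = "Foo pipeline"
#   CRAWLER_COMMANDS["foo_pipeline"] = [PY_BIN, "-m", "app.crawler.foo_pipeline"]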

# Working directory for launched crawlers
PROJECT_ROOT = Path(__file__).resolve().parents[4]  # repository root, e.g. /var/www/html/bot
WORKDIR = os.environ.get("CRAWLER_WORKDIR", str(PROJECT_ROOT))

# Optional protection token: accepted from either the request header or the form field
LAUNCH_TOKEN = os.environ.get("ADMIN_LAUNCH_TOKEN")

# --- List page ---
@router.get("/admin/crawlers", response_class=HTMLResponse)
async def admin_crawlers(
    request: Request,
    status: Optional[str] = Query(default=None),
    target: Optional[str] = Query(default=None),
    q: Optional[str] = Query(default=None),
    limit: int = Query(default=50, ge=1, le=500),
    page: int = Query(default=1, ge=1),
    auto_refresh: int = Query(default=0),
):
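    """Render the crawler_run list with filters, pagination, KPI counts, and average duration."""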
    targets = _fetch_targets()

    # total
    try:
        qb_count = sb.table("crawler_run").select("id", count="exact")
        qb_count = _apply_filters(qb_count, status, target, q)
        total = qb_count.execute().count or 0
    except Exception:
        total = 0

    pages = max(1, (total + limit - 1) // limit)
    page = min(page, pages)
    offset = (page - 1) * limit

    # rows
    runs: List[Dict[str, Any]] = []
    try:
        qb = sb.table("crawler_run").select("*").order("started_at", desc=True).range(offset, offset + limit - 1)
        qb = _apply_filters(qb, status, target, q)
        runs = qb.execute().data or []
    except Exception:
        runs = []

    for r in runs:
        st_raw, fn_raw = r.get("started_at"), r.get("finished_at")
        dur = _duration_seconds(st_raw, fn_raw)
        r["_duration_str"] = _sec_to_str(dur)
        r["started_at"] = _to_kst_str(st_raw) or "-"
        r["finished_at"] = _to_kst_str(fn_raw) or "-"

    # KPIs
    kpis = {"total": total, "passed": 0, "failed": 0, "running": 0, "avg_duration_str": "-"}
    try:
        # Per-status counts; these respect the current filters, so when a status
        # filter is active the other statuses legitimately count as 0.
        for s in ["passed", "failed", "running"]:
            cqb = sb.table("crawler_run").select("id", count="exact")
            cqb = _apply_filters(cqb, status, target, q).eq("status", s)
            kpis[s] = cqb.execute().count or 0

        aqb = sb.table("crawler_run").select("started_at,finished_at").order("started_at", desc=True)
        aqb = _apply_filters(aqb, status, target, q).not_.is_("finished_at", "null").limit(2000)
        rows = aqb.execute().data or []
        secs = [d for d in (_duration_seconds(r.get("started_at"), r.get("finished_at")) for r in rows) if d is not None]
        if secs:
            kpis["avg_duration_str"] = _sec_to_str(sum(secs)/len(secs))
    except Exception:
        pass

    # Also pass run-button metadata (labels, token flag) to the template
    return templates.TemplateResponse("admin/crawlers.html", {
        "request": request,
        "runs": runs,
        "targets": targets,
        "STATUSES": STATUSES,
        "status": status,
        "target": target,
        "q": q or "",
        "limit": limit,
        "page": page,
        "pages": pages,
        "total": total,
        "auto_refresh": auto_refresh,
        "kpis": kpis,
        "CRAWLER_LABELS": CRAWLER_LABELS,
        "LAUNCH_TOKEN_ENABLED": bool(LAUNCH_TOKEN),
    })

# --- Detail page ---
@router.get("/admin/crawlers/{run_id}", response_class=HTMLResponse)
async def admin_crawlers_detail(request: Request, run_id: str):
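    """Render a single crawler_run with KST timestamps, duration, and target-specific extras."""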
    try:
        res = sb.table("crawler_run").select("*").eq("id", run_id).limit(1).execute()
        run = (res.data or [None])[0]
    except Exception:
        run = None

    if not run:
        return templates.TemplateResponse("admin/crawler_detail.html", {
            "request": request, "not_found": True, "run": None
        })

    started_raw = run.get("started_at")
    finished_raw = run.get("finished_at")
    dur = _duration_seconds(started_raw, finished_raw)
    run["_started_kst"] = _to_kst_str(started_raw) or "-"
    run["_finished_kst"] = _to_kst_str(finished_raw) or "-"
    run["_duration_str"] = _sec_to_str(dur)

    target = (run.get("target") or "").strip()
    extras = {
        "snapshot": [],
        "stg_count": None,
        "raw_pages": [],
        "notes": [],
        "section_title": None,
    }

    if target == "moef_org":
        extras["snapshot"] = _safe_q(lambda:
            (sb.table("moef_org_snapshot")
               .select("*").eq("run_id", run_id)
               .order("id", desc=True).limit(2000).execute().data) or [], [])
        extras["stg_count"] = _safe_q(lambda:
            (sb.table("moef_org_stg").select("key_hash", count="exact")
               .eq("run_id", run_id).execute().count) or 0, 0)
        extras["raw_pages"] = _safe_q(lambda:
            (sb.table("moef_org_raw")
               .select("kind,page,url,created_at").eq("run_id", run_id)
               .order("page", desc=False).limit(2000).execute().data) or [], [])
        extras["section_title"] = "MOEF 수집 결과"

    elif target == "group_n8n":
        extras["snapshot"] = _safe_q(lambda:
            (sb.table("kepco_org_snapshot")
               .select("department, run_id, created_at").eq("run_id", run_id)
               .order("department", desc=False).limit(2000).execute().data) or [], [])
        extras["stg_count"] = _safe_q(lambda:
            (sb.table("kepco_org_stg").select("key_hash", count="exact")
               .eq("run_id", run_id).execute().count) or 0, 0)
        extras["raw_pages"] = _safe_q(lambda:
            (sb.table("kepco_raw")
               .select("apba_id, kind, page, url, created_at").eq("run_id", run_id)
               .order("apba_id", desc=False).order("page", desc=False)
               .limit(5000).execute().data) or [], [])
        extras["section_title"] = "전력그룹사(ALIO) 수집 결과"

    else:
        extras["section_title"] = "추가 정보"

    return templates.TemplateResponse("admin/crawler_detail.html", {
        "request": request,
        "run": run,
        "extras": extras,
    })

# --- Manual launch (form POST) ---
@router.post("/admin/crawlers/run", response_class=HTMLResponse)
async def admin_crawlers_run(
    request: Request,
    target: str = Form(...),
    launch_token: Optional[str] = Form(default=None),
    next_url: Optional[str] = Form(default="/admin/crawlers"),
    backfill_pages: Optional[int] = Form(default=None),
):
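    """Launch the selected crawler as a detached subprocess, then redirect back to the list."""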
    if LAUNCH_TOKEN:
        hdr = request.headers.get("x-admin-launch-token")
        if not (hdr == LAUNCH_TOKEN or launch_token == LAUNCH_TOKEN):
            return HTMLResponse("Unauthorized", status_code=401)

    cmd = CRAWLER_COMMANDS.get(target)
    if not cmd:
        return HTMLResponse(f"Unknown target: {target}", status_code=400)

    try:
        env = os.environ.copy()
        env["CRAWLER_LAUNCHED_FROM_ADMIN"] = "1"
        # Optional: pass a backfill page count through to the me_id crawler
        if target == "me_id" and backfill_pages and backfill_pages > 0:
            env["ME_ID_BACKFILL_PAGES"] = str(backfill_pages)

        # Launch detached: discard output and (on POSIX) start a new session so the
        # crawler keeps running independently of the web worker.
        subprocess.Popen(
            cmd,
            cwd=WORKDIR,
            env=env,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            preexec_fn=os.setsid if hasattr(os, "setsid") else None,
        )
        # Only allow same-site relative redirect targets (avoids an open redirect).
        if not next_url or not next_url.startswith("/"):
            next_url = "/admin/crawlers"
        return RedirectResponse(next_url, status_code=status.HTTP_303_SEE_OTHER)
    except Exception as e:
        return HTMLResponse(f"Failed to launch: {e}", status_code=500)

# --- (Optional) Ajax JSON launch ---
@router.post("/api/admin/crawlers/run")
async def api_admin_crawlers_run(
    request: Request,
    target: str = Form(...),
    launch_token: Optional[str] = Form(default=None),
):
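    """JSON variant of the manual launch endpoint, intended for Ajax callers."""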
    if LAUNCH_TOKEN:
        hdr = request.headers.get("x-admin-launch-token")
        if not (hdr == LAUNCH_TOKEN or launch_token == LAUNCH_TOKEN):
            return JSONResponse({"ok": False, "error": "unauthorized"}, status_code=401)

    cmd = CRAWLER_COMMANDS.get(target)
    if not cmd:
        return JSONResponse({"ok": False, "error": f"unknown target: {target}"}, status_code=400)

    try:
        env = os.environ.copy()
        env["CRAWLER_LAUNCHED_FROM_ADMIN"] = "1"
        # Launch detached, same as the form endpoint: discard output and start a new session.
        subprocess.Popen(
            cmd,
            cwd=WORKDIR,
            env=env,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            preexec_fn=os.setsid if hasattr(os, "setsid") else None,
        )
        return JSONResponse({"ok": True, "launched": target})
    except Exception as e:
        return JSONResponse({"ok": False, "error": str(e)}, status_code=500)
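
# Example invocation (sketch; assumes the app is reachable at localhost:8000 and that
# ADMIN_LAUNCH_TOKEN is exported in the calling shell):
#   curl -X POST http://localhost:8000/api/admin/crawlers/run \
#        -H "x-admin-launch-token: $ADMIN_LAUNCH_TOKEN" \
#        -F target=group_n8n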
