# /var/www/html/bot/app/crawler/group_n8n.py
from __future__ import annotations
import os, sys, re, json, time, argparse, logging, html
from typing import Any, Dict, List, Optional, Tuple
from datetime import datetime, timedelta
import uuid
import requests
from pypdf import PdfReader

# Imports resolve relative to the repo root
ROOT_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
if ROOT_DIR not in sys.path:
    sys.path.append(ROOT_DIR)

from app.services.supabase_service import get_client  # noqa: E402

# --------------------
# Configuration
# --------------------
APBA_IDS = [
    "C0247",  # 한국전력공사
    "C0042",  # 한국남동발전(주)
    "C0043",  # 한국남부발전(주)
    "C0066",  # 한국동서발전(주)
    "C0082",  # 한국서부발전(주)
    "C0220",  # 한국수력원자력(주)
    "C0259",  # 한국중부발전(주)
    "C0248",  # 한국전력기술주식회사
    "C0306",  # 한전KDN
    "C0305",  # 한전KPS(주)
    "C0236",  # 한전원자력연료(주)
]

LIST_URL = "https://alio.go.kr/item/itemReportListSusi.json"
PREWARM_URL = "https://alio.go.kr/item/itemOrganList.do"
PDF_URL = "https://alio.go.kr/download/pdf.json"

# Table/view names
TABLE_ID = "kepco_id"          # disclosure ledger
TABLE_RAW = "kepco_raw"        # raw text
TABLE_STG = "kepco_org_stg"    # parsing staging
TABLE_SNAP = "kepco_org_snapshot"  # promoted snapshots
TABLE_CUR_VIEW = "kepco_org_cur"   # current view
TABLE_ORG = "kepco_org"        # (legacy) physical current table (web compat)
TABLE_HISTORY = "kepco_history"

# Telegram environment variables
TG_BOT_TOKEN = os.getenv("TELEGRAM_BOT_TOKEN") or os.getenv("TG_BOT_TOKEN")
TG_CHAT_ID = os.getenv("TELEGRAM_CHAT_ID") or os.getenv("TG_CHAT_ID")

LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
logger = logging.getLogger("group_n8n")
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("[%(levelname)s] %(message)s"))
logger.addHandler(handler)
logger.setLevel(LOG_LEVEL)

# --------------------
# Shared utilities
# --------------------
VACANCY_PAT = re.compile(r"^\s*\(?공석\)?\s*$")

def is_vacant_name(name: Optional[str]) -> bool:
    if not name:
        return False
    s = str(name).strip()
    if VACANCY_PAT.match(s):
        return True
    return s in {"결원", "-"}

def normalize(s: Optional[str]) -> str:
    if not s:
        return ""
    s = s.replace("\r", "")
    s = re.sub(r"[ \t]+", " ", s)
    s = re.sub(r"[ ]*\n[ ]*", "\n", s)
    return s.strip()

def is_blank_date(s: Optional[str]) -> bool:
    if not s:
        return True
    v = re.sub(r"\s+", "", s).strip()
    return v in {"-", "–", "—"} or re.search(r"(미정|무기한)", v) is not None

def pick(pattern: str, s: str) -> Optional[str]:
    m = re.search(pattern, s)
    return m.group(1).strip() if m else None

def extract_department_from_header(merged_text: str) -> Optional[str]:
    m = re.search(r"임원\s*현황\s*\n([^\n]+)\n임원\s*현황", merged_text)
    return m.group(1).strip() if m else None

def merge_page_texts(texts: List[str]) -> str:
    joined = "\n".join([normalize(t) for t in texts if t])
    joined = re.sub(r"\n{2,}", "\n", joined)
    return joined.strip()

def _normalize_name(s: Optional[str]) -> str:
    return re.sub(r"\s+", "", (s or "")).strip()

def _key(dept: str, name: Optional[str], start: Optional[str]) -> Tuple[str, str, Optional[str]]:
    return (dept, _normalize_name(name), start or None)
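
# Illustrative key normalization (sample values are assumptions, not real data):
# whitespace inside names is stripped, so spaced and unspaced spellings collapse
# to the same dedup key.
#   _key("한국전력공사", "홍 길동", "2023. 05. 01")
#   == _key("한국전력공사", "홍길동", "2023. 05. 01")
#   == ("한국전력공사", "홍길동", "2023. 05. 01")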

def _to_iso(posted_at: str) -> Optional[str]:
    if not posted_at:
        return None
    s = posted_at.strip().replace(" ", "")
    if re.match(r"^\d{4}-\d{2}-\d{2}$", s):
        return s
    m = re.match(r"^(\d{4})\.(\d{1,2})\.(\d{1,2})$", s)
    if m:
        y, mo, d = int(m.group(1)), int(m.group(2)), int(m.group(3))
        return f"{y:04d}-{mo:02d}-{d:02d}"
    return None

def _iso_prev_day(iso_ymd: str) -> str:
    dt = datetime.strptime(iso_ymd, "%Y-%m-%d")
    prev = dt - timedelta(days=1)
    return prev.strftime("%Y-%m-%d")

def _to_kor_date(iso_ymd: str) -> str:
    dt = datetime.strptime(iso_ymd, "%Y-%m-%d")
    return f"{dt.year}년 {dt.month:02d}월 {dt.day:02d}일"

# --------------------
# Parsing (n8n logic + 'before/after change' header filter)
# --------------------
def parse_people(merged_text: str, department_hint: Optional[str] = None) -> List[Dict[str, Any]]:
    department = department_hint or extract_department_from_header(merged_text)
    start_idx = merged_text.find("직위 ")
    body = merged_text[start_idx:] if start_idx >= 0 else merged_text
    sections = [sec for sec in re.split(r"\n(?=직위\s)", body) if re.match(r"^직위\s", sec)]
    out: List[Dict[str, Any]] = []

    for raw in sections:
        sec = normalize(raw)

        # "변경 전/후" 블록 제거
        if sec.startswith("직위 변경 전") or ("변경사유" in sec and not re.search(r"(임기|직책|성별)\s", sec)):
            continue

        position = pick(r"직위\s*([^\n]+?)\s*성명", sec)
        name     = pick(r"성명\s*([^\n]+?)(?=\s*(?:직책|성별|임기|\n))", sec)
        task     = pick(r"직책\s*([^\n]+?)(?=\s*(?:성별|임기|\n))", sec)
        gender   = pick(r"성별\s*([남여])", sec)

        # Term (start date / end date)
        m = re.search(r"임기\s*\(시작일\)\s*([^\n(]+?)\s*\(종료일\)\s*([^\n]+?)(?:\n|$)", sec)
        start_raw = m.group(1).strip() if m else None
        end_raw   = m.group(2).strip() if m else None
        start = None if is_blank_date(start_raw) else re.sub(r"\s+", " ", start_raw or "")
        end   = None if is_blank_date(end_raw)   else re.sub(r"\s+", " ", end_raw   or "")

        # Reason-occurrence date (if present)
        reason_raw = None
        m_reason = re.search(r"(?:사유\s*발생(?:일자|일)|사유발생(?:일자|일))\s*([^\n]+)", sec)
        if m_reason:
            reason_raw = m_reason.group(1).strip()
        reason_date = None if is_blank_date(reason_raw) else re.sub(r"\s+", " ", reason_raw or "")

        # Career history
        career_block = pick(
            r"주요경력\s*([\s\S]*?)(?=\n\s*(?:선임절차|선임절차규정|당연직여부|직위\s|기준일|제출일|기관 공시 담당자|$))",
            sec
        )
        career: List[str] = []
        if career_block:
            career = [ln.strip() for ln in re.split(r"\n+", career_block) if ln.strip()]

        # Drop label tokens masquerading as data rows
        def tok(x: Optional[str]) -> str:
            return (x or "").replace(" ", "")
        if tok(position) in {"변경전", "직위"} or tok(name) in {"변경후", "성명"}:
            continue
        if not name:
            continue

        out.append({
            "department": department,
            "name": name,
            "position": position,
            "gender": gender,
            "start": start,
            "end": end,
            "task": task,
            "reason_date": reason_date,
            "career": career,
        })
    return out
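
def _demo_parse_people() -> None:
    # Minimal sketch of parse_people on a hand-written section. The sample text
    # mimics the PDF layout the regexes above expect; it is an assumption, not
    # a captured disclosure.
    sample = (
        "직위 사장 성명 홍길동\n"
        "직책 대표이사 성별 남\n"
        "임기 (시작일) 2023. 05. 01 (종료일) 2026. 04. 30\n"
        "주요경력\n"
        "전) OO공사 기획본부장\n"
        "기준일 2024. 06. 30"
    )
    rows = parse_people(sample, department_hint="한국전력공사")
    assert rows[0]["name"] == "홍길동"
    assert rows[0]["position"] == "사장"
    assert rows[0]["start"] == "2023. 05. 01"
    assert rows[0]["career"] == ["전) OO공사 기획본부장"]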

# --------------------
# HTTP
# --------------------
def make_session() -> requests.Session:
    s = requests.Session()
    s.headers.update({
        "User-Agent": "GovBot/1.0 (+https://work.jjickjjicks.com)",
    })
    return s

def fetch_first_item(s: requests.Session, apba_id: str, debug: bool = False) -> Optional[Dict[str, Any]]:
    # Pre-warm (cookies/session)
    pre_params = {"apbaId": apba_id, "reportFormRootNo": 20305, "pageNo": 1}
    r0 = s.get(PREWARM_URL, params=pre_params, timeout=30)
    if debug:
        logger.debug("[%s] prewarm %s %s", apba_id, r0.status_code, r0.headers.get("content-type"))

    headers_json = {
        "Origin": "https://alio.go.kr",
        "Referer": "https://alio.go.kr/",
        "X-Requested-With": "XMLHttpRequest",
        "Content-Type": "application/json;charset=UTF-8",
    }
    body = {"pageNo": "1", "apbaId": apba_id, "reportFormRootNo": "20305"}
    r = s.post(LIST_URL, headers=headers_json, data=json.dumps(body), timeout=30)
    if debug:
        logger.debug("[%s] list %s %s %s", apba_id, r.status_code, r.headers.get("content-type"), r.text[:400])

    try:
        data = r.json()
    except Exception:
        return None
    if data.get("status") != "success":
        return None

    result = (data.get("data") or {}).get("result") or []
    organ_info = (data.get("data") or {}).get("organInfo") or {}
    organ_name = organ_info.get("apbaNa")

    if not result:
        return None
    first = result[0]
    return {
        "disclosureNo": first.get("disclosureNo"),
        "idate": first.get("idate"),
        "title": first.get("title"),
        "organ": organ_name,
    }
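
# Assumed shape of the list response consumed above (field names inferred from
# the parsing in fetch_first_item; values illustrative):
# {
#   "status": "success",
#   "data": {
#     "organInfo": {"apbaNa": "한국전력공사"},
#     "result": [{"disclosureNo": "12345678", "idate": "2024.06.30", "title": "임원 현황"}]
#   }
# }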

def fetch_pdf_to_tmp(s: requests.Session, disclosure_no: str, debug: bool = False) -> Optional[str]:
    params = {"disclosureNo": disclosure_no}
    r = s.get(PDF_URL, params=params, timeout=60)
    if debug:
        logger.debug("[PDF] %s -> %s", r.url, r.status_code)
    if r.status_code != 200:
        return None
    out_path = f"/tmp/alio_{disclosure_no}.pdf"
    with open(out_path, "wb") as f:
        f.write(r.content)
    return out_path

def pdf_to_page_texts(pdf_path: str) -> List[str]:
    texts: List[str] = []
    reader = PdfReader(pdf_path)
    for page in reader.pages:
        try:
            t = page.extract_text() or ""
        except Exception:
            t = ""
        texts.append(t)
    return texts

# --------------------
# Supabase helpers
# --------------------
def sb():
    return get_client()

def sb_row_exists_id(client, table: str, id_value: str) -> bool:
    try:
        res = client.table(table).select("id").eq("id", id_value).limit(1).execute()
        rows = res.data or []
        return len(rows) > 0
    except Exception as e:
        logger.warning("exists check failed (%s, %s): %s", table, id_value, e)
        return False

def sb_insert(client, table: str, row: Dict[str, Any]) -> None:
    client.table(table).insert(row).execute()

# --------------------
# Telegram
# --------------------
def send_telegram(text: str, parse_mode: str = "HTML", disable_preview: bool = True) -> bool:
    token = TG_BOT_TOKEN
    chat_id = TG_CHAT_ID
    if not token or not chat_id:
        logger.info("[TG] skipped (no token/chat id)")
        return False
    url = f"https://api.telegram.org/bot{token}/sendMessage"
    payload = {
        "chat_id": chat_id,
        "text": text,
        "parse_mode": parse_mode,
        "disable_web_page_preview": disable_preview,
    }
    try:
        r = requests.post(url, json=payload, timeout=20)
        if r.status_code != 200:
            logger.warning("[TG] send failed %s: %s", r.status_code, r.text)
            return False
        return True
    except Exception as e:
        logger.warning("[TG] send error: %s", e)
        return False

def build_tg_message(organ: str, pdf_url: str) -> str:
    org_esc = html.escape(organ or "-")
    url_esc = html.escape(pdf_url, quote=True)
    lines = [
        "그룹사 신규 임원공시입니다.",
        f"{org_esc}",
        f"<a href=\"{url_esc}\">자세히 보기</a>",
    ]
    return "\n".join(lines)

# --------------------
# crawler_run bookkeeping
# --------------------
def insert_crawler_run(client, target: str) -> str:
    run_id = str(uuid.uuid4())
    client.table("crawler_run").insert({
        "id": run_id,
        "target": target,
        "status": "running",
        "started_at": datetime.utcnow().isoformat()
    }).execute()
    return run_id

def update_crawler_run(client, run_id: str, status: str, pages: int = 0, rows: int = 0, fail_reason: Optional[str] = None):
    payload = {
        "finished_at": datetime.utcnow().isoformat(),
        "status": status,
        "pages": pages,
        "rows": rows,
        "fail_reason": fail_reason,
    }
    client.table("crawler_run").update(payload).eq("id", run_id).execute()

# --------------------
# Promotion/validation logic
# --------------------
def stg_quality_ok(rows: List[Dict[str, Any]]) -> bool:
    """최소 1행 & 전원이 공석/빈이름이 아니면 OK"""
    if not rows:
        return False
    non_blank = [r for r in rows if (r.get("name") or "").strip()]
    if not non_blank:
        return False
    non_vacant = [r for r in non_blank if not is_vacant_name(r.get("name"))]
    return len(non_vacant) > 0
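
# Illustrative outcomes (sample rows are assumptions):
#   stg_quality_ok([])                                       -> False  (no rows)
#   stg_quality_ok([{"name": "(공석)"}])                      -> False  (all vacant)
#   stg_quality_ok([{"name": "(공석)"}, {"name": "홍길동"}])  -> True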

def promote_department(client, department: str, run_id: str) -> int:
    """
    staging(run_id, department) -> snapshot 저장 -> 현재본(kepco_org) 교체
    note: kepco_org에는 reason_date 컬럼이 없으므로 제외하고 insert
    """
    # 1) Read this run's staging rows for the department
    stg_rows = (
        client.table("kepco_org_stg")
        .select("department,name,position,gender,start,end,task,reason_date,career,key_hash")
        .eq("run_id", run_id)
        .eq("department", department)
        .limit(10000)
        .execute()
        .data
        or []
    )

    # No-op if nothing was staged
    if not stg_rows:
        return 0

    # 2) Load into the snapshot as-is (for validation/tracing; reason_date included)
    snap_payload = [
        {
            "run_id": run_id,
            "department": r.get("department"),
            "name": r.get("name"),
            "position": r.get("position"),
            "gender": r.get("gender"),
            "start": r.get("start"),
            "end": r.get("end"),
            "task": r.get("task"),
            "reason_date": r.get("reason_date"),  # snapshot에는 남김
            "career": r.get("career"),
            "key_hash": r.get("key_hash"),
        }
        for r in stg_rows
    ]
    if snap_payload:
        client.table("kepco_org_snapshot").insert(snap_payload).execute()

    # 3) Replace current table: delete existing rows for the department
    client.table("kepco_org").delete().eq("department", department).execute()

    # 4) Insert into current table (reason_date excluded)
    cur_payload = [
        {
            "department": r.get("department"),
            "name": r.get("name"),
            "position": r.get("position"),
            "gender": r.get("gender"),
            "start": r.get("start"),
            "end": r.get("end"),
            "task": r.get("task"),
            "career": r.get("career"),
            # reason_date omitted: kepco_org has no such column
        }
        for r in stg_rows
    ]
    if cur_payload:
        client.table("kepco_org").insert(cur_payload).execute()

    return len(cur_payload)

# --------------------
# kepco_history sync (based on the promoted rows)
# --------------------
def apply_history(client, dept: str, posted_at_raw: str, pdf_url: str, merged_text: str) -> None:
    """
    - 직전 current 스냅샷 기준 → kepco_history 업데이트
    - 규칙은 기존 설명과 동일
    """
    posted_iso = _to_iso(posted_at_raw) or datetime.utcnow().strftime("%Y-%m-%d")
    prev_iso   = _iso_prev_day(posted_iso)
    prev_kor   = _to_kor_date(prev_iso)

    prev_rows = client.table(TABLE_HISTORY).select(
        "id, department, name, position, task, start, end, actual_end, posted_at, pdf_url"
    ).eq("department", dept).eq("current", True).execute().data or []
    prev_by_key: Dict[Tuple[str, str, Optional[str]], Dict[str, Any]] = {
        _key(dept, r.get("name"), r.get("start")): r for r in prev_rows
    }
    prev_keys = set(prev_by_key.keys())

    # Load the latest snapshot (= this run's promoted rows)
    cur_rows = client.table(TABLE_CUR_VIEW).select(
        "department,name,position,task,gender,start,end,career"
    ).eq("department", dept).limit(10000).execute().data or []
    new_by_key: Dict[Tuple[str, str, Optional[str]], Dict[str, Any]] = {}
    for r in cur_rows:
        k = _key(dept, r.get("name"), r.get("start"))
        if k not in new_by_key:
            new_by_key[k] = r
    new_keys = set(new_by_key.keys())

    to_close_keys = prev_keys - new_keys
    keep_keys     = prev_keys & new_keys
    insert_keys   = new_keys - prev_keys

    extend_keys: List[Tuple[str, str, Optional[str]]] = []
    task_change_keys: List[Tuple[str, str, Optional[str]]] = []

    def _norm_task(x: Optional[str]) -> str:
        return re.sub(r"\s+", "", (x or "")).strip()

    for k in keep_keys:
        prev_end  = (prev_by_key[k] or {}).get("end")
        new_end   = (new_by_key[k] or {}).get("end")
        prev_task = (prev_by_key[k] or {}).get("task") or ""
        new_task  = (new_by_key[k] or {}).get("task") or ""

        if (prev_end or "") != (new_end or ""):
            extend_keys.append(k)
        elif _norm_task(prev_task) != _norm_task(new_task):
            task_change_keys.append(k)

    if to_close_keys:
        ids_to_close = [prev_by_key[k]["id"] for k in to_close_keys if prev_by_key.get(k)]
        for i in range(0, len(ids_to_close), 1000):
            client.table(TABLE_HISTORY).update({"actual_end": prev_kor}) \
                  .in_("id", ids_to_close[i:i+1000]).is_("actual_end", None).execute()

    client.table(TABLE_HISTORY).update({"current": False}) \
          .eq("department", dept).eq("current", True).execute()

    keep_same = [k for k in keep_keys if (k not in extend_keys and k not in task_change_keys)]
    if keep_same:
        ids_to_keep = [prev_by_key[k]["id"] for k in keep_same if prev_by_key.get(k)]
        for i in range(0, len(ids_to_keep), 1000):
            client.table(TABLE_HISTORY).update({
                "current": True,
                "posted_at": posted_at_raw,
                "pdf_url": pdf_url,
            }).in_("id", ids_to_keep[i:i+1000]).execute()

    if task_change_keys:
        ids_old = [prev_by_key[k]["id"] for k in task_change_keys if prev_by_key.get(k)]
        for i in range(0, len(ids_old), 1000):
            client.table(TABLE_HISTORY).update({"old_career": True}) \
                  .in_("id", ids_old[i:i+1000]).execute()

    # Insert new rows
    for k in insert_keys:
        p = new_by_key[k]
        payload = {
            "department": dept,
            "name": p.get("name"),
            "task": p.get("task"),
            "position": p.get("position"),
            "gender": p.get("gender"),
            "start": p.get("start"),
            "end": p.get("end"),
            "career": p.get("career") or [],
            "posted_at": posted_at_raw,
            "pdf_url": pdf_url,
            "current": True,
            "extended": False,
            "old_career": False,
        }
        if is_vacant_name(p.get("name")):
            client.table(TABLE_HISTORY).insert(payload).execute()
            continue
        q = client.table(TABLE_HISTORY).select("id").eq("department", dept).eq("name", p.get("name"))
        q = q.is_("start", None) if p.get("start") is None else q.eq("start", p.get("start"))
        exist = q.limit(1).execute().data or []
        if not exist:
            client.table(TABLE_HISTORY).insert(payload).execute()

    # Insert extended-term rows
    for k in extend_keys:
        p = new_by_key[k]
        payload = {
            "department": dept, "name": p.get("name"),
            "task": p.get("task"), "position": p.get("position"),
            "gender": p.get("gender"),
            "start": p.get("start"), "end": p.get("end"),
            "career": p.get("career") or [],
            "posted_at": posted_at_raw, "pdf_url": pdf_url,
            "current": True, "extended": True, "old_career": False,
        }
        client.table(TABLE_HISTORY).insert(payload).execute()

    # Insert task-change rows
    for k in task_change_keys:
        p = new_by_key[k]
        payload = {
            "department": dept, "name": p.get("name"),
            "task": p.get("task"), "position": p.get("position"),
            "gender": p.get("gender"),
            "start": p.get("start"), "end": p.get("end"),
            "career": p.get("career") or [],
            "posted_at": posted_at_raw, "pdf_url": pdf_url,
            "current": True, "extended": False, "old_career": False,
        }
        client.table(TABLE_HISTORY).insert(payload).execute()

    logger.info(
        "[history] dept=%s applied: close=%d, keep_same=%d, insert=%d, extend=%d, task_change=%d",
        dept, len(to_close_keys), len(keep_same), len(insert_keys), len(extend_keys), len(task_change_keys)
    )

# --------------------
# Main processing (per company)
# --------------------
def process_company(session: requests.Session, apba_id: str, client, run_id: str, debug: bool = False, dry: bool = False) -> Tuple[bool, Optional[str], int, int]:
    """
    반환: (ok, department, inserted_org_rows, pages)
    """
    logger.info("[%s] ---- start ----", apba_id)

    first = fetch_first_item(session, apba_id, debug=debug)
    if not first or not first.get("disclosureNo") or not first.get("organ"):
        logger.warning("[%s] first item missing -> skip", apba_id)
        return False, None, 0, 0

    disclosure_no = first["disclosureNo"]
    organ = first["organ"]
    title = first.get("title") or ""
    idate = first.get("idate")
    posted_iso = _to_iso(idate)

    logger.info("[%s] first: disclosureNo=%s, organ=%s, title=%s, idate=%s",
                apba_id, disclosure_no, organ, title, idate)

    record_id = f"{disclosure_no}-{organ}"
    already = sb_row_exists_id(client, TABLE_ID, record_id)

    # Download the PDF
    pdf_path = fetch_pdf_to_tmp(session, disclosure_no, debug=debug)
    if not pdf_path:
        logger.warning("[%s] pdf.json fetch failed.", apba_id)
        # For a new disclosure, record the ledger entry only and stop
        if not already:
            id_row = {
                "id": record_id,
                "department": organ,
                "disclosure_no": disclosure_no,
                "title": title,
                "posted_at": idate,
                "pdf_url": f"{PDF_URL}?disclosureNo={disclosure_no}",
            }
            if not dry:
                sb_insert(client, TABLE_ID, id_row)
        return True, organ, 0, 0

    # Pages/text
    texts = pdf_to_page_texts(pdf_path)
    merged = merge_page_texts(texts)
    pages = len(texts)

    # Ledger (new disclosures only)
    if not already:
        id_row = {
            "id": record_id,
            "department": organ,
            "disclosure_no": disclosure_no,
            "title": title,
            "posted_at": idate,
            "pdf_url": f"{PDF_URL}?disclosureNo={disclosure_no}",
        }
        if not dry:
            sb_insert(client, TABLE_ID, id_row)
        # Telegram notification
        if not dry:
            msg = build_tg_message(organ, id_row["pdf_url"])
            send_telegram(msg, parse_mode="HTML", disable_preview=True)

    # Store the raw text
    if not dry:
        sb_insert(client, TABLE_RAW, {
            "run_id": run_id,
            "apba_id": apba_id,
            "organ": organ,
            "disclosure_no": disclosure_no,
            "title": title,
            "posted_at": idate,
            "posted_at_iso": posted_iso,
            "pdf_url": f"{PDF_URL}?disclosureNo={disclosure_no}",
            "pages": pages,
            "merged_text": merged
        })

    # Parse → store to staging
    people = parse_people(merged, department_hint=organ)
    ins_stg = 0
    if not dry and people:
        chunk = 500
        for i in range(0, len(people), chunk):
            part = people[i:i+chunk]
            payload = [{
                "run_id": run_id,
                "department": p.get("department"),
                "name": p.get("name"),
                "position": p.get("position"),
                "gender": p.get("gender"),
                "start": p.get("start"),
                "end": p.get("end"),
                "task": p.get("task"),
                "reason_date": p.get("reason_date"),
                "career": p.get("career") or [],
            } for p in part]
            client.table(TABLE_STG).insert(payload).execute()
            ins_stg += len(part)

    # Promote (only when staging quality passes)
    promoted = 0
    if not dry and stg_quality_ok(people):
        promoted = promote_department(client, organ, run_id)
        if promoted > 0:
            try:
                apply_history(client, organ, idate or (posted_iso or ""), f"{PDF_URL}?disclosureNo={disclosure_no}", merged)
            except Exception as e:
                logger.exception("[%s] apply_history failed: %s", apba_id, e)

    logger.info("[%s] pages=%d, stg=%d, promoted=%d", apba_id, pages, ins_stg, promoted)
    return True, organ, promoted, pages

# --------------------
# Run (multiple companies)
# --------------------
def run_once(apba_ids: Optional[List[str]] = None, debug: bool = False, dry: bool = False, run_id: Optional[str] = None) -> Dict[str, Any]:
    ids = apba_ids or APBA_IDS
    session = make_session()
    client = sb()

    summary = {"processed": 0, "skipped": 0, "inserted_org_rows": 0, "pages": 0, "companies": []}

    for apba in ids:
        try:
            ok, dept, ins, pages = process_company(session, apba, client, run_id, debug=debug, dry=dry)
            if ok:
                summary["processed"] += 1
                summary["inserted_org_rows"] += ins
                summary["pages"] += pages
                if dept:
                    summary["companies"].append({"apbaId": apba, "department": dept, "inserted": ins, "pages": pages})
            else:
                summary["skipped"] += 1
        except Exception as e:
            logger.exception("[%s] error: %s", apba, e)
            summary["skipped"] += 1

        time.sleep(0.6)  # be polite to the API
    return summary

def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--debug", action="store_true")
    parser.add_argument("--dry-run", action="store_true")
    parser.add_argument("--only", type=str, default="", help="쉼표로 apbaId 제한: 예) C0236,C0247")
    args = parser.parse_args()

    if args.debug:
        logger.setLevel("DEBUG")

    client = sb()
    run_id = insert_crawler_run(client, "group_n8n")

    try:
        targets = APBA_IDS
        if args.only.strip():
            allow = {x.strip() for x in args.only.split(",") if x.strip()}
            targets = [x for x in APBA_IDS if x in allow]

        summary = run_once(targets, debug=args.debug, dry=args.dry_run, run_id=run_id)
        logger.info("done: %s", summary)

        update_crawler_run(
            client,
            run_id,
            status="passed",
            pages=summary.get("pages", 0),
            rows=summary.get("inserted_org_rows", 0)
        )
        print(summary)
    except Exception as e:
        logger.exception("group_n8n failed")
        update_crawler_run(client, run_id, status="failed", fail_reason=str(e))
        raise

if __name__ == "__main__":
    main()
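
# Usage (flags defined in main above):
#   python app/crawler/group_n8n.py                      # crawl all APBA_IDS
#   python app/crawler/group_n8n.py --only C0236,C0247   # restrict companies
#   python app/crawler/group_n8n.py --dry-run --debug    # fetch/parse, skip row inserts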
