# services/motie_parser.py

import re
from bs4 import BeautifulSoup
from utils.fetcher import fetch_html

# Listing page fetched by parse_motie_organization_posts(). The query string
# pins pageIndex=1, so only that one page is requested; searchCategory=3 is
# presumably the filter that selects organization-related posts (TODO confirm).
MOTIE_ORG_URL = "https://www.motie.go.kr/kor/article/ATCL6e90bb9de?mno=&pageIndex=1&rowPageC=0&displayAuthor=&searchCategory=3&schClear=on&startDtD=&endDtD=&searchCondition=1&searchKeyword=#"

def _extract_post(row):
    """Convert one listing-table row into a post dict, or ``None`` if unusable.

    A row is unusable when it lacks a third ``<td>``, a fifth ``<td>``, or an
    anchor whose ``onclick`` starts with ``article.view``, or when that
    ``onclick`` value contains no digits.
    """
    title_cell = row.select_one("td:nth-of-type(3)")
    date_cell = row.select_one("td:nth-of-type(5)")
    view_anchor = row.select_one("a[onclick^='article.view']")
    if not all((title_cell, date_cell, view_anchor)):
        return None

    # The article id is taken as the first run of digits in the onclick value.
    id_match = re.search(r"\d+", view_anchor.get("onclick", ""))
    if not id_match:
        return None

    # The attachment link is optional; its href is passed through unchanged.
    attachment = row.select_one("a[href^='/attach/down']")
    return {
        "title": title_cell.get_text(strip=True),
        "date": date_cell.get_text(strip=True),
        "fileUrl": attachment["href"] if attachment else None,
        "articleId": id_match.group(0),
    }


def parse_motie_organization_posts():
    """Scrape the post listing at ``MOTIE_ORG_URL`` into a list of dicts.

    The page is fetched with ``fetch_html`` and every ``table tbody tr`` row
    is inspected. Rows that do not yield a title cell, a date cell, a
    ``article.view`` link and a numeric id are silently skipped.

    Returns:
        A list of dicts, in page order, each with the keys ``"title"`` and
        ``"date"`` (stripped cell text), ``"fileUrl"`` (the attachment
        link's ``href`` as found in the page, or ``None`` when the row has
        no such link) and ``"articleId"`` (a string of digits). An empty
        list is returned when ``fetch_html`` gives back a falsy value.
    """
    page = fetch_html(MOTIE_ORG_URL)
    if not page:
        return []

    document = BeautifulSoup(page, "html.parser")
    candidates = (_extract_post(tr) for tr in document.select("table tbody tr"))
    return [post for post in candidates if post is not None]