import os
import re
import time
import json
import hashlib
import socket
import ipaddress
import requests
from requests.adapters import HTTPAdapter
try:
    # Retry is provided by urllib3, which requests depends on
    from urllib3.util.retry import Retry  # type: ignore
except Exception:  # pragma: no cover
    Retry = None
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin, quote
from flask import Blueprint, request, Response, redirect, send_from_directory, jsonify
from config import (
    RENDERED_DIR,
    CACHE_DIR,
    ALLOWED_SCHEMES,
    ALLOWED_DOMAINS,
    REQUEST_TIMEOUT,
    RENDERED_MAX_FILES,
    RENDERED_TTL_SECS,
    PROXY_CACHE_MAX_FILES,
    PROXY_CACHE_TTL_SECS,
)
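
# config.py is expected to supply the values imported above. For reference, a
# minimal configuration might look like the following (values are illustrative
# assumptions, not the project's actual settings):
#
#     RENDERED_DIR = "rendered"
#     CACHE_DIR = "cache"
#     ALLOWED_SCHEMES = {"http", "https"}
#     ALLOWED_DOMAINS = ["dcinside.com"]   # empty collection disables the allow-list
#     REQUEST_TIMEOUT = 10                 # seconds
#     RENDERED_MAX_FILES = 200
#     RENDERED_TTL_SECS = 300              # seconds
#     PROXY_CACHE_MAX_FILES = 500
#     PROXY_CACHE_TTL_SECS = 3600          # seconds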

proxy_bp = Blueprint("proxy", __name__)

# Reuse a single HTTP session with connection pooling; enable retries when urllib3's Retry is available
session = requests.Session()
if Retry is not None:
    retries = Retry(total=2, backoff_factor=0.2, status_forcelist=[429, 500, 502, 503, 504])
else:
    retries = 0
adapter = HTTPAdapter(pool_connections=16, pool_maxsize=32, max_retries=retries)
session.mount("http://", adapter)
session.mount("https://", adapter)

def generate_filename_from_url(url):
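    """Derive a filesystem-safe ``<host><path>[_query].html`` filename from a URL."""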
    parsed_url = urlparse(url)
    filename = parsed_url.netloc + parsed_url.path.replace("/", "_")
    if parsed_url.query:
        filename += "_" + parsed_url.query.replace("=", "-").replace("&", "_")
    filename = re.sub(r"[^\w\-_.]", "", filename)
    return f"{filename}.html"

def save_rendered_html(html_content, filename):
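    """Write rendered HTML under RENDERED_DIR, then prune old files (best effort)."""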
    os.makedirs(RENDERED_DIR, exist_ok=True)
    with open(os.path.join(RENDERED_DIR, filename), "w", encoding="utf-8") as f:
        f.write(html_content)

    # best-effort cleanup
    try:
        cleanup_rendered_dir()
    except Exception:
        pass

def cleanup_rendered_dir():
    # Keep only recent files up to RENDERED_MAX_FILES
    if not os.path.isdir(RENDERED_DIR):
        return
    files = [
        (os.path.join(RENDERED_DIR, f), os.path.getmtime(os.path.join(RENDERED_DIR, f)))
        for f in os.listdir(RENDERED_DIR)
        if f.endswith(".html")
    ]
    if len(files) <= RENDERED_MAX_FILES:
        return
    files.sort(key=lambda x: x[1], reverse=True)
    for path, _ in files[RENDERED_MAX_FILES:]:
        try:
            os.remove(path)
        except OSError:
            pass

def cleanup_cache_dir():
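    """Prune the proxy cache; each entry is a .bin/.json pair, hence the * 2 file budget below."""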
    if not os.path.isdir(CACHE_DIR):
        return
    files = [
        (os.path.join(CACHE_DIR, f), os.path.getmtime(os.path.join(CACHE_DIR, f)))
        for f in os.listdir(CACHE_DIR)
        if f.endswith('.bin') or f.endswith('.json')
    ]
    if len(files) <= PROXY_CACHE_MAX_FILES * 2:
        return
    files.sort(key=lambda x: x[1], reverse=True)
    for path, _ in files[PROXY_CACHE_MAX_FILES * 2:]:
        try:
            os.remove(path)
        except OSError:
            pass

def is_private_host(hostname: str) -> bool:
    # Disallow local/private/reserved addresses to mitigate SSRF
    try:
        infos = socket.getaddrinfo(hostname, None)
    except socket.gaierror:
        return True  # unknown host: treat as unsafe
    for info in infos:
        ip = info[4][0]
        try:
            ip_obj = ipaddress.ip_address(ip)
        except ValueError:
            return True
        if (
            ip_obj.is_private
            or ip_obj.is_loopback
            or ip_obj.is_link_local
            or ip_obj.is_reserved
            or ip_obj.is_multicast
        ):
            return True
        # Explicitly block well-known metadata endpoint
        if str(ip_obj) == "169.254.169.254":
            return True
    return False

def is_domain_allowed(netloc: str) -> bool:
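    """Return True if the allow-list is empty, or the host matches or is a subdomain of an allowed domain."""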
    if not ALLOWED_DOMAINS:
        return True
    host = netloc.split(":")[0].lower()
    return any(host == d or host.endswith("." + d) for d in ALLOWED_DOMAINS)

def hashed_name(url: str) -> str:
    return hashlib.sha256(url.encode("utf-8")).hexdigest()

def proxy_cache_paths(url: str):
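    """Return the (content .bin, metadata .json) cache paths for a URL, keyed by its SHA-256 hash."""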
    h = hashed_name(url)
    os.makedirs(CACHE_DIR, exist_ok=True)
    return (
        os.path.join(CACHE_DIR, f"{h}.bin"),
        os.path.join(CACHE_DIR, f"{h}.json"),
    )

def load_proxy_cache(url: str):
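    """Return (content, meta) for a fresh cache entry, or None on miss, expiry, or read error."""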
    bin_path, meta_path = proxy_cache_paths(url)
    if not (os.path.exists(bin_path) and os.path.exists(meta_path)):
        return None
    # TTL check
    if time.time() - os.path.getmtime(bin_path) > PROXY_CACHE_TTL_SECS:
        return None
    try:
        with open(bin_path, 'rb') as bf:
            content = bf.read()
        with open(meta_path, 'r', encoding='utf-8') as mf:
            meta = json.load(mf)
        return content, meta
    except Exception:
        return None

def save_proxy_cache(url: str, content: bytes, meta: dict):
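    """Persist a response body and its minimal metadata to the proxy cache (best effort)."""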
    bin_path, meta_path = proxy_cache_paths(url)
    try:
        with open(bin_path, 'wb') as bf:
            bf.write(content)
        with open(meta_path, 'w', encoding='utf-8') as mf:
            json.dump(meta, mf)
        cleanup_cache_dir()
    except Exception:
        pass

def generate_render_filename(url: str, show_images: bool, exception_mode: str | None) -> str:
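    """Build the rendered filename, encoding view options (no-image, recommend mode) so variants do not collide."""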
    base = generate_filename_from_url(url)
    name, ext = os.path.splitext(base)
    suffix = ""
    if not show_images:
        suffix += "_noimg"
    if exception_mode == "recommend":
        suffix += "_rec"
    return f"{name}{suffix}{ext}"

@proxy_bp.route("/fetch")
def fetch():
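    """Fetch a page, rewrite its links and resources to go through this proxy,
    save the rendered HTML, and return its path as JSON."""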
    target_url = request.args.get("url")
    show_images = request.args.get("noimg") != "1"
    exception_mode = request.args.get("exception_mode")

    if not target_url:
        return jsonify({"error": "No URL provided"}), 400

    parsed = urlparse(target_url)
    if parsed.scheme not in ALLOWED_SCHEMES:
        return jsonify({"error": "Unsupported URL scheme"}), 400
    if not is_domain_allowed(parsed.netloc) or is_private_host(parsed.hostname or ""):
        return jsonify({"error": "URL not allowed"}), 400

    try:
        # Reuse a recently rendered file if it exists and is still fresh
        candidate_filename = generate_render_filename(target_url, show_images, exception_mode)
        candidate_path = os.path.join(RENDERED_DIR, candidate_filename)
        if os.path.exists(candidate_path) and (time.time() - os.path.getmtime(candidate_path) < RENDERED_TTL_SECS):
            return jsonify({
                "rendered_path": f"/rendered/{candidate_filename}",
                "current_url": target_url,
            })

        headers = {"User-Agent": "Mozilla/5.0", "Accept-Encoding": "gzip, deflate"}
        response = session.get(target_url, headers=headers, timeout=REQUEST_TIMEOUT, allow_redirects=True)
        response.raise_for_status()

        final_url = response.url
        soup = BeautifulSoup(response.content, "html.parser")

        # Strip images when the client requested a no-image render (noimg=1)
        if not show_images:
            for img in soup.find_all("img"):
                img.decompose()

        # Strip scripts that set document.domain, cross-domain storage helpers, and known trackers/analytics
        for tag in soup.find_all("script"):
            src = tag.get("src", "")
            if (
                (tag.string and "document.domain" in tag.string)
                or "cross_domain" in src
                or "crossDomainStorage" in src
                or "gtag/js" in src
                or "ba.min.js" in src
                or "sentry" in src
                or "analytics" in src
            ):
                tag.decompose()

        # Rewrite links and resource URLs so they are fetched through the proxy
        for tag in soup.find_all(["a", "link", "script", "img"]):
            attr = "href" if tag.name in ["a", "link"] else "src"
            if tag.has_attr(attr):
                orig_url = tag[attr]
                new_url = urljoin(final_url, orig_url)
                if tag.name == "a":
                    if exception_mode == "recommend" and "recommend" in new_url and "exception_mode" not in new_url:
                        new_url += ("&" if "?" in new_url else "?") + "exception_mode=recommend"
                    tag[attr] = f"/fetch?url={quote(new_url)}"
                else:
                    tag[attr] = f"/fetch_proxy?url={quote(new_url)}"

        # Insert a <base> tag so any remaining relative URLs resolve against the original page
        try:
            if soup.head:
                base_tag = soup.new_tag("base", href=final_url)
                soup.head.insert(0, base_tag)
        except Exception:
            pass

        filename = generate_render_filename(final_url, show_images, exception_mode)
        # Using str(soup) is faster than prettify and preserves original formatting better
        save_rendered_html(str(soup), filename)

        return jsonify({
            "rendered_path": f"/rendered/{filename}",
            "current_url": final_url,
        })

    except requests.exceptions.RequestException as e:
        return jsonify({"error": f"Error fetching URL: {str(e)}"}), 500

@proxy_bp.route("/fetch_proxy")
def fetch_proxy():
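    """Proxy a single page resource (image, script, stylesheet, ...) with on-disk caching."""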
    url = request.args.get("url")
    if not url:
        return "No URL provided", 400

    # Apply the same scheme/domain/private-host checks as /fetch so resource
    # fetches cannot be used for SSRF.
    parsed = urlparse(url)
    if parsed.scheme not in ALLOWED_SCHEMES:
        return "Unsupported URL scheme", 400
    if not is_domain_allowed(parsed.netloc) or is_private_host(parsed.hostname or ""):
        return "URL not allowed", 400

    try:
        headers = {
            "User-Agent": "Mozilla/5.0",
            "Accept-Encoding": "gzip, deflate",
        }

        # Serve from the on-disk cache when a fresh entry exists
        cached = load_proxy_cache(url)
        if cached:
            content, meta = cached
            resp_headers = [("Content-Type", meta.get("Content-Type", "application/octet-stream"))]
            # encourage browser caching too
            resp_headers.append(("Cache-Control", f"public, max-age={PROXY_CACHE_TTL_SECS}"))
            return Response(content, 200, resp_headers)

        resp = session.get(url, headers=headers, stream=True, timeout=REQUEST_TIMEOUT)
        excluded_headers = [
            "content-encoding", "content-length", "transfer-encoding", "connection"
        ]
        resp_headers = [
            (name, value)
            for name, value in resp.raw.headers.items()
            if name.lower() not in excluded_headers
        ]
        # Cache content and minimal metadata, but only for successful responses
        # so error bodies are never replayed as 200s from the cache.
        ct = resp.headers.get("Content-Type", "application/octet-stream")
        if resp.ok:
            save_proxy_cache(url, resp.content, {"Content-Type": ct})
        resp_headers.append(("Cache-Control", f"public, max-age={PROXY_CACHE_TTL_SECS}"))
        return Response(resp.content, resp.status_code, resp_headers)
    except requests.exceptions.RequestException as e:
        return f"Error fetching resource: {e}", 500

@proxy_bp.route("/rendered/<path:filename>")
def serve_rendered(filename):
    return send_from_directory(RENDERED_DIR, filename)

@proxy_bp.route("/favicon.ico")
def favicon():
    return "", 204

# Handle comment requests (GET/POST), proxied to gall.dcinside.com
@proxy_bp.route("/board/comment/", methods=["GET", "POST"])
def proxy_comment_root():
    return proxy_comment_sub("")

@proxy_bp.route("/board/comment/<path:subpath>", methods=["GET", "POST"])
def proxy_comment_sub(subpath):
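    """Forward comment requests to gall.dcinside.com, preserving method, query string, and form data."""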
    target_url = f"https://gall.dcinside.com/board/comment/{subpath}"
    try:
        headers = {
            "User-Agent": "Mozilla/5.0",
        }

        if request.method == "POST":
            resp = session.post(target_url, headers=headers, data=request.form, timeout=REQUEST_TIMEOUT)
        else:
            resp = session.get(target_url, headers=headers, params=request.args, timeout=REQUEST_TIMEOUT)

        excluded_headers = [
            "content-encoding", "content-length", "transfer-encoding", "connection"
        ]
        resp_headers = [
            (name, value)
            for name, value in resp.raw.headers.items()
            if name.lower() not in excluded_headers
        ]
        return Response(resp.content, resp.status_code, resp_headers)

    except requests.exceptions.RequestException as e:
        return f"Error proxying comment request: {e}", 500

# 404 handling
@proxy_bp.app_errorhandler(404)
def page_not_found(e):
    return "404 Not Found", 404
