o
    `Ph+-                     @   sn  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 zd dl
mZ W n ey9   dZY nw d dlmZ d dlmZmZmZ d dlmZmZmZmZmZmZ d dlmZmZmZmZmZm Z m!Z!m"Z"m#Z# ede$Z%e& Z'e	d	d
dZ(e')de( e')de( edureddg ddZ*e*e(_+dd Z,dd Z-dd Z.dd Z/de0de1fddZ2de0de1fdd Z3d!e0de0fd"d#Z4d!e0fd$d%Z5d!e0fd&d'Z6d!e0d(e7d)e8fd*d+Z9d!e0d,e1d-e0dB de0fd.d/Z:e%;d0d1d2 Z<e%;d3d4d5 Z=e%;d6d7d8 Z>e%;d9d:d; Z?e%j;d<d=d>gd?d@dA Z@e%j;dBd=d>gd?dCdD ZAe%BdEdFdG ZCdS )H    N)HTTPAdapter)Retry)BeautifulSoup)urlparseurljoinquote)	BlueprintrequestResponseredirectsend_from_directoryjsonify)	RENDERED_DIR	CACHE_DIRALLOWED_SCHEMESALLOWED_DOMAINSREQUEST_TIMEOUTRENDERED_MAX_FILESRENDERED_TTL_SECSPROXY_CACHE_MAX_FILESPROXY_CACHE_TTL_SECSproxy       )pool_connectionspool_maxsizezhttp://zhttps://   g?)i    i  i  i  )totalbackoff_factorstatus_forcelistc                 C   sX   t | }|j|jdd }|jr |d|jdddd 7 }tdd|}| dS )	N/_=-&z	[^\w\-_.] .html)r   netlocpathreplacequeryresub)url
parsed_urlfilename r1   +/var/www/html/proxy/modules/proxy_routes.pygenerate_filename_from_url)   s   
r3   c                 C   sr   t jtdd tt jt|ddd}||  W d    n1 s#w   Y  zt  W d S  ty8   Y d S w )NTexist_okwutf-8encoding)	osmakedirsr   openr)   joinwritecleanup_rendered_dir	Exception)html_contentr0   fr1   r1   r2   save_rendered_html1   s   rC   c               	   C   s   t jtsd S dd t tD } t| tkrd S | jdd dd | td  D ]\}}zt | W q) t	y=   Y q)w d S )Nc              	   S   s:   g | ]}| d rtjt|tjtjt|fqS )r'   )endswithr:   r)   r=   r   getmtime.0rB   r1   r1   r2   
<listcomp>@   s    "z(cleanup_rendered_dir.<locals>.<listcomp>c                 S      | d S N   r1   xr1   r1   r2   <lambda>G       z&cleanup_rendered_dir.<locals>.<lambda>Tkeyreverse)
r:   r)   isdirr   listdirlenr   sortremoveOSErrorfilesr)   r"   r1   r1   r2   r?   <   s   r?   c               	   C   s   t jtsd S dd t tD } t| td krd S | jdd dd | td d  D ]\}}zt | W q- t	yA   Y q-w d S )Nc              	   S   sD   g | ]}| d s| drtjt|tjtjt|fqS ).bin.json)rD   r:   r)   r=   r   rE   rF   r1   r1   r2   rH   Q   s    "z%cleanup_cache_dir.<locals>.<listcomp>r   c                 S   rI   rJ   r1   rL   r1   r1   r2   rN   X   rO   z#cleanup_cache_dir.<locals>.<lambda>TrP   )
r:   r)   rS   r   rT   rU   r   rV   rW   rX   rY   r1   r1   r2   cleanup_cache_dirN   s   r]   hostnamereturnc              	   C   s   zt | d }W n t jy   Y dS w |D ]6}|d d }zt|}W n ty0   Y  dS w |js@|js@|js@|j	s@|j
rC dS t|dkrL dS qdS )NT   r   z169.254.169.254F)socketgetaddrinfogaierror	ipaddress
ip_address
ValueError
is_privateis_loopbackis_link_localis_reservedis_multicaststr)r^   infosinfoipip_objr1   r1   r2   is_private_host_   s6   rq   r(   c                    s0   t sdS | dd   t fddt D S )NT:r   c                 3   s&    | ]} |kp  d | V  qdS ).N)rD   )rG   dhostr1   r2   	<genexpr>|   s   $ z$is_domain_allowed.<locals>.<genexpr>)r   splitlowerany)r(   r1   ru   r2   is_domain_allowedx   s   r{   r.   c                 C   s   t | d S )Nr7   )hashlibsha256encode	hexdigest)r.   r1   r1   r2   hashed_name~   s   r   c                 C   s>   t | }tjtdd tjt| dtjt| dfS )NTr4   r[   r\   )r   r:   r;   r   r)   r=   )r.   hr1   r1   r2   proxy_cache_paths   s
   r   c                 C   s   t | \}}tj|rtj|sd S t tj| tkr"d S z:t|d}| }W d    n1 s7w   Y  t|ddd}t	
|}W d    n1 sSw   Y  ||fW S  tyf   Y d S w )Nrbrr7   r8   )r   r:   r)   existstimerE   r   r<   readjsonloadr@   )r.   bin_path	meta_pathbfcontentmfmetar1   r1   r2   load_proxy_cache   s    

r   r   r   c                 C   s   t | \}}z=t|d}|| W d    n1 sw   Y  t|ddd}t|| W d    n1 s9w   Y  t  W d S  tyM   Y d S w )Nwbr6   r7   r8   )r   r<   r>   r   dumpr]   r@   )r.   r   r   r   r   r   r   r1   r1   r2   save_proxy_cache   s   r   show_imagesexception_modec                 C   sH   t | }tj|\}}d}|s|d7 }|dkr|d7 }| | | S )Nr&   _noimg	recommend_rec)r3   r:   r)   splitext)r.   r   r   basenameextsuffixr1   r1   r2   generate_render_filename   s   r   z/fetchc               
   C   s  t jd} t jddk}t jd}| stddidfS t| }|jtvr/tddidfS t|jr;t	|j
p9d	rCtdd
idfS z
t| ||}tjt|}tj|rot tj| tk rotd| | dW S ddd}tj| |tdd}|  |j}t|jd}	|s|	dD ]}
|
  q|	dD ],}|dd	}|jrd|jv sd|v sd|v sd|v sd|v sd|v sd|v r|  q|	g dD ]F}|jdv rdnd}||r|| }t||}|jd kr|d!krd!|v rd|vr|d"7 }d#t | ||< qd$t | ||< qz|	j!r+|	j"d%|d&}|	j!#d'| W n
 t$y6   Y nw t|||}t%t&|	| td| |dW S  t'j(j)yp } ztdd(t&| id)fW  Y d }~S d }~ww )*Nr.   noimg1r   errorNo URL provided  zUnsupported URL schemer&   zURL not allowedz
/rendered/)rendered_pathcurrent_urlMozilla/5.0gzip, deflate
User-AgentzAccept-EncodingT)headerstimeoutallow_redirectszhtml.parserimgscriptsrczdocument.domaincross_domaincrossDomainStoragezgtag/jsz	ba.min.jssentry	analytics)alinkr   r   )r   r   hrefr   r   z&exception_mode=recommendz/fetch?url=z/fetch_proxy?url=r   )r   r   zError fetching URL: r   )*r	   argsgetr   r   schemer   r{   r(   rq   r^   r   r:   r)   r=   r   r   r   rE   r   sessionr   raise_for_statusr.   r   r   find_all	decomposestringr   has_attrr   r   headnew_taginsertr@   rC   rl   requests
exceptionsRequestException)
target_urlr   r   parsedcandidate_filenamecandidate_pathr   response	final_urlsoupr   tagr   attrorig_urlnew_urlbase_tagr0   er1   r1   r2   fetch   s   
$





&r   z/fetch_proxyc            	   
      s  t jd} | s
dS zfddd}t| }|r4|\}}d|ddfg}|dd	t f t|d
|W S tj| |dtd}g d  fdd|j	j
 D }|j
dd}t| |jd|i |dd	t f t|j|j|W S  tjjy } zd| dfW  Y d }~S d }~ww )Nr.   )r   r   r   r   r   zContent-Typezapplication/octet-streamzCache-Controlzpublic, max-age=   T)r   streamr   zcontent-encodingzcontent-lengthztransfer-encoding
connectionc                    $   g | ]\}}|   vr||fqS r1   ry   rG   r   valueexcluded_headersr1   r2   rH        $ zfetch_proxy.<locals>.<listcomp>zError fetching resource: r   )r	   r   r   r   appendr   r
   r   r   rawr   itemsr   r   status_coder   r   r   )	r.   r   cachedr   r   resp_headersrespctr   r1   r   r2   fetch_proxy  s0   r   z/rendered/<path:filename>c                 C   s
   t t| S )N)r   r   )r0   r1   r1   r2   serve_rendered$  s   
r   z/favicon.icoc                   C      dS )N)r&      r1   r1   r1   r1   r2   favicon(     r   z/board/comment/GETPOST)methodsc                   C   s   t dS )Nr&   )proxy_comment_subr1   r1   r1   r2   proxy_comment_root-  s   r   z/board/comment/<path:subpath>c              
      s   d|  }z8ddi}t jdkrtj||t jtd}n
tj||t jtd}g d  fdd	|jj	
 D }t|j|j|W S  tjjyX } zd
| dfW  Y d }~S d }~ww )Nz(https://gall.dcinside.com/board/comment/r   r   r   )r   datar   )r   paramsr   r   c                    r   r1   r   r   r   r1   r2   rH   A  r   z%proxy_comment_sub.<locals>.<listcomp>z Error proxying comment request: r   )r	   methodr   postformr   r   r   r   r   r   r
   r   r   r   r   r   )subpathr   r   r   r   r1   r   r2   r   1  s   

r     c                 C   r   )N)z404 Not Foundr   r1   )r   r1   r1   r2   page_not_foundH  r   r   )Dr:   r,   r   r   r|   ra   rd   r   requests.adaptersr   urllib3.util.retryr   r@   bs4r   urllib.parser   r   r   flaskr   r	   r
   r   r   r   configr   r   r   r   r   r   r   r   r   __name__proxy_bpSessionr   adaptermountretriesmax_retriesr3   rC   r?   r]   rl   boolrq   r{   r   r   r   bytesdictr   r   router   r   r   r   r   r   app_errorhandlerr   r1   r1   r1   r2   <module>   sf     ,


S
!



