o
    -h{                     @   s   d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	m
Z
mZmZ ddlmZ eejdkr9ejd ded	ee fd
dZdee d	e jfddZdedeeee	f  d	dfddZejdd fdee d	dfddZedkr|e  dS dS )    N)defaultdictdeque)chain)AnyDefaultDictDictList   )PDFz--helpp_strreturnc                 C   s8   d| v rt t| d\}}tt||d S t| gS )N-r	   )mapintsplitlistrange)r   startend r   G/var/www/html/govbot/env/lib/python3.10/site-packages/pdfplumber/cli.pyparse_page_spec   s   
r   args_rawc                 C   s   t d}|jddt dtjjd | }|jdddd	 |jd
ddd	 |jdg ddd |jddd |jdddd |jdddd |jdtj	d |jdt
d |jddtd |jdt
dd || }|jd urytt|j |_|S ) N
pdfplumberinfile?rb)nargstypedefaultz--structurezoWrite the structure tree as JSON.  All other arguments except --pages, --laparams, and --indent will be ignored
store_true)helpactionz--structure-textzWrite the structure tree as JSON including text contents.  All other arguments except --pages, --laparams, and --indent will be ignoredz--format)csvjsontextr#   )choicesr   z--types+)r   z--include-attrsz1Include *only* these object attributes in output.)r   r!   z--exclude-attrsz,Exclude these object attributes from output.z
--laparams)r   z--precisionz--pages)r   r   z--indentz&Indent level for JSON pretty-printing.)r   r!   )argparseArgumentParseradd_argumentFileTypesysstdinbufferadd_mutually_exclusive_groupr$   loadsr   r   
parse_argspagesr   r   )r   parsergroupargsr   r   r   r1      sJ   


r1   pdfdatac           	         s   t dd }| jD ]!}||j  |jD ]}|d}|d u rq |  |d 7  < qq	t|}|rc| }d|v r@||d  |d}|d u rJq/||  d|v r_ fdd	|d D |d< |s1d S d S )
Nc                   S   s   t tS )N)r   strr   r   r   r   <lambda>L   s    z#add_text_to_mcids.<locals>.<lambda>mcidr%   childrenpage_numbermcidsc                    s   g | ]} | qS r   r   ).0r:   text_contentsr   r   
<listcomp>^   s    z%add_text_to_mcids.<locals>.<listcomp>)r   r2   r<   charsgetr   popleftextend)	r6   r7   page_contentspagecr:   delpagenor   r?   r   add_text_to_mcidsK   s*   




rL   c              	   C   sD  t | }tj|j|j|jd}|jrttj	|j
|jd nY|jr6|j
}t|| ttj	||jdd nJ|jdkrK|jtj|j|j|j|jd n=|jdkr_|jD ]
}t|jdd	 qSn1|jtj|j|j|j|j|jd
 W d    d S W d    d S W d    d S W d    d S W d    d S 1 sw   Y  d S )N)r2   laparams)indentF)rN   ensure_asciir#   )	precisioninclude_attrsexclude_attrsr%   T)layout)rP   rQ   rR   rN   )r1   r
   openr   r2   rM   	structureprintr$   dumpsstructure_treerN   structure_textrL   formatto_csvr,   stdouttypesrP   rQ   rR   extract_textto_json)r   r5   r6   treerG   r   r   r   maina   sJ   



"ra   __main__)r(   r$   r,   collectionsr   r   	itertoolsr   typingr   r   r   r   r6   r
   lenargvappendr8   r   r   	Namespacer1   rL   ra   __name__r   r   r   r   <module>   s    "4$ 
