o
    ht                      @   s   d dl Z d dlZd dlmZmZmZmZmZmZm	Z	m
Z
mZ d dlmZ d dlmZmZmZ d dlmZmZ d dlmZ d dlmZmZmZmZ d dlmZ d d	lmZm Z  e!e"Z#ed
Z$edZ%G dd dZ&dS )    N)	AnyBinaryIO	ContainerDictIteratorListOptionalSetTuple)settings)PDFDocumentPDFNoPageLabelsPDFTextExtractionNotAllowed)PDFObjectNotFoundPDFValueError)	PDFParser)
dict_value	int_value
list_valueresolve1)LIT)Rect
parse_rectPagePagesc                   @   s   e Zd ZdZdedededee ddf
dd	Zdefd
dZ	h dZ
ededed  fddZe					d$dedeee  dededededed  fddZdedefddZdededefd d!Zdedee fd"d#ZdS )%PDFPageaz  An object that holds the information about a page.

    A PDFPage object is merely a convenience class that has a set
    of keys and values, which describe the properties of a page
    and point to its contents.

    Attributes
    ----------
      doc: a PDFDocument object.
      pageid: any Python object that can uniquely identify the page.
      attrs: a dictionary of page attributes.
      contents: a list of PDFStream objects that represents the page content.
      lastmod: the last modified time of the page.
      resources: a dictionary of resources used by the page.
      mediabox: the physical size of the page.
      cropbox: the crop rectangle of the page.
      rotate: the page rotation (in degree).
      annots: the page annotations.
      beads: a chain that represents natural reading order.
      label: the page's label (typically, the logical page number).

    docpageidattrslabelreturnNc                 C   s   || _ || _t|| _|| _t| jd| _t| jdt | _	| 
| jd| _| | jd| j| _| | jd| _t| jddd d | _| jd	| _| jd
| _dS )zInitialize a page object.

        doc: a PDFDocument object.
        pageid: any Python object that can uniquely identify the page.
        attrs: a dictionary of page attributes.
        label: page label string.
        LastModified	ResourcesMediaBoxCropBoxContentsRotater   ih  AnnotsBN)r   r   r   r   r   r   getlastmoddict	resources_parse_mediaboxmediabox_parse_cropboxcropbox_parse_contentscontentsr   rotateannotsbeads)selfr   r   r   r    r7   I/var/www/html/govbot/env/lib/python3.10/site-packages/pdfminer/pdfpage.py__init__0   s   
zPDFPage.__init__c                 C   s   d| j d| jdS )Nz<PDFPage: Resources=z, MediaBox=>)r,   r.   )r6   r7   r7   r8   __repr__O   s   zPDFPage.__repr__>   r&   r$   r#   r"   documentc           	      #   s6   	 ddt dttt f dttt   dttttt tt t f f f  f fddz }W n t	y>   t
d }Y nw d}djv rbjd j}|D ]\}} ||t|V  d	}qQ|sjD ]1}| D ](}z|}t|tr|d
tu r ||t|V  W qm ty   Y qmw qgd S d S )Nobjparentvisitedr    c           	      3   s   t | tr| }t| }n	| j}t|  }|d u r"t }||v r(d S || | D ]\}}| j	v rB||vrB|||< q1|
d}|d u rTtjsT|
d}|tu rxd|v rxtd|d  t|d D ]}|||E d H  qjd S |tu rtd| ||fV  d S d S )NTypetypeKidszPages: Kids=%rzPage: %r)
isinstanceintr   getobjcopyobjidsetadditemsINHERITABLE_ATTRSr)   r   STRICTLITERAL_PAGESlogdebugr   LITERAL_PAGE)	r=   r>   r?   	object_idobject_propertieskvobject_typechildclsdepth_first_searchr<   r7   r8   rY   V   s6   



z0PDFPage.create_pages.<locals>.depth_first_searchFr   Tr@   N)r   r   strr   r	   r   r
   rD   get_page_labelsr   	itertoolsrepeatcatalognextxrefs
get_objidsrE   rC   r+   r)   rP   r   )	rX   r<   page_labelspagesobjectsrG   treexrefr=   r7   rW   r8   create_pagesT   sH   

&


zPDFPage.create_pagesr    TFfppagenosmaxpagespasswordcachingcheck_extractablec                 c   s    t |}t|||d}|js"|rd| }	t|	d| }
t|
 t| |D ]\}}|r4||vr4q)|V  |rB||d krB d S q)d S )N)rm   rn   z"Text extraction is not allowed: %rzThe PDF %r contains a metadata field indicating that it should not allow text extraction. Ignoring this field and proceeding. Use the check_extractable if you want to raise an error in this case   )r   r   is_extractabler   rN   warning	enumeraterh   )rX   rj   rk   rl   rm   rn   ro   parserr   	error_msgwarning_msgpagenopager7   r7   r8   	get_pages   s(   
zPDFPage.get_pagesvaluec                 C   sT   d}|d u rt d |S ztdd t|D W S  ty)   t d | Y S w )N)        r{   g      @g     @zHMediaBox missing from /Page (and not inherited), defaulting to US Letterc                 s       | ]}t |V  qd S rZ   r   .0valr7   r7   r8   	<genexpr>       z*PDFPage._parse_mediabox.<locals>.<genexpr>z2Invalid MediaBox in /Page, defaulting to US Letter)rN   rr   r   r   r   )r6   rz   	us_letterr7   r7   r8   r-      s   
zPDFPage._parse_mediaboxr.   c                 C   sF   |d u r|S zt dd t|D W S  ty"   td | Y S w )Nc                 s   r|   rZ   r}   r~   r7   r7   r8   r      r   z)PDFPage._parse_cropbox.<locals>.<genexpr>z0Invalid CropBox in /Page, defaulting to MediaBox)r   r   r   rN   rr   )r6   rz   r.   r7   r7   r8   r/      s   
zPDFPage._parse_cropboxc                 C   s(   g }|d urt |}t|ts|g}|S rZ   )r   rC   list)r6   rz   r2   r7   r7   r8   r1      s   
zPDFPage._parse_contents)Nr   ri   TF)__name__
__module____qualname____doc__r   objectr   r[   r9   r;   rK   classmethodr   rh   r   r   rD   boolry   r   r   r-   r/   r   r1   r7   r7   r7   r8   r      sR    
=
$r   )'r]   loggingtypingr   r   r   r   r   r   r   r	   r
   pdfminerr   pdfminer.pdfdocumentr   r   r   pdfminer.pdfexceptionsr   r   pdfminer.pdfparserr   pdfminer.pdftypesr   r   r   r   pdfminer.psparserr   pdfminer.utilsr   r   	getLoggerr   rN   rP   rM   r   r7   r7   r7   r8   <module>   s    ,
