o
    h;                     @   s  d Z ddlZddlZddlZddlZddlZddlZddlZddlm	Z	m
Z
mZmZmZmZmZmZmZmZmZmZmZ ddlmZ ddlmZmZ ddlmZmZ ddlmZm Z m!Z!m"Z"m#Z# ddl$m%Z%m&Z& e'e(Z)G d	d
 d
eZ*G dd dZ+G dd de+Z,G dd de+Z-G dd de-Z.G dd de+Z/G dd de/Z0G dd de,Z1G dd de/Z2G dd de,Z3G dd de/Z4G dd  d Z5G d!d" d"e"e  Z6dS )#zAdobe character mapping (CMap) support.

CMaps provide the mapping between character codes and Unicode
code-points to character ids (CIDs).

More information is available on:

  https://github.com/adobe-type-tools/cmap-resources

    N)AnyBinaryIODictIterableIteratorListMutableMappingOptionalSetTextIOTupleUnioncast)name2unicode)PDFExceptionPDFTypeError)PSEOFPSSyntaxError)KWD	PSKeyword	PSLiteralPSStackParserliteral_name)choplistnunpackc                   @      e Zd ZdS )	CMapErrorN__name__
__module____qualname__ r!   r!   H/var/www/html/govbot/env/lib/python3.10/site-packages/pdfminer/cmapdb.pyr   ,       r   c                   @   s   e Zd ZdZdeddfddZdefddZd	ed
eddfddZ	dede
ddfddZde
deeee
f ddfddZdddZdedee
 fddZdS )CMapBaser   kwargsreturnNc                 K   s   |  | _d S N)copyattrsselfr%   r!   r!   r"   __init__3      zCMapBase.__init__c                 C   s   | j dddkS )NWModer   r)   getr+   r!   r!   r"   is_vertical6   s   zCMapBase.is_verticalkvc                 C   s   || j |< d S r'   )r)   )r+   r3   r4   r!   r!   r"   set_attr9   r-   zCMapBase.set_attrcodecidc                 C      d S r'   r!   )r+   r6   r7   r!   r!   r"   add_code2cid<      zCMapBase.add_code2cidc                 C   r8   r'   r!   )r+   r7   r6   r!   r!   r"   add_cid2unichr?   r:   zCMapBase.add_cid2unichrcmapc                 C   r8   r'   r!   r+   r<   r!   r!   r"   use_cmapB   r:   zCMapBase.use_cmapc                 C   s   t r'   )NotImplementedError)r+   r6   r!   r!   r"   decodeE   r:   zCMapBase.decode)r<   r$   r&   N)r   r   r    debugobjectr,   boolr2   strr5   intr9   r   r   bytesr;   r>   r   r@   r!   r!   r!   r"   r$   0   s     
r$   c                	   @   s   e Zd Zdeeef ddfddZdefddZdeddfd	d
Z	de
dee fddZejddfdedeeeef  deedf ddfddZdS )CMapr%   r&   Nc                 K      t j| fi | i | _d S r'   )r$   r,   code2cidr*   r!   r!   r"   r,   J      
zCMap.__init__c                 C      d| j d S )Nz
<CMap: %s>CMapNamer/   r1   r!   r!   r"   __repr__N      zCMap.__repr__r<   c                    sV   t |tsJ tt|dtttf dtttf dd f fdd  | j|j d S )Ndstsrcr&   c                    s@   |  D ]\}}t|tri }|| |<  || q|| |< qd S r'   )items
isinstancedict)rO   rP   r3   r4   dr(   r!   r"   r(   T   s   

zCMap.use_cmap.<locals>.copy)rR   rG   rD   typer   rE   rB   rI   r=   r!   rU   r"   r>   Q   s   *	zCMap.use_cmapr6   c                 c   sj    t d| | | j}t|D ]#}||v r/|| }t|tr%|V  | j}qttttf |}q| j}qd S )Nzdecode: %r, %r)	logrA   rI   iterrR   rE   r   r   rB   )r+   r6   rT   ixr!   r!   r"   r@   _   s   
zCMap.decoder!   outrI   .c                 C   sr   |d u r	| j }d}t| D ]'\}}||f }t|tr'|d||f  q| j|tttt	f ||d qd S )Nr!   zcode %r = cid %d
)r[   rI   r6   )
rI   sortedrQ   rR   rE   writedumpr   r   rB   )r+   r[   rI   r6   r3   r4   cr!   r!   r"   r^   m   s   

 z	CMap.dump)r   r   r    r   rD   rE   r,   rM   r$   r>   rF   r   r@   sysstdoutr   r	   r   rB   r   r^   r!   r!   r!   r"   rG   I   s"    
rG   c                   @   &   e Zd Zdedeedf fddZdS )IdentityCMapr6   r&   .c                 C   s$   t |d }|rtd| |S dS )N   z>%dHr!   lenstructunpackr+   r6   nr!   r!   r"   r@      s   zIdentityCMap.decodeNr   r   r    rF   r   rE   r@   r!   r!   r!   r"   rc   ~       rc   c                   @   rb   )IdentityCMapByter6   r&   .c                 C   s    t |}|rtd| |S dS )Nz>%dBr!   re   ri   r!   r!   r"   r@      s   zIdentityCMapByte.decodeNrk   r!   r!   r!   r"   rm      rl   rm   c                   @   s^   e Zd Zdeeef ddfddZdefddZdedefd	d
Ze	j
fdeddfddZdS )
UnicodeMapr%   r&   Nc                 K   rH   r'   )r$   r,   
cid2unichrr*   r!   r!   r"   r,      rJ   zUnicodeMap.__init__c                 C   rK   )Nz<UnicodeMap: %s>rL   r/   r1   r!   r!   r"   rM      rN   zUnicodeMap.__repr__r7   c                 C   s   t d| | | j| S )Nget_unichr: %r, %r)rW   rA   ro   r+   r7   r!   r!   r"   
get_unichr   s   
zUnicodeMap.get_unichrr[   c                 C   s.   t | j D ]\}}|d||f  qd S )Nzcid %d = unicode %r
)r\   ro   rQ   r]   )r+   r[   r3   r4   r!   r!   r"   r^      s   zUnicodeMap.dump)r   r   r    r   rD   rE   r,   rM   rr   r`   ra   r   r^   r!   r!   r!   r"   rn      s
    rn   c                   @   s   e Zd ZdedefddZdS )IdentityUnicodeMapr7   r&   c                 C   s   t d| | t|S )z+Interpret character id as unicode codepointrp   )rW   rA   chrrq   r!   r!   r"   rr      s   zIdentityUnicodeMap.get_unichrN)r   r   r    rE   rD   rr   r!   r!   r!   r"   rs      s    rs   c                   @   s"   e Zd ZdededdfddZdS )FileCMapr6   r7   r&   Nc                 C   s   t |tr
t |tsJ tt|t|f| j}|d d D ]}t|}||v r5ttttf || }qi }|||< |}qt|d }|||< d S )N)	rR   rD   rE   rV   rI   ordr   r   rB   )r+   r6   r7   rT   r_   citr!   r!   r"   r9      s   zFileCMap.add_code2cid)r   r   r    rD   rE   r9   r!   r!   r!   r"   ru      s    ru   c                   @   s,   e Zd Zdedeeeef ddfddZdS )FileUnicodeMapr7   r6   r&   Nc                 C   s   t |tsJ tt|t |tr t |jtsJ t|j}nt |tr,|dd}nt |tr6t	|}nt
||dkrH| j|dkrHd S || j|< d S )NzUTF-16BEignore     )rR   rE   rD   rV   r   namer   rF   r@   rt   r   ro   r0   )r+   r7   r6   unichrr!   r!   r"   r;      s   



zFileUnicodeMap.add_cid2unichr)r   r   r    rE   r   r   rF   r;   r!   r!   r!   r"   rz      s    $rz   c                       s*   e Zd Zdededdf fddZ  ZS )PyCMapr~   moduler&   Nc                    s.   t  j|d |j| _|jrd| jd< d S d S N)rL      r.   )superr,   CODE2CIDrI   IS_VERTICALr)   )r+   r~   r   	__class__r!   r"   r,      s
   zPyCMap.__init__)r   r   r    rD   r   r,   __classcell__r!   r!   r   r"   r      s    "r   c                       s.   e Zd Zdedededdf fddZ  ZS )PyUnicodeMapr~   r   verticalr&   Nc                    s4   t  j|d |r|j| _d| jd< d S |j| _d S r   )r   r,   CID2UNICHR_Vro   r)   CID2UNICHR_H)r+   r~   r   r   r   r!   r"   r,      s
   zPyUnicodeMap.__init__)r   r   r    rD   r   rC   r,   r   r!   r!   r   r"   r      s    &r   c                   @   s   e Zd ZU i Zeeef ed< i Zeee	e
 f ed< G dd deZededefddZededefd	d
ZeddededefddZdS )CMapDB_cmap_cache_umap_cachec                   @   r   )zCMapDB.CMapNotFoundNr   r!   r!   r!   r"   CMapNotFound   r#   r   r~   r&   c              	   C   s   | dd}d| }td| tjddtjtjt	df}|D ].}tj||}tj
|rRt|}ztt|dt| W |    S |  w q$t|)	N  z%s.pickle.gzzloading: %r	CMAP_PATHz/usr/share/pdfminer/r<   r!   )replacerW   rA   osenvironr0   pathjoindirname__file__existsgzipopenrV   rD   pickleloadsreadcloser   r   )clsr~   filename
cmap_paths	directoryr   gzfiler!   r!   r"   
_load_data   s   

zCMapDB._load_datac                 C   s   |dkr	t ddS |dkrt ddS |dkrtddS |dkr$tddS z| j| W S  ty3   Y nw | |}t|| | j|< }|S )Nz
Identity-Hr   )r.   z
Identity-Vr   OneByteIdentityHOneByteIdentityV)rc   rm   r   KeyErrorr   r   )r   r~   datar<   r!   r!   r"   get_cmap   s    




zCMapDB.get_cmapFr   c                    sZ   z| j  | W S  ty   Y nw | d   fdddD | j < | j  | S )Nzto-unicode-%sc                    s   g | ]}t  |qS r!   )r   ).0r4   r   r~   r!   r"   
<listcomp>  s    z*CMapDB.get_unicode_map.<locals>.<listcomp>)FT)r   r   r   )r   r~   r   r!   r   r"   get_unicode_map  s   zCMapDB.get_unicode_mapN)F)r   r   r    r   r   rD   r   __annotations__r   r   r   r   r   classmethodr   r   r$   r   rC   rn   r   r!   r!   r!   r"   r      s   
 r   c                   @   s   e Zd ZdededdfddZd ddZed	Zed
Z	edZ
edZedZedZedZedZedZedZedZedZedZedZedZedZdededdfddZdeddfddZdS )!
CMapParserr<   fpr&   Nc                 C   s$   t | | || _d| _t | _d S )NT)r   r,   r<   _in_cmapset	_warnings)r+   r<   r   r!   r!   r"   r,     s   zCMapParser.__init__c                 C   s$   z|    W d S  ty   Y d S w r'   )
nextobjectr   r1   r!   r!   r"   run   s
   zCMapParser.runs	   begincmaps   endcmaps   usecmaps   defs   begincodespaceranges   endcodespaceranges   begincidranges   endcidranges   begincidchars
   endcidchars   beginbfranges
   endbfranges   beginbfchars	   endbfchars   beginnotdefranges   endnotdefrangepostokenc                 C   s*  || j u rd| _|   dS || ju rd| _dS | jsdS || ju rDz| d\\}}\}}| jt|| W dS  t	yC   Y dS w || j
u rtz| d\\}}| jtt| W dS  t	yi   Y dS  tjys   Y dS w || ju r|   dS || ju r|   dS || ju r|   dS || ju r2dd |  D }td|D ]\}}	}
t|ts| d	 qt|	ts| d
 qt|
ts| d qt|t|	kr| d q|dd }|	dd }||kr| d q|dd }|	dd }t|}t|}t|}t|| d D ]}|td|| | d  }| j|
| | qqdS || ju r>|   dS || ju rmdd |  D }td|D ]\}
}t|trit|
tri| j|
| qRdS || j u ry|   dS || j!u r7dd |  D }td|D ]\}}	}t|ts| d qt|	ts| d qt|t|	kr| d qt|}t|	}t|t"rt||| d kr| d t#t||d |D ]\}
}| j|
| qqt|tsJ |dd }t|}|dd }t|}t|| d D ]}|td|| | d  }| j|| | qqdS || j$u rC|   dS || j%u rtdd |  D }td|D ]\}
}t|
trpt|trp| jt|
| qWdS || j&u r|   dS || j'u r|   dS | (||f dS )z[ToUnicode CMaps

        See Section 5.9.2 - ToUnicode CMaps of the PDF Reference.
        TNFrd   r   c                 S      g | ]\}}|qS r!   r!   r   __objr!   r!   r"   r   f      z)CMapParser.do_keyword.<locals>.<listcomp>   z0The start object of begincidrange is not a byte.z.The end object of begincidrange is not a byte.z.The cid object of begincidrange is not a byte.z?The start and end byte of begincidrange have different lengths.zGThe prefix of the start and end byte of begincidrange are not the same.z>Lc                 S   r   r!   r!   r   r!   r!   r"   r     r   c                 S   r   r!   r!   r   r!   r!   r"   r     r   zThe start object is not a byte.zThe end object is not a byte.z.The start and end byte have different lengths.zPThe difference between the start and end offsets does not match the code length.c                 S   r   r!   r!   r   r!   r!   r"   r     r   ))KEYWORD_BEGINCMAPr   popallKEYWORD_ENDCMAPKEYWORD_DEFpopr<   r5   r   r   KEYWORD_USECMAPr>   r   r   r   KEYWORD_BEGINCODESPACERANGEKEYWORD_ENDCODESPACERANGEKEYWORD_BEGINCIDRANGEKEYWORD_ENDCIDRANGEr   rR   rF   
_warn_oncerE   rf   r   rangerg   packr;   KEYWORD_BEGINCIDCHARKEYWORD_ENDCIDCHARKEYWORD_BEGINBFRANGEKEYWORD_ENDBFRANGElistzipKEYWORD_BEGINBFCHARKEYWORD_ENDBFCHARKEYWORD_BEGINNOTDEFRANGEKEYWORD_ENDNOTDEFRANGEpush)r+   r   r   _r3   r4   cmapnameobjs
start_byteend_byter7   start_prefix
end_prefixsvarevarstartendvlenrY   rZ   r6   unicode_valuevarbaseprefixr!   r!   r"   
do_keyword7  s  















zCMapParser.do_keywordmsgc                 C   s0   || j vr| j | d}t||  dS dS )z!Warn once for each unique messagezIgnoring (part of) ToUnicode map because the PDF data does not conform to the format. This could result in (cid) values in the output. N)r   addrW   warning)r+   r   base_msgr!   r!   r"   r     s   
zCMapParser._warn_once)r&   N)r   r   r    r$   r   r,   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rE   r   r   rD   r   r!   r!   r!   r"   r     s,    
 r   )7__doc__r   loggingr   os.pathr   rg   r`   typingr   r   r   r   r   r   r   r	   r
   r   r   r   r   pdfminer.encodingdbr   pdfminer.pdfexceptionsr   r   pdfminer.psexceptionsr   r   pdfminer.psparserr   r   r   r   r   pdfminer.utilsr   r   	getLoggerr   rW   r   r$   rG   rc   rm   rn   rs   ru   rz   r   r   r   r   r!   r!   r!   r"   <module>   s8    <
5		
7