o
    e<                     @  s2  d Z ddlmZ ddlZddlmZ ddlmZ ddlm	Z	 ddl
Z
ddlZ
ddlZ
ddlZ
ddlmZ ddlmZmZmZmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZmZ ddlm Z m!Z! e"dZ#ej$Z%dd Z&e&e_$dd Z'dd Z(dd Z)G dd deZ*G dd deZ+dd Z,dd Z-dS )zEDetailed text position and layout analysis, building on pdfminer.six.    )annotationsN)copysign)Path)patch)PDFLayoutAnalyzer)LAParamsLTCharLTPage	LTTextBox)PDFTextExtractionNotAllowed)PDFSimpleFontPDFUnicodeNotDefined)PDFPage)bbox2str
matrix2str)EncryptedPdfErrorInputFileErrorz[0-9]+c                 C  s&   t | ||| | jsd|vri | _d S )NZEncoding)original_pdfsimplefont_initZunicode_mapZcid2unicode)selfZ
descriptorwidthsspec r   GD:\Projects\ConvertPro\env\Lib\site-packages\ocrmypdf/pdfinfo/layout.pypdfsimplefont__init__   s   r   c                 C  s8   | j d | j d  }|dkr| j| j }|td| j S )N      r         ?)bboxascentdescentr   vscale)r   hr   r   r   !pdftype3font__pscript5_get_height2   s   r"   c                 C     | j td| j S Nr   )r   r   r    r   r   r   r   "pdftype3font__pscript5_get_descent9      r&   c                 C  r#   r$   )r   r   r    r%   r   r   r   !pdftype3font__pscript5_get_ascent=   r'   r(   c                      s<   e Zd ZdZdZ fddZdd Zdd Zd	d
 Z  Z	S )LTStateAwareCharzDA subclass of LTChar that tracks text render mode at time of drawing)
rendermode_textmatrixfontnameadvZuprightsizewidthheightr   Zx0x1Zy0y1c                   s*   t  |||||||||	|

 |j| _d S N)super__init__renderr*   )r   r,   fontfontsizescalingrisetext	textwidthtextdispncsgraphicstate	textstate	__class__r   r   r6   U   s   zLTStateAwareChar.__init__c              	   C  sn   t | jtot |jt}z|r| j|jkW S | j\}}|j\}}||ko)| j|jkW S  ttfy6   Y dS w )a
  Check if characters can be combined into a textline

        We consider characters compatible if:
            - the Unicode mapping is known, and both have the same render mode
            - the Unicode mapping is unknown but both are part of the same font
        F)
isinstancer+   strr*   
ValueErrorAttributeError)r   objZboth_unicode_mappedZfont0_Zfont1r   r   r   is_compatibleq   s   

zLTStateAwareChar.is_compatiblec                 C  s   t | jtrdS | jS )Nu   �)rD   r+   tupler%   r   r   r   get_text   s   zLTStateAwareChar.get_textc                 C  sJ   d| j j dt| j dt| j d| jd| jd| j d| 	 dS )	N< z matrix=z rendermode=z font=z adv=z text=>)
rC   __name__r   r   r   r,   r*   r-   r.   rL   r%   r   r   r   __repr__   s   zLTStateAwareChar.__repr__)
rP   
__module____qualname____doc__	__slots__r6   rJ   rL   rQ   __classcell__r   r   rB   r   r)   A   s    r)   c                      sb   e Zd ZdZd fdd	Z fddZdd	 Z fd
dZdd Zdd Z	dd Z
dd Z  ZS )TextPositionTrackerz=A page layout analyzer that pays attention to text visibilityr   Nc                   s&   t  ||| d | _d | _d | _d S r4   )r5   r6   rA   resultcur_item)r   ZrsrcmgrpagenolaparamsrB   r   r   r6      s   
zTextPositionTracker.__init__c                   s"   t  || t| j|j| _d S r4   )r5   
begin_pager	   rZ   ZmediaboxrY   )r   pageZctmrB   r   r   r\      s   zTextPositionTracker.begin_pagec                 C  sl   | j rJ tt| j t| jtsJ tt| j| jd ur'| j| j |  j	d7  _	| 
| j d S )Nr   )_stackrE   lenrD   rY   r	   typer[   ZanalyzerZ   receive_layout)r   r]   r   r   r   end_page   s   
zTextPositionTracker.end_pagec                   s"   |  | _t | j||| d S r4   )copyrA   r5   render_string)r   rA   seqr?   r@   rB   r   r   rd      s   
z!TextPositionTracker.render_stringc	                 C  s   z| |}	t|	tsJ tt|	W n ty#   | ||}	Y nw ||}
||}t||||||	|
|||| j	}| j
| |jS r4   )Z	to_unichrrD   rE   r`   r   handle_undefined_char
char_widthZ	char_dispr)   rA   rY   addr.   )r   r,   r8   r9   r:   r;   cidr?   r@   r<   r=   r>   itemr   r   r   render_char   s.   


zTextPositionTracker.render_charc                 C  s
   |j |fS r4   )r-   )r   r8   ri   r   r   r   rf      s   
z)TextPositionTracker.handle_undefined_charc                 C  s
   || _ d S r4   rX   )r   Zltpager   r   r   ra      s   
z"TextPositionTracker.receive_layoutc                 C  s   | j S r4   rl   r%   r   r   r   
get_result   s   zTextPositionTracker.get_result)r   N)rP   rR   rS   rT   r6   r\   rb   rd   rk   rf   ra   rm   rV   r   r   rB   r   rW      s    rW   c              
   C  s  t jjdd}d }t|tdd|dd}t j||}d }|r-tjddtt	t
d}|  zVz8t| d'}tj||gdd	}	t|	d }
|
d u rQtd
| d||
 W d    n1 s`w   Y  W n tyv } zt |d }~ww W |d ur|  | S |d ur|  w w )NT)Zcaching)Z	all_textsZdetect_verticalZ
boxes_flow)r[   zpdfminer.pdffont.PDFType3Font)r   Z
get_ascentZget_descentZ
get_heightrbr   )ZpagenosZmaxpagesz pdfminer could not process page z (counting from 0).)pdfminerZ	pdfinterpZPDFResourceManagerrW   r   ZPDFPageInterpreterr   Zmultipler(   r&   r"   startr   openr   Z	get_pagesnextr   Zprocess_pager   r   stoprm   )infilerZ   Zpscript5_modeZrmanZdisable_boxes_flowdevZinterpZpatcherfZ	page_iterr]   er   r   r   get_page_analysis   sR   


rx   c              	   c  sF    | D ]}t |tr|V  qz	t|E d H  W q ty    Y qw d S r4   )rD   r
   get_text_boxes	TypeError)rH   childr   r   r   ry      s   
ry   ).rT   
__future__r   remathr   pathlibr   Zunittest.mockr   ro   Zpdfminer.encodingdbZpdfminer.pdfdeviceZpdfminer.pdfinterpZpdfminer.converterr   Zpdfminer.layoutr   r   r	   r
   Zpdfminer.pdfdocumentr   Zpdfminer.pdffontr   r   Zpdfminer.pdfpager   Zpdfminer.utilsr   r   Zocrmypdf.exceptionsr   r   compileZ
STRIP_NAMEr6   r   r   r"   r&   r(   r)   rW   rx   ry   r   r   r   r   <module>   s8   
S>(