o
    e                     @  s\  U d Z ddlmZ ddlZddlZddlZddlmZ ddlm	Z	 ddl
mZ ddlmZmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZmZmZmZmZmZm Z  ddl!m"Z" ddl#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z* ddl+m,Z,m-Z- ddl.m/Z/m0Z0 ddl1m2Z2m3Z3m4Z4 ddl5m6Z6m7Z7 e8 Z9G dd deZ:G dd deZ;e e<e<e<e<f Z=e:j>e:j>e:j?e:j?e:j@e:jAe:jBe:jCe:jDe:jEe:jFe:j>e:j?e:j@e:jCdZGdeHd< e;jIe;jJe;jKe;jLe;jIe;jJe;jMe;jNe;jOe;jPe;jQdZRdeHd< e:j>de:j?de:j@de:jAde:jCdiZSd eHd!< d"ZTd#d$ ZUG d%d& d&eZVG d'd( d(eZWG d)d* d*eZXG d+d, d,eZYG d-d. d.ZZG d/d0 d0Z[d1d2 Z\eTfdjd5d6Z]dkd9d:Z^G d;d< d<Z_dld?d@Z`dmdBdCZadndEdFZbdodIdJZcddKdpdMdNZddqdRdSZedrdUdVZfdagdsdYdZZhd[d\ Zi	]dtdudadbZjG dcdd ddZke- ZlG dedf dfZmdgdh Zneodikren  dS dS )vz/Extract information about the content of a PDF.    )annotationsN)defaultdict)	ExitStackDecimal)Enumauto)partial)hypotinfisclose)PathLike)Path)	ContainerIterableIteratorMapping
NamedTupleSequenceTuple)warn)ObjectPdfPdfImagePdfInlineImage	PdfMatrixUnsupportedImageTypeErrorparse_content_stream)ExecutorSerialExecutor)EncryptedPdfErrorInputFileError)
Resolutionavailable_cpu_countpikepdf_enable_mmap)get_page_analysisget_text_boxesc                   @  sL   e Zd ZdZe Ze Ze Ze Ze Z	e Z
e Ze Ze Ze ZdS )
Colorspacez1Description of common image colorspaces in a PDF.N)__name__
__module____qualname____doc__r   grayrgbcmyklabiccindexsepdevnpatternjpeg2000 r6   r6   ED:\Projects\ConvertPro\env\Lib\site-packages\ocrmypdf/pdfinfo/info.pyr'   )   s    
r'   c                   @  sF   e Zd ZdZe Ze Ze Ze Ze Z	e Z
e Ze Ze ZdS )Encodingz/Description of common image encodings in a PDF.N)r(   r)   r*   r+   r   ccittjpegr5   jbig2asciihexascii85lzwflate	runlengthr6   r6   r6   r7   r8   9   s    
r8   )z/DeviceGrayz/CalGrayz
/DeviceRGBz/CalRGBz/DeviceCMYKz/Labz	/ICCBasedz/Indexedz/Separationz/DeviceNz/Patternz/Gz/RGBz/CMYKz/Izdict[str, Colorspace]FRIENDLY_COLORSPACE)z/CCITTFaxDecodez
/DCTDecodez
/JPXDecodez/JBIG2Decodez/CCFz/DCTz/AHxz/A85z/LZWz/Flz/RLzdict[str, Encoding]FRIENDLY_ENCODING         zdict[Colorspace, int]FRIENDLY_COMP)      ?        rH   rG   rH   rH   c                 C  s&   t t| }t|t}tdd |D S )Nc                 s  s"    | ]\}}t ||d dV  qdS )gMbP?)Zrel_tolN)r   ).0abr6   r6   r7   	<genexpr>y        z"_is_unit_square.<locals>.<genexpr>)mapfloatzipUNIT_SQUAREall)	shorthandvaluespairwiser6   r6   r7   _is_unit_squarev   s   

rV   c                   @  *   e Zd ZU dZded< ded< ded< dS )	XobjectSettingsz%Info about an XObject found in a PDF.strname/tuple[float, float, float, float, float, float]rS   intstack_depthNr(   r)   r*   r+   __annotations__r6   r6   r6   r7   rX   |   
   
 rX   c                   @  rW   )	InlineSettingsz*Info about an inline image found in a PDF.r   iimager[   rS   r\   r]   Nr^   r6   r6   r6   r7   ra      r`   ra   c                   @  s:   e Zd ZU dZded< ded< ded< ded< d	ed
< dS )ContentsInfoz*Info about various objects found in a PDF.zlist[XobjectSettings]xobject_settingszlist[InlineSettings]inline_imagesboolfound_vector
found_textz#Mapping[str, list[XobjectSettings]]
name_indexNr^   r6   r6   r6   r7   rc      s   
 rc   c                   @  s*   e Zd ZU dZded< ded< ded< dS )TextboxInfoz%Info about a text box found in a PDF.z!tuple[float, float, float, float]bboxrf   
is_visible
is_corruptNr^   r6   r6   r6   r7   rj      r`   rj   c                   @     e Zd ZdZdS )VectorMarkerzCSentinel indicating vector drawing operations were found on a page.Nr(   r)   r*   r+   r6   r6   r6   r7   ro          ro   c                   @  rn   )
TextMarkerzASentinel indicating text drawing operations were found on a page.Nrp   r6   r6   r6   r7   rr      rq   rr   c                 c  sH    | D ]\}}t |}td|r|D ]}g |fV  qq||fV  qdS )z7Convert runs of qQ's in the stack into single graphobjszQ*q+$N)rY   rematch)Z	graphobjsoperandsoperatorcharr6   r6   r7   _normalize_stack   s   rx   contentstreamr   c              	   C  s  g }t |}g }g }tdd }d}d}td }	td }
td }d|	|
B |B }ttt| |D ]\}}|\}}|dkra|| t	|d	kr`t	|d
kr\t
d| td q8|dkryz| }W q8 tyx   td Y q8w |dkrt || }q8|dkr|d }t||jt	|d}|| |t| | q8|dkr|d }t||jt	|d}|| q8||	v rd}q8||
v rd}q8t|||||dS )a  Interpret the PDF content stream.

    The stack represents the state of the PDF graphics stack.  We are only
    interested in the current transformation matrix (CTM) so we only track
    this object; a full implementation would need to track many other items.

    The CTM is initialized to the mapping from user space to device space.
    PDF units are 1/72".  In a PDF viewer or printer this matrix is initialized
    to the transformation to device space.  For example if set to
    (1/72, 0, 0, 1/72, 0, 0) then all units would be calculated in inches.

    Images are always considered to be (0, 0) -> (1, 1).  Before drawing an
    image there should be a 'cm' that sets up an image coordinate system
    where drawing from (0, 0) -> (1, 1) will draw on the desired area of the
    page.

    PDF units suit our needs so we initialize ctm to the identity matrix.

    According to the PDF specification, the maximum stack depth is 32. Other
    viewers tolerate some amount beyond this.  We issue a warning if the
    stack depth exceeds the spec limit and set a hard limit beyond this to
    bound our memory requirements.  If the stack underflows behavior is
    undefined in the spec, but we just pretend nothing happened and leave the
    CTM unchanged.
    c                   S  s   g S Nr6   r6   r6   r6   r7   <lambda>   s    z%_interpret_contents.<locals>.<lambda>FzS s f F f* B B* b b*z	TJ Tj " 'zBI ID EI q Q Do cm q       z5PDF graphics stack overflowed hard limit at operator z(PDF graphics stack overflowed spec limitQz5PDF graphics stack underflowed - PDF may be malformedcmZDor   )rZ   rS   r]   zINLINE IMAGE)rb   rS   r]   T)rd   re   rg   rh   ri   )r   r   setsplitjoin	enumeraterx   r   appendlenRuntimeErrorr   pop
IndexErrorrX   rS   rY   ra   rc   )ry   initial_shorthandstackctmrd   re   ri   rg   rh   Z
vector_opsZtext_showing_opsZ	image_opsZoperator_whitelistnZgraphobjru   rv   Z
image_namesettingsrb   inliner6   r6   r7   _interpret_contents   sr   

r   returnr"   c           	        sV   | \}}}}}}t ||t ||fddd  fddtdD \}}t||S )	az  Given the transformation matrix and image size, find the image DPI.

    PDFs do not include image resolution information within image data.
    Instead, the PDF page content stream describes the location where the
    image will be rasterized, and the effective resolution is the ratio of the
    pixel size to raster target size.

    Normally a scanned PDF has the paper size set appropriately but this is
    not guaranteed. The most common case is a cropped image will change the
    page size (/CropBox) without altering the page content stream. That means
    it is not sufficient to assume that the image fills the page, even though
    that is the most common case.

    A PDF image may be scaled (always), cropped, translated, rotated in place
    to an arbitrary angle (rarely) and skewed. Only equal area mappings can
    be expressed, that is, it is not necessary to consider distortions where
    the effective DPI varies with position.

    To determine the image scale, transform an offset axis vector v0 (0, 0),
    width-axis vector v0 (1, 0), height-axis vector vh (0, 1) with the matrix,
    which gives the dimensions of the image in PDF units. From there we can
    compare to actual image dimensions. PDF uses
    row vector * matrix_transposed unlike the traditional
    matrix * column vector.

    The offset, width and height vectors can be combined in a matrix and
    multiplied by the transform matrix. Then we want to calculated
        magnitude(width_vector - offset_vector)
    and
        magnitude(height_vector - offset_vector)

    When the above is worked out algebraically, the effect of translation
    cancels out, and the vector magnitudes become functions of the nonzero
    transformation matrix indices. The results of the derivation are used
    in this code.

    pdfimages -list does calculate the DPI in some way that is not completely
    naive, but it does not get the DPI of rotated images right, so cannot be
    used anymore to validate this. Photoshop works, or using Acrobat to
    rotate the image back to normal.

    It does not matter if the image is partially cropped, or even out of the
    /MediaBox.

          R@c                 S  s    | dkr||  nt }|| }|S Nr   )r   )ZdrawnZpixelsZinches_per_ptscaledpir6   r6   r7   calc<  s   z_get_dpi.<locals>.calcc                 3  s"    | ]} | | V  qd S rz   r6   rI   r   r   Zimage_drawn
image_sizer6   r7   rL   B  rM   z_get_dpi.<locals>.<genexpr>   N)r   )r
   ranger"   )	ctm_shorthandr   rJ   rK   cd_Zdpi_wZdpi_hr6   r   r7   _get_dpi  s
   /

r   c                   @  s   e Zd ZU dZedZded< ded< ddddd	d&ddZedd Z	edd Z
edd Zedd Zedd Zedd Zedd Zedd Zed d! Zed"d# Zd$d% ZdS )'	ImageInfoz*Information about an image found in a PDF.z1.000z
int | None_comprY   _name N)rZ   pdfimager   rS   r   Object | Noner   PdfInlineImage | Nonec             
   C  s  t || _|| _|d urd| _|}n|d urd| _t|}ntd|j| _|j| _	|j
r1d| _nd| _t|j| _zt|jd | _W n tyQ   d | _Y nw zt|jpYd| _W n tyi   d | _Y nw | jtjkrttj| _| jtjkrz#|j}|jjdkrd	| _n|jjd
krd| _n	d| _W d S W d S W d S  tt fy } zd | _t!"d| d|   W Y d }~d S d }~ww t#| jtrt$| j| _nd | _| jd u r| jtj%tj&fv rt$tj' | _d S d S d S )Nr   Zxobjectz%Either pdfimage or inline must be setZstencilimager   r   ZGRAYrC   ZCMYKrE   rD   zrAn image with a corrupt or unreadable ICC profile was found. The output PDF may not match the input PDF visually: z. )(rY   r   
_shorthandZ_originr   
ValueErrorwidth_widthheight_heightZ
image_mask_typer\   Zbits_per_component_bpcrB   getfilters_encr   rA   Z
colorspace_colorNotImplementedErrorr8   r5   r'   r0   ZprofileZxcolor_spacer   AttributeErrorr   loggerwarning
isinstancerF   r9   r;   r,   )selfrZ   r   r   rS   ZpimZpim_iccexr6   r6   r7   __init__N  sl   



zImageInfo.__init__c                 C     | j S rz   )r   r   r6   r6   r7   rZ        zImageInfo.namec                 C  r   rz   )r   r   r6   r6   r7   type_  r   zImageInfo.type_c                 C  r   rz   )r   r   r6   r6   r7   r     r   zImageInfo.widthc                 C  r   rz   )r   r   r6   r6   r7   r     r   zImageInfo.heightc                 C  r   rz   )r   r   r6   r6   r7   bpc  r   zImageInfo.bpcc                 C     | j d ur| j S dS N?)r   r   r6   r6   r7   color     zImageInfo.colorc                 C  r   r   )r   r   r6   r6   r7   comp  r   zImageInfo.compc                 C  r   )Nr   )r   r   r6   r6   r7   enc  r   zImageInfo.encc                 C  s   | j jo| jdko| jdkS r   )r   	is_finiter   r   r   r6   r6   r7   
renderable  s   zImageInfo.renderablec                 C  s   t | j| j| jfS rz   )r   r   r   r   r   r6   r6   r7   r     r   zImageInfo.dpic                 C  sN   d| j  d| j d| j d| j d| j d| j d| j d| j d| j dS )Nz<ImageInfo 'z' r|   x>)	rZ   r   r   r   r   r   r   r   r   r   r6   r6   r7   __repr__  s   "zImageInfo.__repr__)r   r   r   r   )r(   r)   r*   r+   r   ZDPI_PRECr_   r   propertyrZ   r   r   r   r   r   r   r   r   r   r   r6   r6   r6   r7   r   F  s>   
 H









r   contentsinfoIterator[ImageInfo]c                 c  s6    t | jD ]\}}td|d|j|jdV  qdS )z'Find inline images in the contentstreamzinline-02d)rZ   rS   r   N)r   re   r   rS   rb   )r   r   r   r6   r6   r7   _find_inline_images  s   
r   Iterator[tuple[Object, str]]c                 c  sh    d| vrdS | d }d|vrdS |d   }|D ]}|| }d|vr$q|d dkr1|}||fV  qdS )a  Search for all XObject-based images in the container

    Usually the container is a page, but it could also be a Form XObject
    that contains images. Filter out the Form XObjects which are dealt with
    elsewhere.

    Generate a sequence of tuples (image, xobj container), where container,
    where xobj is the name of the object and image is the object itself,
    since the object does not know its own name.

    
/ResourcesN/XObject/Subtypez/Image)as_dict)	container	resourcesxobjsxobj	candidater   r6   r6   r7   _image_xobjects  s    
r   r   c                 c  s^    t | D ]'\}}||jvrq|j| D ]}|jdkr!t|jr!qt|j||jdV  qqdS )zFind images stored in the container's /Resources /XObject

    Usually the container is a page, but it could also be a Form XObject
    that contains images.

    Generates images with their DPI at time of drawing.
    r   )rZ   r   rS   N)r   ri   r]   rV   rS   r   rZ   )r   r   r   r   Zdrawr6   r6   r7   _find_regular_images  s   
r   pdfr   c           
      c  s    d|vrdS |d }d|vrdS |d   }|D ](}|| }|d dkr&q|}|jD ]}|j|kr3q+|j}	t| ||	dE dH  q+qdS )z|Find any images that are in Form XObjects in the container

    The container may be a page, or a parent Form XObject.

    r   Nr   r   /Formr   r   rS   )r   rd   rZ   rS   _process_content_streams)
r   r   r   r   r   r   r   Zform_xobjectr   r   r6   r6   r7   _find_form_xobject_images  s*   

r   )rS   /Iterator[VectorMarker | TextMarker | ImageInfo]c                 c  s    | ddkrd|v r|pt}n-| ddkr<|d dkr<|r$t|nt }| dt }t|}|| }|j}ndS t||}|jrJt V  |jrQt	 V  t
|E dH  t||E dH  t| ||E dH  dS )	a  Find all individual instances of images drawn in the container

    Usually the container is a page, but it may also be a Form XObject.

    On a typical page images are stored inline or as regular images
    in an XObject.

    Form XObjects may include inline images, XObject images,
    and recursively, other Form XObjects; and also vector graphic objects.

    Every instance of an image being drawn somewhere is flattened and
    treated as a unique image, since if the same image is drawn multiple times
    on one page it may be drawn at differing resolutions, and our objective
    is to find the resolution at which the page can be rastered without
    downsampling.

    z/Typez/Pagez	/Contentsr   r   r   z/MatrixN)r   rQ   r   identityrS   r   rg   ro   rh   rr   r   r   r   )r   r   rS   r   r   Zform_shorthandZform_matrixr   r6   r6   r7   r   $  s$   

r   text_blocksIterable[FloatRect]rf   c           
      C  sj   t |t |}}d}|| d| | d| | || f}ddd	}d
}| D ]}	||	|r2d} |S q&|S )z3Smarter text detection that ignores text in marginsg      ?rC   rJ   	FloatRectrK   r   rf   c                 S  s@   | d |d k o| d |d ko| d |d ko| d |d k S )z
        Where (a,b) are 4-tuple rects (left-0, top-1, right-2, bottom-3)
        https://stackoverflow.com/questions/306316/determine-if-two-rectangles-overlap-each-other
        Formula assumes all boxes are in first quadrant
        r   r   rC   rD   r6   )rJ   rK   r6   r6   r7   rects_intersectd  s   @z'_page_has_text.<locals>.rects_intersectFTN)rJ   r   rK   r   r   rf   )rO   )
r   Z
page_widthZpage_heightpwphZmargin_ratioZinterior_bboxr   has_textrk   r6   r6   r7   _page_has_textW  s    



r   Iterator[TextboxInfo]c                 c  sN    || D ]}|j d }|j d }|jdk}| dk}t|j||V  qdS )z|Extract only limited content from text boxes

    We do this to save memory and ensure that our objects are pickleable.
    r   rD   u   �N)Z_objsZ
rendermodeZget_textrj   rk   )minerZtextbox_getterbox
first_line
first_charvisiblecorruptr6   r6   r7   simplify_textboxest  s   


r   infiler   c                 C  sB   t   td| | d u rt|add }t| d S d S )Npdfminerc                   S  s   t   d S rz   )
worker_pdfcloser6   r6   r6   r7   on_process_close  s   z1_pdf_pageinfo_sync_init.<locals>.on_process_close)	r$   logging	getLoggersetLevelr   openr   atexitregister)r   r   Zpdfminer_loglevelr   r6   r6   r7   _pdf_pageinfo_sync_init  s   
r  c           	      C  sp   | \}}}}}|d ur|nt }t }|s|t|}t|||||}|W  d    S 1 s1w   Y  d S rz   )r   r   enter_contextr   r   PageInfo)	argspagenoZ
thread_pdfr   check_pagesdetailed_analysisr   r   pager6   r6   r7   _pdf_pageinfo_sync  s   $r
  Fexecutorr   Sequence[PageInfo | None]c                   s   d gt | j fdd}|d u rt }t | j}d}	tdt d  |}
|
dkr-d}	|	r1| nd  fddt|D }|	rK|
dksSJ d	|
dksSJ d	||	|
t|d
d| dttt	dj
t||d S )Nc                   s&   | }|st d| |j< |  d S )NzCould read a page in the PDF)r!   r  update)resultZpbarr	  )pagesr6   r7   update_pageinfo  s
   
z1_pdf_pageinfo_concurrent.<locals>.update_pageinfoFrC   rE   Tc                 3  s    | ]
}| fV  qd S rz   r6   r   )r  r  r   initial_pdfr6   r7   rL     s    
z+_pdf_pageinfo_concurrent.<locals>.<genexpr>zNot multithreadablezScanning contentsr	  )totaldescunitdisabler   )use_threadsmax_workersZtqdm_kwargsZworker_initializertaskZtask_argumentsZtask_finished)r   r  r#   minr   dictr	   r  r   r   levelr
  )r   r  r   progbarr  r  r  r  r  r  Z	n_workersZcontextsr6   )r  r  r   r  r  r7   _pdf_pageinfo_concurrent  s>   	
$

r  c                   @  s  e Zd ZU dZded< ded< ded< 	d<d=ddZd=ddZed>ddZed?ddZ	ed?ddZ
ed?ddZed@d d!Zed@d"d#Zed>d$d%Zed>d&d'Zed>d(d)Zejd*d) Zed+d, ZdAdBd0d1ZedCd3d4Zed@d5d6ZedDd8d9Zd:d; Zd-S )Er  z9Information about type of contents on each page in a PDF.bool | None	_has_text_has_vectorzlist[ImageInfo]_imagesFr   r   r  r\   r   r   r  Container[int]r  rf   c                 C  s(   || _ || _|| _| ||||| d S rz   )_pageno_infile_detailed_analysis_gather_pageinfo)r   r   r  r   r  r  r6   r6   r7   r     s   zPageInfo.__init__c                 C  s  |j | }dd |j D }|d |d  }|d |d  }	||v }
|
rP|rPt|jdd}t|||}tt	|t
| _d	d
 | jD }t|||	| _ng | _d | _|dtd}t|tsgt|}|| _|| td | _|	| td | _z	t|d | _W n ty   d| _Y nw |dd|ddf}|
rd| _d| _g | _t|||dD ]"}t|trd| _qt|trd| _qt|tr| j| qt n	d | _d | _g | _d | _| jrtdd dd
 | jD }|| _tt!|j"t#| j | _$tt!|j%t#| j | _&d S d S )Nc                 S  s   g | ]}t |qS r6   r   )rI   r   r6   r6   r7   
<listcomp>  s    z-PageInfo._gather_pageinfo.<locals>.<listcomp>r   r   rD   rC   z/CreatorZPScript5c                 s      | ]}|j V  qd S rz   rk   )rI   r   r6   r6   r7   rL         z,PageInfo._gather_pageinfo.<locals>.<genexpr>z	/UserUnitrG   r   z/RotateFr   TrH   c                 s  s    | ]	}|j r|jV  qd S rz   )r   r   )rI   r   r6   r6   r7   rL   ,  s    
)'r  ZMediaBoxas_listrY   Zdocinfor   
startswithr%   listr   r&   
_textboxesr   r  r   r   	_userunit_width_inches_height_inchesr\   _rotateKeyErrorr   r!  r   ro   rr   r   r   r   _dpir"   Ztake_maxroundr   rO   Z_width_pixelsyZ_height_pixels)r   r   r  r   r  r  r	  ZmediaboxZwidth_ptZ	height_ptZcheck_this_pageZpscript5_moder   ZbboxesuserunitZuserunit_shorthandinfor   r6   r6   r7   r&    sh   






zPageInfo._gather_pageinfor   c                 C  r   rz   )r#  r   r6   r6   r7   r  3  r   zPageInfo.pagenoc                 C  
   t | jS rz   )rf   r  r   r6   r6   r7   r   7     
zPageInfo.has_textc                 C  s"   | j stdtdd | jD S )NzDid not do detailed analysisc                 s  r(  rz   )rm   )rI   Ztboxr6   r6   r7   rL   ?  r*  z,PageInfo.has_corrupt_text.<locals>.<genexpr>)r%  r   anyr.  r   r6   r6   r7   has_corrupt_text;  s   zPageInfo.has_corrupt_textc                 C  r9  rz   )rf   r   r   r6   r6   r7   
has_vectorA  r:  zPageInfo.has_vectorr   c                 C  r   rz   )r0  r   r6   r6   r7   width_inchesE  r   zPageInfo.width_inchesc                 C  r   rz   )r1  r   r6   r6   r7   height_inchesI  r   zPageInfo.height_inchesc                 C     t tt| j| jj S rz   )r\   r5  rO   r>  r   r   r   r6   r6   r7   width_pixelsM     zPageInfo.width_pixelsc                 C  r@  rz   )r\   r5  rO   r?  r   r6  r   r6   r6   r7   height_pixelsQ  rB  zPageInfo.height_pixelsc                 C  r   rz   )r2  r   r6   r6   r7   rotationU  r   zPageInfo.rotationc                 C  s   |dv r	|| _ d S td)N)r   Z      i  ih  iiLiz!rotation must be a cardinal angle)r2  r   )r   valuer6   r6   r7   rD  Y  s   
c                 C  r   rz   )r!  r   r6   r6   r7   images`  r   zPageInfo.imagesNr   r   c                   sD   dd | j sd ur d urtd| j S  fdd| j D S )Nc                 S  s4   d}|d ur| j |krd}|d ur| j|krd}|S )NTF)rl   rm   )objZwant_visibleZwant_corruptr  r6   r6   r7   	predicatee  s   

z)PageInfo.get_textareas.<locals>.predicatez#Incomplete information on textboxesc                 3  s"    | ]}| r|j V  qd S rz   r)  )rI   rI  r   rJ  r   r6   r7   rL   t  rM   z)PageInfo.get_textareas.<locals>.<genexpr>)r.  r   )r   r   r   r6   rK  r7   get_textareasd  s   
zPageInfo.get_textareasr"   c                 C  s   | j d u r
tddS | j S )NrH   )r4  r"   r   r6   r6   r7   r   v  s   

zPageInfo.dpic                 C  r   rz   )r/  r   r6   r6   r7   r7  |  r   zPageInfo.userunitrY   c                 C  s   | j d urdS dS )Nz1.6z1.5r7  r   r6   r6   r7   min_version  s   
zPageInfo.min_versionc                 C  s6   d| j  d| j d| j d| j d| j d| j dS )Nz<PageInfo pageno=r|   z"xz" rotation=z dpi=z
 has_text=r   )r  r>  r?  rD  r   r   r   r6   r6   r7   r     s   zPageInfo.__repr__F)
r   r   r  r\   r   r   r  r"  r  rf   )r   r\   r   rf   )r   r   )NN)r   r  r   r  r   r"   r   rY   )r(   r)   r*   r+   r_   r   r&  r   r  r   r<  r=  r>  r?  rA  rC  rD  setterrH  rL  r   r7  rN  r   r6   r6   r6   r7   r    sL   
 
E

r  c                   @  s   e Zd ZdZddddedd%ddZed&ddZed'ddZed(ddZ	ed(ddZ
ed)ddZed(ddZd*dd Zd!d" Zd#d$ ZdS )+PdfInfoz#Get summary information about a PDFFN)r  r  r  r  r  r  rf   r  r  r\   r  r   c             
   C  s   || _ |d u rtdd}t|\}|jrt t|||||||d| _|j	dd| _
d| _d|jv rUt|jj	dg dkrDd| _n!d	|jjv r]d| _W d    d S W d    d S W d    d S W d    d S 1 spw   Y  d S )
Nr   i ʚ;)r  r  z/NeedsRenderingFz	/AcroFormz/FieldsTz/XFA)r$  r   r   r   is_encryptedr    r  _pagesZRootr   _needs_rendering_has_acroformr   ZAcroForm)r   r   r  r  r  r  r  r   r6   r6   r7   r     s:   

	
"zPdfInfo.__init__r   r  c                 C  r   rz   rV  r   r6   r6   r7   r    r   zPdfInfo.pagesrY   c                 C     t dd | jD S )Nc                 s  s    | ]}|r|j V  qd S rz   )rN  rI   r	  r6   r6   r7   rL     s    z&PdfInfo.min_version.<locals>.<genexpr>)maxr  r   r6   r6   r7   rN    s   zPdfInfo.min_versionc                 C  rZ  )Nc                 s  s    | ]
}|r|j d kV  qdS )rG   NrM  r[  r6   r6   r7   rL     s    z'PdfInfo.has_userunit.<locals>.<genexpr>)r;  r  r   r6   r6   r7   has_userunit  r   zPdfInfo.has_userunitc                 C  r   rz   )rX  r   r6   r6   r7   has_acroform  r   zPdfInfo.has_acroform
str | Pathc                 C  s   t | jttfstd| jS )Nzcan't get filename from stream)r   r$  rY   r   r   r   r6   r6   r7   filename  s   zPdfInfo.filenamec                 C  r   rz   )rW  r   r6   r6   r7   needs_rendering  r   zPdfInfo.needs_renderingr  c                 C  s
   | j | S rz   rY  )r   itemr6   r6   r7   __getitem__     
zPdfInfo.__getitem__c                 C  r9  rz   )r   rV  r   r6   r6   r7   __len__  rd  zPdfInfo.__len__c                 C  s   dt |  dS )Nz<PdfInfo('...'), page count=r   )r   r   r6   r6   r7   r     s   zPdfInfo.__repr__)r  rf   r  rf   r  r\   r  r   )r   r  rR  rP  )r   r_  )r   r  )r(   r)   r*   r+   DEFAULT_EXECUTORr   r   r  rN  r]  r^  r`  ra  rc  re  r   r6   r6   r6   r7   rT    s.    "
rT  c                  C  sl   dd l } ddlm} |  }|d | }t|j}|| |jD ]}|| |jD ]}|| q,q#d S )Nr   )pprintr   )	argparserg  ArgumentParseradd_argument
parse_argsrT  r   r  rH  )rh  rg  parserr  Zpdfinfor	  Zimr6   r6   r7   main  s   




rm  __main__)ry   r   rQ  )r   rc   r   r   )r   r   )r   r   r   rc   r   r   )r   r   r   r   r   rc   )r   r   r   r   r   r   )r   r   r   rf   )r   r   )r   r   r   r   rO  )r  r   r   r  )pr+   
__future__r   r   r   rs   collectionsr   
contextlibr   decimalr   enumr   r   	functoolsr	   mathr
   r   r   osr   pathlibr   typingr   r   r   r   r   r   r   warningsr   Zpikepdfr   r   r   r   r   r   r   Zocrmypdf._concurrentr   r   Zocrmypdf.exceptionsr    r!   Zocrmypdf.helpersr"   r#   r$   Zocrmypdf.pdfinfo.layoutr%   r&   r   r   r'   r8   rO   r   r,   r-   r.   r/   r0   r1   r2   r3   r4   rA   r_   r9   r:   r5   r;   r<   r=   r>   r?   r@   rB   rF   rQ   rV   rX   ra   rc   rj   ro   rr   rx   r   r   r   r   r   r   r   r   r   r   r   r  r
  r  r  rf  rT  rm  r(   r6   r6   r6   r7   <module>   s   $$
	

W>

	

!
3

: 6J

