o
    e<                     @  s4  d Z ddlmZ ddlZddlZddlZddlZddlZddlZddl	m
Z
 ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZmZmZ ddlZdd
lmZmZ ddlmZ ddlmZmZm Z  ddl!m"Z" ddl#m$Z$m%Z%m&Z&m'Z'm(Z(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6m7Z7m8Z8m9Z9m:Z: ddl;m<Z<m=Z= ddl>m?Z?m@Z@mAZA ddlBmCZCmDZD ddlEmFZFmGZGmHZHmIZImJZJ ddlKmLZL eMeNZOG dd deZPeQ ZRdeR_SeT ZUdd ZVeWeV dJd!d"ZXdKd&d'ZYdLd(d)ZZdMd0d1Z[dNd4d5Z\dOd7d8Z]	9dPdQd>d?Z^d@dAdRdHdIZ_dS )SzEImplements the concurrent and page synchronous parts of the pipeline.    )annotationsN)BrokenProcessPool)BrokenThreadPool)partial)Path)mkdtemp)
NamedTupleSequencecast)Executorsetup_executor)
OcrGrafter)PageContext
PdfContextcleanup_working_files)PageNumberFilter)convert_to_pdfa
copy_finalcreate_ocr_imagecreate_pdf_page_from_imagecreate_visible_page_jpggenerate_postscript_stubget_orientation_correctionget_pdfinfois_ocr_requiredmerge_sidecarsmetadata_fixupocr_engine_hocrocr_engine_textonly_pdfoptimize_pdfpreprocess_cleanpreprocess_deskewpreprocess_remove_background	rasterizerasterize_previewrender_hocr_page!should_visible_page_image_use_jpgtriagevalidate_pdfinfo_options)OcrmypdfPluginManagerget_plugin_manager)check_requested_output_filecreate_input_filereport_output_file_size)ExitCodeExitCodeException)
NeverRaiseavailable_cpu_count	check_pdfpikepdf_enable_mmapsamefile)file_claims_pdfac                   @  s:   e Zd ZU dZded< ded< ded< ded< ded< d	S )

PageResultz*Result when a page is finished processing.intpagenozPath | Nonepdf_page_from_imageocrtextorientation_correctionN)__name__
__module____qualname____doc____annotations__ rB   rB   >D:\Projects\ConvertPro\env\Lib\site-packages\ocrmypdf/_sync.pyr6   I   s   
 r6   c                  O  s$   t | i |}ttdrtj|_|S )Nr8   )old_factoryhasattrtlsr8   )argskwargsrecordrB   rB   rC   record_factoryZ   s   
rJ   page_contextr   imager   remove_backgroundbooldeskewcleanreturnc                 C  s.   |rt || }|rt|| }|rt|| }|S N)r"   r!   r    )rK   rL   rM   rO   rP   rB   rB   rC   
preprocessd   s   


rS   r<   r7   tuple[Path, Path | None]c                 C  s   | j }d  }}t| j| |dd}t|j|j|jgs+t| ||j|j	dd }}||fS |j
s:t| ||j|j	|jd}|jrHt| j| |ddd}n|}|r\||kr\|j|jkr\|}||fS t| ||j|j	|jd}||fS )NF)
correctionremove_vectors)rP   TZ_ocr)rU   rV   Z
output_tag)optionsr#   originanyrP   Zclean_finalrV   rS   rM   rO   lossless_reconstruction)rK   r<   rW   	ocr_imagepreprocess_outZrasterize_outZrasterize_ocr_outrB   rB   rC   make_intermediate_imagest   s`   
*	r]   c                 C  s  | j }| jd t_t| st| jd d d ddS d}|jr't| j| }t|| }t	| |\}}t
|| }d }|js]|s<J |}t| jrHt|| }| jjj| |d}	|	d urW|	}t|| |}|jdrpt|| \}
}t|
| }n|jdkr}t|| \}}ntd|j t| j||||dS )N   r   )r8   r9   r:   r;   r<   )pageZimage_filenameZhocrsandwichzpdf_renderer )rW   r8   rF   r   r6   Zrotate_pagesr$   rX   r   r]   r   rZ   r&   Zpageinfor   plugin_managerhookZfilter_page_imager   Zpdf_renderer
startswithr   r%   r   NotImplementedError)rK   rW   r<   Zrasterize_preview_outr[   r\   Zocr_image_outZpdf_page_from_image_outZvisible_image_outZfiltered_imageZhocr_outZtext_outZocr_outrB   rB   rC   exec_page_sync   s^   



re   pdf_filecontextr   executorr   tuple[Path, Sequence[str]]c                 C  s<   | }|j jdrt|}t|||}t||}t|||S )Npdfa)rW   output_typerc   r   r   r   r   )rf   rg   rh   Zpdf_outZps_stub_outrB   rB   rC   post_process   s   
rl   
max_pixelsNonec                 C  s   | t j_t  d S rR   )PILImageMAX_IMAGE_PIXELSr3   )rm   rB   rB   rC   worker_init   s   
rr   Sequence[str]c              
     s  | j }tt| j|j}|dkrtd| dgt| j t|  d fdd}||j|t	dt| j |j
d	kr<d
nddd|j dtttjjt|  |d |jrat| }t||j|    }g }|jdkrtd t|| |\}}t||j|  |S )z!Execute the pipeline concurrentlyr^   z&Start processing %d pages concurrentlyNresultr6   c                   sX   z'| j d t_ | j| j < |   j| j | j| j| jd |  W d t_ d S d t_ w )Nr^   )r8   rL   ZtextpdfZautorotate_correction)r8   rF   r;   updateZ
graft_pager9   r:   r<   )rt   ZpbarZocrgraftZsidecarsrB   rC   update_page	  s   
z$exec_concurrent.<locals>.update_page   r   ZOCRzImage processingr_   g      ?)totaldescunitZ
unit_scaledisable)use_threadsmax_workersZtqdm_kwargsZworker_initializertaskZtask_argumentsZtask_finishednonezPostprocessing...)rt   r6   )rW   minlenpdfinfojobsloginfor   r}   dictZtesseract_timeoutprogress_barr   rr   ro   rp   rq   re   Zget_page_contextsZsidecarr   r   finalizerk   rl   output_file)rg   rh   rW   r~   rw   r;   ZpdfmessagesrB   rv   rC   exec_concurrent   s@   


r    log_filenameprefixstrlogging.FileHandlerc                 C  sN   t j| dd}|t j t d}|| |t  t |	| |S )z
    Create a debug log file at a specified location.

    Arguments:
        log_filename: Where to the put the log file.
        prefix: The logging domain prefix that should be sent to the log.
    T)delayzA[%(asctime)s] - %(name)s - %(levelname)7s -%(pageno)s %(message)s)
loggingFileHandlersetLevelDEBUG	FormattersetFormatter	addFilterr   	getLogger
addHandler)r   r   Zlog_file_handler	formatterrB   rB   rC   configure_debug_logging<  s   

r   F)apirW   argparse.Namespacera   OcrmypdfPluginManager | Noner   r.   c                C  sb  | j st | _ |st| j}ttdd}d }| js| jdkr0tj	
dds0|s0tt|d }t  t|}zzt|  t| |\}}t|||d | }t||| j| j| js[| j nd| jd}	t| |||	|}
t|
 t|
|}| jd	kr}td
 nt| jdr| j rtd nt| jttjrn| j !drt"| j}|d rtd|d  n9t#d|d  t$j%W W |rz|&  t'| W n t(y } zt)|t*j+d W Y d }~nd }~ww t,||  S t-| js't#d t$j.W W |r!z|&  t'| W n t(y  } zt)|t*j+d W Y d }~nd }~ww t,||  S t/| || j| W n |s8t0nt1y   | jdkrJt2d nt3d t$j4 Y W |r}z|&  t'| W n t(y| } zt)|t*j+d W Y d }~nd }~ww t,||  S  |st5nt1y } zct6t5|}| jdkrt2d nt7|rt3dt8|j9t7| nt3t8|j9 |j:W  Y d }~W |rz|&  t'| W n t(y } zt)|t*j+d W Y d }~nd }~ww t,||  S d }~w |st;j<j=nt1y>   t2d t$j> Y W |r8z|&  t'| W n t(y7 } zt)|t*j+d W Y d }~nd }~ww t,||  S  |sDt?nt1|sJt@nt1fy   t2d t$jA Y W |rz|&  t'| W n t(y } zt)|t*j+d W Y d }~nd }~ww t,||  S  |stBnt1y   t2d t$j> Y W |rz|&  t'| W n t(y } zt)|t*j+d W Y d }~nd }~ww t,||  S w W |rz|&  t'| W n t(y } zt)|t*j+d W Y d }~nd }~ww t,||  t$jCS |r+z|&  t'| W n t(y* } zt)|t*j+d W Y d }~nd }~ww t,||  w )Nzocrmypdf.io.)r   r^   ZPYTEST_CURRENT_TESTr   z	debug.logz
origin.pdf)rh   Zdetailed_analysisZprogbarr~   Zcheck_pages-zOutput sent to stdoutwritablezOutput written to streamrj   passz!Output file is a %s (as expected)Zconformancez5Output file is okay but is not PDF/A (seems to be %s))filez)Output file: The generated PDF is INVALIDKeyboardInterruptr/   z%s: %szA decompression bomb error was encountered while executing the pipeline. Use the argument --max-image-mpixels to raise the maximum image pixel limit.zA worker process was terminated unexpectedly. This is known to occur if processing your file takes all available swap space and RAM. It may help to try again with a smaller number of jobs, using the --jobs argument.z2An exception occurred while executing the pipeline)Dr   r1   r*   pluginsr   r   Zkeep_temporary_filesverboseosenvirongetr   r3   r   r+   r,   r'   r   Zredo_ocrr   r}   Zpagesr   r(   r   r   r   r   rE   r   r4   devnullrk   rc   r5   warningr.   Zpdfa_conversion_failedcloseremoveHandlerOSErrorprintsysstderrr   r2   Zinvalid_output_pdfr-   r   r0   	exceptionerrorZctrl_cr/   r
   r   typer=   	exit_codero   rp   ZDecompressionBombErrorZother_errorr   r   Zchild_process_error	Exceptionok)rW   ra   r   Zwork_folderZdebug_log_handlerrh   Zstart_input_fileZoriginal_filenameZ
origin_pdfr   rg   Zoptimize_messagesZ	pdfa_infoerB   rB   rC   run_pipelineQ  s`  	


	



-
*


 






r   )rK   r   rL   r   rM   rN   rO   rN   rP   rN   rQ   r   )rK   r   r<   r7   rQ   rT   )rK   r   rQ   r6   )rf   r   rg   r   rh   r   rQ   ri   )rm   r7   rQ   rn   )rg   r   rh   r   rQ   rs   )r   )r   r   r   r   rQ   r   )rW   r   ra   r   r   rN   rQ   r.   )`r@   
__future__r   argparser   logging.handlersr   r   	threadingconcurrent.futures.processr   Zconcurrent.futures.threadr   	functoolsr   pathlibr   tempfiler   typingr   r	   r
   ro   Zocrmypdf._concurrentr   r   Zocrmypdf._graftr   Zocrmypdf._jobcontextr   r   r   Zocrmypdf._loggingr   Zocrmypdf._pipeliner   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   Zocrmypdf._plugin_managerr)   r*   Zocrmypdf._validationr+   r,   r-   Zocrmypdf.exceptionsr.   r/   Zocrmypdf.helpersr0   r1   r2   r3   r4   Zocrmypdf.pdfar5   r   r=   r   r6   localrF   r8   getLogRecordFactoryrD   rJ   setLogRecordFactoryrS   r]   re   rl   rr   r   r   r   rB   rB   rB   rC   <module>   sT   d





;
:

@