o
    e)1                     @  s  d Z ddlmZ ddlZddlZddlZddlZddlZddlm	Z	 ddl
mZ ddlmZ ddlmZ ddlZddlZddlmZ dd	lmZ dd
lmZmZmZmZ ddlmZmZmZ ddlm Z  ddl!m"Z" dZ#e$e%Z&dFddZ'dGddZ(dHddZ)dHddZ*dHddZ+dId"d#Z,dHd$d%Z-dHd&d'Z.dHd(d)Z/dHd*d+Z0dHd,d-Z1dJd0d1Z2dJd2d3Z3dKd7d8Z4dHd9d:Z5		;	<dLdMdDdEZ6dS )Nz/Validate a work order from API or command line.    )annotationsN)	Namespace)Path)copyfileobj)Sequence)PluginManager)unpaper)BadArgsErrorInputFileErrorMissingDependencyErrorOutputFileAccessError)is_file_writable	monotonicsafe_symlink)HOCR_OK_LANGS)check_external_programZengreturnNonec                   C  s*   t jdkrtjdkrtd d S d S d S )Nntl        zmYou are running OCRmyPDF in a 32-bit (x86) Python interpreter.Please use a 64-bit (x86-64) version of Python.)osnamesysmaxsizelogerror r   r   DD:\Projects\ConvertPro\env\Lib\site-packages\ocrmypdf/_validation.pycheck_platform-   s
   r   optionsr   ocr_engine_languagesset[str]c                 C  sz   | j sth| _ t d }|r|dstdt |sd S | j | }|r;d}|ddd |D 7 }|d7 }t|d S )	Nr   enz-No language specified; assuming --language %szOOCR engine does not have language data for the following requested languages: 

c                 s  s    | ]}|V  qd S Nr   ).0langr   r   r   	<genexpr>D   s    z*check_options_languages.<locals>.<genexpr>z@
Note: most languages are identified by a 3-digit ISO 639-2 Code)		languagesDEFAULT_LANGUAGElocale	getlocale
startswithr   debugjoinr   )r   r   Zsystem_langZmissing_languagesmsgr   r   r   check_options_languages6   s   
r/   c                 C  s   | j t}| jdr|std | jdkr)| jt	j
dfvr)td| j dd}t| j| j| j| jfs9d}|| _| jsF| jrHtd	d S d S )
NZhocrzThe 'hocr' PDF renderer is known to cause problems with one or more of the languages in your document.  Use `--pdf-renderer auto` (the default) to avoid this issue.none-z:Since you specified `--output-type none`, the output file zI cannot be produced. Set the output file to `-` to suppress this message.FTz\--redo-ocr is not currently compatible with --deskew, --clean-final, and --remove-background)r'   issubsetr   Zpdf_rendererr+   r   warningoutput_typeoutput_filer   devnullr	   anydeskewclean_final	force_ocrremove_backgroundlossless_reconstructionredo_ocr)r   Zis_latinr<   r   r   r   check_options_outputI   s4   r>   c                 C  s`   | j dkr| jdkrtd| jtjkrtd| jd | _ | j | jks*| j | jkr.tdd S )N r1   z@--sidecar filename must be specified when output file is stdout.zJ--sidecar filename must be specified when output file is /dev/null or NUL.z.txtz@--sidecar file must be different from the input and output files)Zsidecarr5   r	   r   r6   
input_filer   r   r   r   check_options_sidecarm   s   

rB   c              
   C  s   | j rd| _| jr| jstd| jrCtddtjddd z| jr+t| j| _W d S W d S  tyB } z	tdt	| |d }~ww d S )NTz&--clean is required for --unpaper-argsr   z6.1z--clean, --clean-final)programpackageZversion_checkerZneed_versionZrequired_forz--unpaper-args: )
r9   cleanZunpaper_argsr	   r   r   versionZvalidate_custom_args	Exceptionstr)r   er   r   r   check_options_preprocessing~   s.   rJ   rangesrH   set[int]c              	   C  s  g }|  ddd}|D ]S}|sqz	|d\}}W n ty-   |t|d  Y qw z"ttt|d t|}|sJtd| d| dd || W q ty`   td| dd w |sktd|  d	t	|stt
d
 tdd |D rtdt
d| t|S )N  ,r1      zinvalid page subrange ''zThe string of page ranges 'z/' did not contain any recognizable page ranges.zQList of pages to process contains duplicate pages, or pages that are out of orderc                 s  s    | ]}|d k V  qdS )r   Nr   )r$   pager   r   r   r&          z%_pages_from_ranges.<locals>.<genexpr>z)pages refers to a page number less than 1zOCRing only these pages: %s)replacesplit
ValueErrorappendintlistranger	   extendr   r   r3   r7   r,   set)rK   pagesZpage_groupsgroupstartendZ	new_pagesr   r   r   _pages_from_ranges   sD   
ra   c                 C  sH   t dd | j| j| jfD }|dkrtd| jr"t| j| _d S d S )Nc                 s  s    | ]	}|rd ndV  qdS )rP   r   Nr   )r$   optr   r   r   r&      s
    

z-check_options_ocr_behavior.<locals>.<genexpr>   z8Choose only one of --force-ocr, --skip-text, --redo-ocr.)sumr:   Z	skip_textr=   r	   r]   ra   )r   Zexclusive_optionsr   r   r   check_options_ocr_behavior   s   re   c                 C  s,   | j dkr| jdstd d S d S d S )Nautopdfazi--pdfa-image-compression argument only applies when --output-type is one of 'pdfa', 'pdfa-1', or 'pdfa-2')Zpdfa_image_compressionr4   r+   r   r3   rA   r   r   r   check_options_advanced   s   rh   c                 C  s~   | j | j| j| jg}dd |D D ]+}|D ]&}t|dks$t|dkr;tt|dd   }t	d| d| dqqd S )	Nc                 s  s    | ]}|r|V  qd S r#   r   )r$   mr   r   r   r&      rS   z)check_options_metadata.<locals>.<genexpr>Coi   rc   zGOne of the metadata strings contains an unsupported Unicode character: z (U+))
titleauthorkeywordssubjectunicodedatacategoryordhexupperrV   )r   ZdocinfoscharZhexcharr   r   r   check_options_metadata   s   rw   c                 C  s.   t | jd tj_tjjdkrd tj_d S d S )Ni@B r   )rX   Zmax_image_mpixelsPILZImageZMAX_IMAGE_PIXELSrA   r   r   r   check_options_pillow   s   ry   c                 C  sB   t   t|  t|  t|  t|  t|  t|  t|  d S r#   )r   rw   r>   rB   rJ   re   rh   ry   rA   r   r   r   _check_plugin_invariant_options   s   rz   plugin_managerr   c                 C  s,   |j j| d |j  | }t| | d S )NrA   )hookcheck_optionsZget_ocr_enginer'   r/   )r   r{   r   r   r   r   _check_plugin_options   s   r~   c                 C  s   t |  t| | d S r#   )rz   r~   )r   r{   r   r   r   r}      s   r}   work_folderr   tuple[Path, str]c              
   C  s@  | j dkr1td |d }t|d}ttjj| W d    |dfS 1 s(w   Y  |dfS t| j drk| j 	 s@t
dtd |d }t|d}t| j | W d    |dfS 1 sbw   Y  |dfS z|d	 }t| j | |t| j fW S  ty } zd
| j  }td r|d7 }t
||d }~ww )Nr1   z reading file from standard inputstdinwbreadablez!Input file stream is not readablezreading file from input streamstreamoriginzFile not found - z/.dockerenvz
Docker cannot your working directory unless you explicitly share it with the Docker container and set uppermissions correctly.
You may find it easier to use stdin/stdout:
	docker run -i --rm jbarlow83/ocrmypdf - - <input.pdf >output.pdf
)r@   r   infoopenr   r   r   bufferhasattrr   r
   r   r   fspathFileNotFoundErrorr   exists)r   r   targetZstream_bufferrI   r.   r   r   r   create_input_file   sB   






r   c                 C  sb   | j dkrtj rtdd S t| j dr!| j  stdd S t| j s/td| j  dd S )Nr1   ztOutput was set to stdout '-' but it looks like stdout is connected to a terminal.  Please redirect stdout to a file.writablezOutput stream is not writablezOutput file location (z) is not a writable file.)	r5   r   stdoutisattyr	   r   r   r   r   rA   r   r   r   check_requested_output_file  s   



r       r@   r5   optimize_messagesSequence[str] | Nonefile_overheadrX   page_overheadc                 C  sJ  |d u rg }zt | j}t | j}W n
 ty    Y d S w t|}||t|j  }	W d    n1 s:w   Y  || }
|||	  }|dk sQ|dk rSd S g }h d}|D ]}t| |drp|	d|
dd d q[|| | jd	r|	d
 | jr|	d |rdd| d }nd}td|
dd|  d S )Ng?ia  >   Z
oversampler:   r9   r;   r8   FzThe argument --_r1   z! was issued, causing transcoding.rg   z8PDF/A conversion was enabled. (Try `--output-type pdf`.)zPlugins were used.z#Possible reasons for this include:
r"   z@No reason for this increase is known.  Please report this issue.zThe output file size is z.2fu   × larger than the input file.
)r   statst_sizer   pikepdfr   lenr]   getattrrW   rT   r[   r4   r+   pluginsr-   r   r3   )r   r@   r5   r   r   r   Zoutput_sizeZ
input_sizepZreasonable_overheadratioZreasonable_ratioreasonsZimage_preprocargZexplanationr   r   r   report_output_file_size-  sH   



r   )r   r   )r   r   r   r    r   r   )r   r   r   r   )rK   rH   r   rL   )r   r   r{   r   r   r   )r   r   r   r   r   r   )Nr   r   )r   r   r@   r   r5   r   r   r   r   rX   r   rX   r   r   )7__doc__
__future__r   r)   loggingr   r   rp   argparser   pathlibr   shutilr   typingr   r   rx   Zpluggyr   Zocrmypdf._execr   Zocrmypdf.exceptionsr	   r
   r   r   Zocrmypdf.helpersr   r   r   Zocrmypdf.hocrtransformr   Zocrmypdf.subprocessr   r(   	getLogger__name__r   r   r/   r>   rB   rJ   ra   re   rh   rw   ry   rz   r~   r}   r   r   r   r   r   r   r   <module>   sP   


	

$


'








#