o
    e5v                     @   s:  d dl Z d dlZd dlZe jeZd dlZeje j	ed d dl
Z
d dlZd dlZd dlmZ eddZeddZeddZd dlmZ d d	lmZ e Zd d
lmZmZ d dlmZmZmZm Z  d dl!m"Z"m#Z#m$Z$ d dl%m&Z&m'Z' d dl(m)Z)m*Z*m+Z+ g dZ,dgZ-dZ.ddgZ/e j0dZ1dZ2g dZ3dZ4ddgZ5ddiddiddidddd d!d"d d#d$d d%d&d d'd(d d)d*d d+d,d d-d.d d/d0d d1d2d d3d4d d5d6d d7d8dd9iid:d8dd;iid8d<dd id8dd9iid:dd=idd>idd?id@dAdd dBd"d dCdDd dEdFd dGd$d dHd&d dId(d dJd*d dKd,d dLd.d dMd0d dNd2d dOd4d dPd6d dQdRd dSd8dd9iid:dTdUdVdWdXd iidYdXd dZd[d d\d]d^d d_d`d d\dadbdcZ6dvdedfZ7dgdh Z8didj Z9dke:fdldmZ;dndo Z<G dpdq dqej=Z>G drds dse)Z?dtdu Z@dS )w    N )Path.toolsppocrppstructure)predict_system)
get_logger)check_and_readget_image_file_list)maybe_downloaddownload_with_progressbaris_linkconfirm_model_dir_url)draw_ocrstr2bool	check_gpu)	init_argsdraw_structure_result)StructureSystemsave_structure_resto_excel)	PaddleOCRPPStructurer   r   r   r   r   ZDBz2.6.1.1ZCRNNZ
SVTR_LCNetz~/.paddleocr/PP-OCRv3)PP-OCRPP-OCRv2r   PP-StructureV2PP-StructureurlzJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_det_infer.tarzJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_det_infer.tarzYhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/Multilingual_PP-OCRv3_det_infer.tar)chenmlzJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/chinese/ch_PP-OCRv3_rec_infer.tarz./ppocr/utils/ppocr_keys_v1.txt)r   	dict_pathzJhttps://paddleocr.bj.bcebos.com/PP-OCRv3/english/en_PP-OCRv3_rec_infer.tarz./ppocr/utils/en_dict.txtzShttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/korean_PP-OCRv3_rec_infer.tarz"./ppocr/utils/dict/korean_dict.txtzRhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/japan_PP-OCRv3_rec_infer.tarz!./ppocr/utils/dict/japan_dict.txtzXhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/chinese_cht_PP-OCRv3_rec_infer.tarz'./ppocr/utils/dict/chinese_cht_dict.txtzOhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ta_PP-OCRv3_rec_infer.tarz./ppocr/utils/dict/ta_dict.txtzOhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/te_PP-OCRv3_rec_infer.tarz./ppocr/utils/dict/te_dict.txtzOhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/ka_PP-OCRv3_rec_infer.tarz./ppocr/utils/dict/ka_dict.txtzRhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/latin_PP-OCRv3_rec_infer.tarz!./ppocr/utils/dict/latin_dict.txtzShttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/arabic_PP-OCRv3_rec_infer.tarz"./ppocr/utils/dict/arabic_dict.txtzUhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/cyrillic_PP-OCRv3_rec_infer.tarz$./ppocr/utils/dict/cyrillic_dict.txtzWhttps://paddleocr.bj.bcebos.com/PP-OCRv3/multilingual/devanagari_PP-OCRv3_rec_infer.tarz&./ppocr/utils/dict/devanagari_dict.txt)r    r!   koreanjapanchinese_chttatekalatinarabiccyrillic
devanagarir    zRhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_cls_infer.tardetrecclszJhttps://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_det_infer.tarzJhttps://paddleocr.bj.bcebos.com/PP-OCRv2/chinese/ch_PP-OCRv2_rec_infer.tarzRhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_det_infer.tarz\https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_ppocr_mobile_v2.0_det_infer.tarz[https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_det_infer.tar)r    r!   	structurezRhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/ch/ch_ppocr_mobile_v2.0_rec_infer.tarz]https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/en_number_mobile_v2.0_rec_infer.tarzZhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/french_mobile_v2.0_rec_infer.tarz"./ppocr/utils/dict/french_dict.txtzZhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/german_mobile_v2.0_rec_infer.tarz"./ppocr/utils/dict/german_dict.txtzZhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/korean_mobile_v2.0_rec_infer.tarzYhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/japan_mobile_v2.0_rec_infer.tarz_https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/chinese_cht_mobile_v2.0_rec_infer.tarzVhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ta_mobile_v2.0_rec_infer.tarzVhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/te_mobile_v2.0_rec_infer.tarzVhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/ka_mobile_v2.0_rec_infer.tarz_https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/latin_ppocr_mobile_v2.0_rec_infer.tarz`https://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/arabic_ppocr_mobile_v2.0_rec_infer.tarzbhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/cyrillic_ppocr_mobile_v2.0_rec_infer.tarzdhttps://paddleocr.bj.bcebos.com/dygraph_v2.0/multilingual/devanagari_ppocr_mobile_v2.0_rec_infer.tarz[https://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_rec_infer.tarzppocr/utils/dict/table_dict.txt)r    r!   frenchgermanr$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r2   )r   r   r   tabler!   zahttps://paddleocr.bj.bcebos.com/dygraph_v2.0/table/en_ppocr_mobile_v2.0_table_structure_infer.tarz)ppocr/utils/dict/table_structure_dict.txtzehttps://paddleocr.bj.bcebos.com/ppstructure/models/slanet/en_ppstructure_mobile_v2.0_SLANet_infer.tarzehttps://paddleocr.bj.bcebos.com/ppstructure/models/slanet/ch_ppstructure_mobile_v2.0_SLANet_infer.tarz,ppocr/utils/dict/table_structure_dict_ch.txt)r!   r    zahttps://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_infer.tarz6ppocr/utils/dict/layout_dict/layout_publaynet_dict.txtzfhttps://paddleocr.bj.bcebos.com/ppstructure/models/layout/picodet_lcnet_x1_0_fgd_layout_cdla_infer.tarz1ppocr/utils/dict/layout_dict/layout_cdla_dict.txt)r5   layout)r   r   )OCR	STRUCTURETc                 C   s   dd l }t }| |_|jdtdd |jdtdd |jdtdd |jdtd	d |jd
ttddd |jdttddd |jD ]
}|j	dv rKd |_
qA| rR| S i }|jD ]}|j
||j	< qW|jdi |S )Nr   z--langr    )typedefaultz--detTz--recz--typeocrz--ocr_versionr   aR  OCR Model version, the current model support list is as follows: 1. PP-OCRv3 Support Chinese and English detection and recognition model, and direction classifier model2. PP-OCRv2 Support Chinese detection and recognition model. 3. PP-OCR support Chinese detection, recognition and direction classifier and multilingual recognition model.)r9   choicesr:   helpz--structure_versionr   zModel version, the current model support list is as follows: 1. PP-Structure Support en table structure model. 2. PP-StructureV2 Support ch and en table structure model.)rec_char_dict_pathtable_char_dict_pathlayout_dict_path )argparser   add_helpadd_argumentstrr   SUPPORT_OCR_MODEL_VERSIONSUPPORT_STRUCTURE_MODEL_VERSIONZ_actionsdestr:   
parse_args	Namespace)mMainrB   parseractionZinference_args_dictrA   rA   CD:\Projects\ConvertPro\env\Lib\site-packages\paddleocr/paddleocr.pyrI   3  s>   

	

rI   c                 C   s   g d}g d}g d}g d}| |v rd} n| |v rd} n| |v r%d} n| |v r+d} | t d	 t d
 v sEJ dt d	 t d
  | | dkrOd}| |fS | dkrYd}| |fS | dv rcd}| |fS d}| |fS )N)*afazbscscydadeesetfrgahrhuidisitZkuZlaltlvmimsmtnlnoocpiplptroZrs_latinskslsqsvswtltruzvir3   r4   )arfaZugur)ruZrs_cyrillicbebgukZmnZabqZadyZkbdZavaZdarZinhZcheZlbeZleztab)himrneZbhmaiangZbhoZmahZscknewZgomsaZbgcr*   r+   r,   r-   r7   r0   z!param lang must in {}, but got {}r    r2   )r!   r*   r!   r"   )
MODEL_URLSDEFAULT_OCR_MODEL_VERSIONformatkeys)langZ
latin_langZarabic_langZcyrillic_langZdevanagari_langdet_langrA   rA   rN   
parse_lang\  s<   r   c                 C   s   | dkrt }n	| dkrt}ntt|  }||vr|}||| vr;||| v r)|}ntd|||   t	d ||| | vrc||| | v rN|}ntd||| |  | t	d || | | S )Nr7   r8   z,{} models is not support, we only support {}z8lang {} is not support, we only support {} for {} models)
r   DEFAULT_STRUCTURE_MODEL_VERSIONNotImplementedErrorr   loggererrorr   r   sysexit)r9   versionZ
model_typer   ZDEFAULT_MODEL_VERSIONZ
model_urlsrA   rA   rN   get_model_config  s4   

r   contentc                 C   s   t j| t jd}t|tjS )N)Zdtype)npZ
frombufferZuint8cv2ZimdecodeZIMREAD_COLOR)r   Znp_arrrA   rA   rN   
img_decode  s   r   c                 C   s   t | tr	t| } t | trOt| rt| d d} | }t|\} }}|sA|sAt|d}t| } W d    n1 s<w   Y  | d u rOt	
d| d S t | tjrct| jdkrct| tj} | S )Ntmp.jpgrberror in loading image:{}   )
isinstancebytesr   rE   r   r   r
   openreadr   r   r   r   ndarraylenshaper   ZcvtColorZCOLOR_GRAY2BGR)imgZ
image_fileflag_gifflag_pdffrA   rA   rN   	check_img  s$   


r   c                       s&   e Zd Z fddZdddZ  ZS )r   c                    s  t dd}|jjdi | |jtv sJ dt|jt|j|_|js+t	
tj |j| _t|j\}}td|jd|}t|jtjtdd||d \|_}td|jd|}t|jtjtdd||d \|_}td|jd	d
}	t|jtjtdd	|	d \|_}
|jdkrd|_nd|_|jst|j| t|j| t|j|
 |jtvrt	dt t d |j!t"vrt	dt" t d |j#du rt$t%t&j'|d  |_#t	(| t) *| |j+| _+dS )zm
        paddleocr package
        args:
            **kwargs: other params show in paddleocr --help
        FrK   z"ocr_version must in {}, but get {}r7   r/   whlr   r0   r1   r    r   z
3, 48, 320z
3, 32, 320zdet_algorithm must in {}r   zrec_algorithm must in {}Nr#   rA   ),rI   __dict__updateocr_versionrF   r   r   use_gpushow_logr   setLevelloggingINFOuse_angle_clsr   r   r   r   det_model_dirospathjoinBASE_DIRrec_model_dirZcls_model_dirZrec_image_shapeZuse_onnxr   Zdet_algorithmSUPPORT_DET_MODELr   r   r   Zrec_algorithmSUPPORT_REC_MODELr>   rE   r   __file__parentdebugsuper__init__page_num)selfkwargsparamsr   r   det_model_configdet_urlrec_model_configrec_urlZcls_model_configZcls_url	__class__rA   rN   r     sh   













zPaddleOCR.__init__Tc                 C   s  t |tjtttfsJ t |tr|dkrtd td |dkr,| j	dkr,t
d t|}t |trN| jt|ksA| jdkrFt|| _|d| j }n|g}|rz|rzg }t|D ]\}}| ||\}}	}
dd t||	D }|| q[|S |r|sg }t|D ]\}}| |\}}d	d |D }|| q|S g }g }t|D ],\}}t |ts|g}| j	r|r| |\}}}|s|| | |\}	}||	 q|s|S |S )
uY  
        ocr with paddleocr
        args：
            img: img for ocr, support ndarray, img_path and list or ndarray
            det: use text detection or not. If false, only rec will be exec. Default is True
            rec: use text recognition or not. If false, only det will be exec. Default is True
            cls: use angle classifier or not. Default is True. If true, the text with rotation of 180 degrees can be recognized. If no text is rotated by 180 degrees, use cls=False to get better performance. Text with rotation of 90 or 270 degrees can be recognized even if cls=False.
        Tz.When input a list of images, det must be falser   FzoSince the angle classifier is not initialized, the angle classifier will not be uesd during the forward processNc                 S   s   g | ]
\}}|  |gqS rA   tolist).0boxresrA   rA   rN   
<listcomp>  s    z!PaddleOCR.ocr.<locals>.<listcomp>c                 S   s   g | ]}|  qS rA   r   )r   r   rA   rA   rN   r     s    )r   r   r   listrE   r   r   r   r   r   warningr   r   r   	enumerate__call__zipappendZtext_detectorZtext_classifierZtext_recognizer)r   r   r/   r0   r1   ZimgsZocr_residxZdt_boxesZrec_res_Ztmp_resZelapseZcls_resZcls_res_tmprA   rA   rN   r;     sX   	





zPaddleOCR.ocr)TTT)__name__
__module____qualname__r   r;   __classcell__rA   rA   r   rN   r     s    <r   c                       s*   e Zd Z fddZd fdd	Z  ZS )r   c                    s  t dd}|jjdi | |jtv sJ dt|jt|j|_d|_|j	s.t
tj t|j\}}|dkr<d}nd}|jdkrFd|_td|jd	|}t|jtjtd
d	||d \|_}td|jd|}t|jtjtd
d||d \|_}	td|jd|}
t|jtjtd
d|
d \|_}td|jd|}t|jtjtd
d|d \|_}t|j| t|j|	 t|j| t|j| |jd u rttt j!|d  |_|j"d u rttt j!|
d  |_"|j#d u rttt j!|d  |_#t
$| t% &| d S )NFr   z(structure_version must in {}, but get {}r2   r    r!   r   r7   r/   r   r   r0   r8   r5   r6   r#   rA   )'rI   r   r   Zstructure_versionrG   r   r   r   moder   r   r   r   r   r   r   Zmerge_no_span_structurer   r   r   r   r   r   r   r   r   Ztable_model_dirZlayout_model_dirr   r>   rE   r   r   r   r?   r@   r   r   r   )r   r   r   r   r   Z
table_langr   r   r   r   Ztable_model_configZ	table_urlZlayout_model_configZ
layout_urlr   rA   rN   r   4  s|   













zPPStructure.__init__Fr   c                    s"   t |}t j|||d\}}|S )Nimg_idx)r   r   r   )r   r   Zreturn_ocr_result_in_tabler   r   r   r   rA   rN   r   n  s
   
zPPStructure.__call__)Fr   )r   r   r   r   r   r   rA   rA   r   rN   r   3  s    :r   c            !      C   s  t dd} | j}t|rt|d dg}nt| j}t|dkr+td| j d S | j	dkr9t
di | j}n| j	dkrGtdi | j}nt|D ]}tj|dd }td	d
|d
 | j	dkr|j|| j| j| jd}|d urtt|D ]}|| }|D ]}	t|	 qq~qK| j	dkrt|\}
}}|s|st|}
| jr| jr|rddlm} tj| j d|}||}|!| |"  td| qK|s|
d u rtd| qK||
gg}n6g }t#|
D ]/\}}tj$tj| j |dd tj| j ||d t%| d }t&|| |'||g qg }t#|D ]U\}\}}
td|d t| tj|dd }|||d}t(|| j || | jrz|g krzddl)m*} ddl+m,} |
j-\}}}||}|||}||7 }q&| jr|g krzddl+m.} ||
|| j | W n t/y } ztd|| W Y d }~qKd }~ww |D ]} | 0d | 0d t|  qtd| j  qKd S )NTr   r   r   zno images find in {}r;   r2   r   z{}{}{}z
**********r.   )	Converterz{}.docxzdocx save to {}r   )exist_okr   z.jpgzprocessing {}/{} page:   r   )deepcopy)sorted_layout_boxes)convert_info_docxz.error in layout recovery image:{}, err msg: {}r   r   zresult save to {}rA   )1rI   	image_dirr   r   r   r   r   r   r   r9   r   r   r   r   r   r   basenamesplitinfor;   r/   r0   r   ranger
   r   ZimreadZrecoveryZuse_pdf2docx_apiZpdf2docx.converterr   r   outputconvertcloser   makedirsrE   Zimwriter   r   copyr   Z$ppstructure.recovery.recovery_to_docr   r   r   	Exceptionpop)!argsr   Zimage_file_listZengineZimg_pathZimg_nameresultr   r   liner   r   r   r   Z	docx_fileZcvZ	img_pathsindexZpdf_imgZpdf_img_pathZall_resZnew_img_pathZnew_img_namer   r   hwr   Z	result_cpZresult_sortedr   exitemrA   rA   rN   mainu  s   












r   )T)Ar   r   	importlibr   dirnamer   __dir__Zpaddler   r   r   r   numpyr   pathlibr   import_moduler   r   r   Ztools.inferr   Zppocr.utils.loggingr	   r   Zppocr.utils.utilityr
   r   Zppocr.utils.networkr   r   r   r   Ztools.infer.utilityr   r   r   Zppstructure.utilityr   r   Zppstructure.predict_systemr   r   r   __all__r   VERSIONr   
expanduserr   r   rF   r   rG   r   rI   r   r   r   r   r   Z
TextSystemr   r   r   rA   rA   rA   rN   <module>   s  ?VN Q  
z)&wB