o
    e;                     @   s  d dl mZ d dlZd dlZdedefddZdd Zd	d
 Z	de
dede
fddZde
de
de
fddZdedede
fddZ		d0dejdedededef
dd Zd!ejd"ed#efd$d%Zdejd&ededefd'd(Zd1d*ejdejd+ed,ed-ef
d.d/ZdS )2    )dequeNbbox_1bbox_2c                 C   s   | \}}}}|\}}}}	|| ||  t ||t||  }
|
dkr$dS || |	|  t ||	t||  }|dkr<dS |
| S )Nr   )maxmin)r   r   x0y0x1y1u0v0u1v1wh r   ID:\Projects\ConvertPro\env\Lib\site-packages\pdf2docx/common/algorithm.pyget_area	   s   $$r   c                 C   sL   t  }g }tt| D ]}||v rqt t| |}|| || q|S )zBreadth First Search graph (may be disconnected graph).
    
    Args:
        graph (list): GRAPH represented by adjacent list, [set(1,2,3), set(...), ...]
    
    Returns:
        list: A list of connected components
    )setrangelen_graph_bfs_from_nodeappendupdate)graphZcounted_indexesgroupsiZindexesr   r   r   	graph_bfs   s   

r   c                 c   sb    t  }t }|| |r/| }||v rq|V  || | | D ]}|| q#|sdS dS )zBreadth First Search connected graph with start node.
    
    Args:
        graph (list): GRAPH represented by adjacent list, [set(1,2,3), set(...), ...].
        start (int): Index of any start vertex.
    N)r   r   r   popleftadd)r   startZsearch_queueZsearchedZcur_nodenoder   r   r   r   2   s   


r   Vnumindex_groupsc           
         s   |dk rdS t |d }| d d | |d  d | d d  | d| }| |d }tt fdd|}ttfd	d|}tt fd
d|}ttfdd|}	t||| t|	|| t||	| t||| t||| | dS )u[  Implementation of solving Rectangle-Intersection Problem.

    Performance::

        O(nlog n + k) time and O(n) space, where k is the count of intersection pairs.

    Args:
        V (list): Rectangle-related x-edges data, [(index, Rect, x), (...), ...].
        num (int): Count of V instances, equal to len(V).
        index_groups (list): Target adjacent list for connectivity between rects.
    
    Procedure ``detect(V, H, m)``::
    
        if m < 2 then return else
        - let V1 be the first ⌊m/2⌋ and let V2 be the rest of the vertical edges in V in the sorted order;
        - let S11 and S22 be the set of rectangles represented only in V1 and V2 but not spanning V2 and V1, respectively;
        - let S12 be the set of rectangles represented only in V1 and spanning V2; 
        - let S21 be the set of rectangles represented only in V2 and spanning V1
        - let H1 and H2 be the list of y-intervals corresponding to the elements of V1 and V2 respectively
        - stab(S12, S22); stab(S21, S11); stab(S12, S21)
        - detect(V1, H1, ⌊m/2⌋); detect(V2, H2, m − ⌊m/2⌋)
       Ng       @r      c                       | d d  kS Nr'   r%   r   itemXr   r   <lambda>z       z*solve_rects_intersection.<locals>.<lambda>c                    s   | d d  kS r)   r   r*   )X1r   r   r.   {   r/   c                    s   | d d  kS Nr'   r   r   r*   r,   r   r   r.   |   r/   c                    r(   r1   r   r*   )X0r   r   r.   }   r/   )intlistfilter_stabsolve_rects_intersection)
r"   r#   r$   Z
center_posleftrightZS11ZS12ZS22ZS21r   )r-   r2   r0   r   r7   X   s   ,r7   S1S2c                 C   s  | r|sdS | j dd d |j dd d d\}}|t| k r|t|k r| | \}}}|| \}}	}|d |	d k r}|}
|
t|k rx||
 d d |d k rxtt|d	 t||
 d
 d	 | |
d7 }
|
t|k rx||
 d d |d k sP|d7 }n@|}
|
t| k r| |
 d d |	d k rtt| |
 d
 d	 t|d	 | |
d7 }
|
t| k r| |
 d d |	d k s|d7 }|t| k r|t|k s&dS dS dS dS )u  Check interval intersection in y-direction.
    
    Procedure ``stab(A, B)``::
        i := 1; j := 1
        while i ≤ |A| and j ≤ |B|
            if ai.y0 < bj.y0 then
            k := j
            while k ≤ |B| and bk.y0 < ai.y1
                reportPair(air, bks)
                k := k + 1
            i := i + 1
            else
            k := i
            while k ≤ |A| and ak.y0 < bj.y1
                reportPair(bjs, akr)
                k := k + 1
            j := j + 1
    Nc                 S      | d d S Nr'   r   r*   r   r   r   r.          z_stab.<locals>.<lambda>)keyc                 S   r<   r=   r   r*   r   r   r   r.      r>   r   r   r'      r%   r   )sortr   _report_pairr3   )r:   r;   r$   r   jma_nbkr   r   r   r6      s*   $$$
$$$(r6   r   rD   c                 C   s    ||   | ||  |  dS )z add pair (i,j) to adjacent list.N)r   )r   rD   r$   r   r   r   rC      s   rC                 .@
img_binarymin_wmin_hmin_dxmin_dyc                    sJ   dt jdtdtdtdtdtdtf fdd	 g } | d
|||||d |S )a  Split image with recursive xy-cut algorithm.
    
    Args:
        img_binary (np.array): Binarized image with interesting region (255) and empty region (0).
        min_w (float): Ignore bbox if the width is less than this value.
        min_h (float): Ignore bbox if the height is less than this value.
        min_dx (float): Merge two bbox-es if the x-gap is less than this value.
        min_dy (float): Merge two bbox-es if the y-gap is less than this value.
    
    Returns:
        list: bbox (x0, y0, x1, y1) of split blocks.
    arrtop_leftresrN   rO   rP   rQ   c              
      s  |\}}| j \}	}
tj| dkdd}t|||}|sd S |\}}t||D ]f\}}| ||d|
f }tj|dkdd}t|||}|sEq%|\}}t|dkre|||d  || ||d  || f q%t||D ] \}}| ||||f }|| || f} ||||||| qjq%d S )N   r'   Zaxisr   )shapenpcount_nonzero_split_projection_profilezipr   r   )rR   rS   rT   rN   rO   rP   rQ   r   r   r   r   Z
projectionZpos_yZarr_y0Zarr_y1Zr0r1Zx_arrZpos_xZarr_x0Zarr_x1Zc0c1Zy_arrxy_cutr   r   r_      s*   
*z recursive_xy_cut.<locals>.xy_cutr@   )rR   rS   rT   rN   rO   rP   rQ   )rX   arraytupler4   float)rM   rN   rO   rP   rQ   rT   r   r^   r   recursive_xy_cut   s   rc   
arr_values	min_valuemin_gapc           
      C   s   t | |kd }t|sdS |dd |dd  }t ||kd }|| }||d  }t |d|d }t ||d }	|	d7 }	||	fS )uU  Split projection profile:

    ```
                              ┌──┐
         arr_values           │  │       ┌─┐───
             ┌──┐             │  │       │ │ |
             │  │             │  │ ┌───┐ │ │min_value
             │  │<- min_gap ->│  │ │   │ │ │ |
         ────┴──┴─────────────┴──┴─┴───┴─┴─┴─┴───
         0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
    ```

    Args:
        arr_values (np.array): 1-d array representing the projection profile.
        min_value (float): Ignore the profile if `arr_value` is less than `min_value`.
        min_gap (float): Ignore the gap if less than this value.

    Returns:
        tuple: Start indexes and end indexes of split groups.
    r   Nr'   r&   )rX   wherer   insertr   )
rd   re   rf   Z	arr_indexZarr_diffZarr_diff_indexZarr_zero_intvl_startZarr_zero_intvl_endZ	arr_startZarr_endr   r   r   rZ      s   rZ   bboxc                    s  |\}}}}t j| jt jd}| ||||f |||||f< t|tjtj\}	}
t |
ddddf dkd }t t 	|
ddddf |d }t t 	|
ddddf |d }dd  g g g }}|D ]#}t
|	| \}}}}||k s||k rqn||||| || f qn|D ]}|D ]}||krq ||r| || qq fdd	}|D ]*}t
|	| \}}}}|||| || f}||k s||k rq||rq|| q|S )
a  Inner contours of current region, especially level 2 contours of the default opencv tree hirerachy.

    Args:
        img_binary (np.array): Binarized image with intresting region (255) and empty region (0).
        bbox (tuple): The external bbox.
        min_w (float): Ignore contours if the bbox width is less than this value.
        min_h (float): Ignore contours if the bbox height is less than this value.

    Returns:
        list: A list of bbox-es of inner contours.
    Zdtyper   NrA   r&   c           
      S   s8   | \}}}}|\}}}}	||ko||ko||ko|	|kS )Nr   )
bbox1bbox2r   r   r	   r
   r   r   r   r   r   r   r   containsA  s    z inner_contours.<locals>.containsc                    s   D ]
} || r dS qdS )NTFr   )ri   Zlevel_1_bboxrm   Zres_level_1r   r   contained_in_concerned_level_1T  s   z6inner_contours.<locals>.contained_in_concerned_level_1)rX   ZzerosrW   uint8cvZfindContoursZ	RETR_TREEZCHAIN_APPROX_SIMPLErg   isinZboundingRectr   )rM   ri   rN   rO   r   r   r	   r
   rR   ZcontoursZ	hierarchyZlevel_0Zlevel_1Zlevel_2Zlevel_1_bbox_listrT   r   xyr   r   rk   rl   ro   Zlevel_2_bboxr   rn   r   inner_contours!  s:   $ $$




ru      
img_sourcegapdwdhc                 C   s:  |j \}}|ptt|d d}|ptt|d d}dtj|| | || | dftjd }| ||| || | d|ddf< tj|dkdd}t|D ]#\}	}
t|
| | }d||	| | || || t| ddf< qPtj|dkdd}t|D ]\}	}
t|
| | }d||| ||	ddf< q|S )	u  Projection profile along x and y direction.

    ```
           ┌────────────────┐
        dh │                │
           └────────────────┘
                 gap
           ┌────────────────┐ ┌───┐
           │                │ │   │
         h │     image      │ │   │
           │                │ │   │
           └────────────────┘ └───┘
                    w           dw
    ```

    Args:
        img_source (np.array): Source image, e.g. RGB mode.
        img_binary (np.array): Binarized image.
        gap (int, optional): Gap between sub-graph. Defaults to 5.
        dw (int, optional): Graph height of x projection profile. Defaults to None.
        dh (int, optional): Graph height of y projection profile. Defaults to None.

    Returns:
        np.array: The combined graph data.
    rA      rU   rj   r   Nr'   rV   )rW   r   r3   rX   Zonesrp   rY   	enumerate)rw   rM   rx   ry   rz   r   r   rR   valsr   valcrr   r   r   xy_project_profilec  s   
*&0r   )rK   rK   rL   rL   )rv   NN)collectionsr   numpyrX   Zcv2rq   ra   r   r   r   r4   r3   r7   r6   rC   r`   rb   rc   rZ   ru   r   r   r   r   r   <module>   s.    &1+

3*(B