o
    e<                     @   s   d Z ddlmZ ddlmZ ddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZ dd
lmZmZ ddlmZ G dd dZdS )a=  Parsing table blocks.

* ``lattice table``: explicit borders represented by strokes.
* ``stream table`` : borderless table recognized from layout of text blocks.

Terms definition:

* From appearance aspect, we say ``stroke`` and ``fill``, the former looks like a line, 
  while the later an area.
* From semantic aspect, we say ``border`` (cell border) and ``shading`` (cell shading).
* An explicit border is determined by a certain stroke, while a stroke may also represent 
  an underline of text.
* An explicit shading is determined by a fill, while a fill may also represent a highlight 
  of text.
* Border object is introduced to determin borders of stream table. Border instance is a 
  virtual border adaptive in a certain range, then converted to a stroke once finalized, 
  and finally applied to detect table border.
   )	constants)Element)
Collection)Blocks)Shapes)Lines   )TableStructure)BorderBorders)Cellc                	   @   s   e Zd ZdZdd ZdededefddZdeded	efd
dZede	fddZ
ede	dededefddZedd Zede	defddZdS )TablesConstructorz7Object parsing ``TableBlock`` for specified ``Layout``.c                 C   s   || _ |j| _|j| _d S N)_parentblocks_blocksZshapes_shapes)selfparent r   PD:\Projects\ConvertPro\env\Lib\site-packages\pdf2docx/table/TablesConstructor.py__init__$   s   zTablesConstructor.__init__connected_border_tolerancemin_border_clearancemax_border_widthc                 C   s   | j sdS dtfdd}| j jj||d}||}| j j}t }||d}|D ] }	||	j}
t|	fi |	|

 }|rG|  || q'| j| | j | dS )ac  Parse table with explicit borders/shadings represented by rectangle shapes.

        Args:
            connected_border_tolerance (float): Two borders are intersected if the gap lower than this value.
            min_border_clearance (float): The minimum allowable clearance of two borders.
            max_border_width (float): Max border width.
        N	instancesc                 S   s`   dd }t | |}g }|D ]}t|dkr|d }nt|dd d}|d }|| q|S )z4Delete group when it's contained in a certain group.c                 S   s   | j |j p|j | j S r   )bboxcontains)abr   r   r   <lambda>:   s    zJTablesConstructor.lattice_tables.<locals>.remove_overlap.<locals>.<lambda>r       c                 S   s
   | j  S r   )r   Zget_area)instancer   r   r   r    D   s   
 )key)r   grouplensortedappend)r   ZfungroupsZunique_groupsZgroup_instancesr"   Zsorted_groupr   r   r   remove_overlap7   s   
z8TablesConstructor.lattice_tables.<locals>.remove_overlap)ZdxZdyr   r   )r   listtable_strokesZgroup_by_connectivitytable_fillingsr   contained_in_bboxr   r	   parseto_table_blockZset_lattice_table_blockr(   r   assign_to_tables)r   r   r   r   r*   Zgrouped_strokesZfillstablessettingsstrokesZgroup_fillstabler   r   r   lattice_tables*   s(   

z TablesConstructor.lattice_tablesline_separate_thresholdc                    s  j j}j j}j||}jj\} } fdd}	t }
||d}|D ]}|s.q)tdd |D }tdd |D }t	dd |D }t	dd |D }|	||\}}||||f}||||f}t
||}t |}||j}|j|jtjd	\}}|s|st
|rq)||||}|sq)|  t|fi || }tjtr|j|j d
kr|d d jdu rq)|  |
| q)j|
 j |
 dS )zParse table with layout of text/image blocks, and update borders with explicit borders 
        represented by rectangle shapes.

        Refer to ``lattice_tables`` for arguments description.
        c                    sP    }}j D ]}|jj| k r|jj}|jj|kr#|jj} ||fS q||fS )a  find the vertical boundaries of table in y-range [y0, y1]:
                - the bottom of block closest to y0
                - the top of block closest to y1

                ```
                +-------------------------+  <- Y0

                +--------------+
                +--------------+  <- y_lower

                +------------------------+  <- y0
                |         table          |
                +------------------------+  <- y1

                +-------------------------+ <- y_upper
                +-------------------------+

                +---------------------------+ <- Y1
                ```
            )r   r   y1y0)r:   r9   Zy_lowerZy_upperblockY0Y1r   r   r   top_bottom_boundaries}   s   

z>TablesConstructor.stream_tables.<locals>.top_bottom_boundariesr+   c                 S      g | ]}|j jqS r   )r   x0.0rectr   r   r   
<listcomp>       z3TablesConstructor.stream_tables.<locals>.<listcomp>c                 S   r@   r   )r   r:   rB   r   r   r   rE      rF   c                 S   r@   r   )r   x1rB   r   r   r   rE      rF   c                 S   r@   r   )r   r9   rB   r   r   r   rE      rF   )	thresholdr   r!   N) r   r-   r.   r   Zcollect_stream_linesr   r   r   minmaxr   _outer_bordersr   Zupdate_bboxr/   Zsplit_with_intersectionr   FACTOR_A_FEW_is_simple_structure_stream_strokesZsort_in_reading_orderr	   r0   r1   
isinstancer   Znum_colsZnum_rowsZbg_colorZset_stream_table_blockr(   r2   )r   r   r   r8   r-   r.   Ztables_linesX0X1r?   r3   r4   Ztable_linesrA   r:   rG   r9   Z	y0_marginZ	y1_margin
inner_bbox
outer_bboxouter_bordersrD   explicit_strokesexplicit_shadings_r5   r6   r   r<   r   stream_tablesj   sJ   !"zTablesConstructor.stream_tableslinesc                 C   s<   t |  }|dkrdS |dkrt |  t |  kS dS )zWhether current lines represent a simple table:        
        * only one column -> always flow layout in docx; or
        * two columns: lines are aligned in each row -> simple paragraph in docx
        r   Tr   F)r&   group_by_columnsZgroup_by_physical_rowsgroup_by_rows)rY   numr   r   r   rM      s   z&TablesConstructor._is_simple_structurerT   rU   rV   c                 C   sT   t  }|| t| |}|| ||| t }|D ]	}||  q|S )av  Parsing borders mainly based on content lines contained in cells, 
        and update borders (position and style) with explicit borders represented 
        by rectangle shapes.
        
        Args:
            lines (Lines): lines contained in table cells.
            outer_borders (tuple): Boundary borders of table, ``(top, bottom, left, right)``.
            explicit_strokes (Shapes): Showing borders in a stream table; can be empty.
            explicit_shadings (Shapes): Showing shadings in a stream table; can be empty.
        
        Returns:
            Shapes: Parsed strokes representing table borders.
        )r   extendr   _inner_bordersfinalizer   r(   Z	to_stroke)rY   rT   rU   rV   bordersZinner_bordersresborderr   r   r   rN      s   

z!TablesConstructor._stream_strokesc                 C   s   | \}}}}|\}}}}	t d||fdd}
t d||	fdd}t d||fdd}t d||fdd}|
||f |||f ||
|f ||
|f |
|||fS )ai  Initialize outer Border instances according to lower and upper bbox-es.

        ::
            +--------------------------------->
            |
            | Y0 +------------------------+     + outer bbox
            |    |                        |     |
            |    | y0+----------------+   |     |
            |    |   |                |   +<----+
            |    |   |                +<--------+ inner bbox
            |    | y1+----------------+   |
            |    |   x0               x1  |
            | Y1 +------------------------+
            |    X0                       X1
            v
        ZHTF)border_range	referenceZHBZVLZVR)r
   Zset_boundary_borders)rR   rS   rA   r:   rG   r9   rP   r=   rQ   r>   topbottomleftrightr   r   r   rK     s   z TablesConstructor._outer_bordersc              	   C   st  |   }dd |D }t|}|dk}|dkr|D ]}d|_qt }d}|\}	}
}}t|D ]}|dkr6|n|}||d krA|}n|| jj}||d  jj}td||f|	|
fdd	}|	| || }t|}|dkrmq.d}t|D ]C}|dkr{|	n|}||d kr|
}n|| jj
}||d  jj}td
||f||f|d	}|	| t|| ||||f}|| qsq.|S )a  Calculate the surrounding borders of given ``lines``. These borders construct table cells. 

        Two purposes of stream table: 

        * Rebuild layout, e.g. text layout with two columns, and
        * parsing real borderless table.

        It's controdictory that the former needn't to deep into row level, just ``1xN`` table 
        convenient for layout recreation; instead, the later should, ``MxN`` table for each 
        cell precisely. So, the principle determining stream tables borders:

        * Vertical borders contributes the table structure, so ``border.is_reference=False``.
        * Horizontal borders are for reference when ``n_column=2``, in this case ``border.is_reference=True``.
        * During deeper recursion, h-borders become outer borders: it turns valuable when count 
          of detected columns >= 2.
        
        Args:
            lines (Lines): Lines in table cells.
            outer_borders (tuple): Boundary borders of table region.
        c                 S   s   g | ]	}|j tjd qS ))factor)r[   r   rL   )rC   Z	col_linesr   r   r   rE   >  s    z4TablesConstructor._inner_borders.<locals>.<listcomp>r   FNr!   r   ZVI)Zborder_typerc   r`   rd   ZHI)rZ   r&   is_referencer   ranger   rG   rA   r
   r(   r9   r:   r   r^   r]   )rY   rT   Z
cols_linesZgroup_linesZcol_numrj   rb   r`   rh   ZTOPZBOTTOMLEFTRIGHTirg   rA   rG   Z
rows_linesZrow_numrf   jre   r:   r9   Zborders_r   r   r   r^   &  sP   


z TablesConstructor._inner_bordersN)__name__
__module____qualname____doc__r   floatr7   rX   staticmethodr   rM   tupler   rN   rK   r^   r   r   r   r   r   !   s2    
@
h"
!r   N)rs   commonr   Zcommon.Elementr   Zcommon.Collectionr   Zlayout.Blocksr   Zshape.Shapesr   Z
text.Linesr   r	   r
   r   r   r   r   r   r   r   <module>   s   