o
    Qe                     @   s  d dl Z d dlZd dlZddlmZmZmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ d d	lmZmZ dd
lmZmZmZ ddlmZ ddlmZmZmZ d dlZd dlZd dlZd dl m!Z! g Z"dd Z#dd Z$dd Z%dd Z&dd Z'dd Z(dd Z)dd Z*dd Z+d d! Z,d"d# Z-d$d% Z.d&d' Z/d(d) Z0G d*d+ d+Z1d,d- Z2d.d/ Z3d0d1 Z4d2d3 Z5d4d5 Z6d6d7 Z7d8d9 Z8d:d; Z9dS )<    N   )dotmatmul	transpose)squeeze	unsqueezereshape)multiply)sum   )_in_legacy_dygraph)_C_ops_legacy_C_ops)check_variable_and_dtype
check_typecheck_dtype)LayerHelper)_non_static_modein_dygraph_moder   )dygraph_onlyc                 C   s   |  ddD ]}| sJ d| dq|  dddddks%J dt|j}|d	ks0J |  dd|t|  d
  }t||ksLJ d|  d|S )a  
    Parse labels for an input operand.

    Parameters
    ----------
    labelstr:
        the input label string
    operand:
        the input operand

    Returns
    -------
    the input operand's full label string in which all anonymous dimensions are 
    labeled in dots. 
    . zInvalid equation: z/ is not a valid label, which should be letters....r   z6Invalid equation: `.` is found outside of an ellipsis.r      z$Invalid equation: the label string 'z' misses dimensions.)replaceisalphafindlenshape)labelstroperandcZndimsZfull_labelstr r#   DD:\Projects\ConvertPro\env\Lib\site-packages\paddle/tensor/einsum.pyparse_op_labels%   s   



r%   c                 C   sH   |  d}t|t|ksJ dt| dt| dttt||S )a  
    Parse label strings for all input operands.
    
    Parameters
    ----------
    labelstr:
        The equation's label string
    operands:
        The input operands
    
    Returns
    -------
    list of full label strings for all input operands
    ,,Invalid equation: the number of operands is , but found   segments in the label equation.)splitr   listmapr%   )r    operands
nop_labelsr#   r#   r$   parse_labelsJ   s   
r/   c                 C   sv   |dkrd| v sJ d|  dd} t| }d|vsJ ||}|r-J dt| dt| t|ks9J dd	S )
z@
    Check whether the equation's right hand side is valid 
    r   r   4Invalid equation: missing ellipsis in output labels.r   r   zInvalid equation: output label z not used by any input.4Invalid equation: duplicate output labels are found.N)r   set
differencesortedr   )rhsZinput_labelsn_bcast_dimsZrhs_setZnon_input_labelsr#   r#   r$   validate_rhsb   s    

r7   c                 C   s   dgt | }td|}|rB| | }}td| }|rBtt||ddd t| | ddd D ]\}}|||< q9|rRtt|t|t |}	nt	tt |}	|	D ]}
| 
||
 ||
< q\|S )a@  
    Build an inverse map of dimension indices. Three conditions must hold for 
    the result to be meaningful. 
    First, no duplicate letter labels in each label string.
    Second, the number of dots in dimout_labels >= that in in_labels.
    Third, dots are contiguous in each label string.

    Parameters
    ----------
    in_labels:
        The dimension labels to map to
    out_labels:
        The dimension labels to map from
    
    Returns
    -------
    The inverse map from out_labels to in_labels. The length of the inverse map equals that of
    out_labels. -1 is filled if there's no matching intput dimension for a specific label.

    Examples
    --------
    in_labels = 'ij..', out_labels = '..ji'
    inv_map = [2, 3, 1, 0]
    in_labels = 'ij..', out_labels = '..kji'
    inv_map = [2, 3, -1, 1, 0]
    r   z\.+N)r   researchstartendziprange	itertoolschainiterr   )Z	in_labelsZ
out_labelsZinv_maprr:   r;   saxdimitir#   r#   r$   
build_view{   s"   
rG   c                    s,  t d| dd}g g }}tdg| |D ]\}}||kr+|| |d q|d  d7  < q|dkrGt||| |dd| }nd| ddd t||D  }tt|ddd D ]}	||	 |v ru||	 ||	 qcd|}
||
  t	t
 fd	d
| }t|}|} |||fS )a1  
    Build the global view, which is a layout of all dimension labels
    plus an index table that maps from the layout to the dimensions
    in each operand. In the global view, the dimensions are arranged
    such that output ones are put on the left and contraction ones
    are put on the right.  

    Parameters
    ----------
    nop_labels:
        The input full label strings of all input operands
    rhs:
        The equation right hand side
    n_bcast_dims:
        The maxium number of broadcast dimensions
    
    Returns
    -------
    A tuple of g_labels, g_view, g_nout, g_count
    g_labels:
        the layout of all labels in a string
    g_view:
        the index table
    g_nout:
        the number of output dimensions
    g_count:
        the counter array for dimension contractions
    r   r   r   r   Nr   c                 s   s     | ]\}}|d kr|V  qdS )r   Nr#   ).0lr"   r#   r#   r$   	<genexpr>   s    z$build_global_view.<locals>.<genexpr>c                    s
   t |  S N)rG   )rF   g_labelsr#   r$   <lambda>      
 z#build_global_view.<locals>.<lambda>)r4   joinr   r<   appendr7   r=   r   popr+   r,   )r.   r5   r6   concatlabelscountabZg_labels_outrF   Zg_labels_sumg_viewg_noutg_countr#   rL   r$   build_global_view   s0   





r[   c                    s   g }g }t | |D ]\} | fdd|D  q	dd t | D }dd t|D }|r:J d||d   ddd |D }d	d |D }||fS )
a  
    The global shape is the shape of all dimensions rearranged and broadcasting 
    to the global view. It's a reference data structure for einsum planning.

    Parameters
    ----------
    g_view:
        the global view
    op_shapes:
        the shapes of the all operands

    Returns
    -------
    g_shape:
        the global shape vector
    g_masks:
        list of shape masks for each operand. A dimension's shape mask is a boolean
        indicating whether its size > 1, in other words, it's not squeezable
    c                    s    g | ]}|d kr | ndqS )r   r   r#   rH   rD   Zop_shaper#   r$   
<listcomp>       z&build_global_shape.<locals>.<listcomp>c                 S   s   g | ]	}t |d h qS r   )r2   )rH   Zsizes_per_axr#   r#   r$   r^     s    c                 S   s    g | ]\}}t |d kr|qS r`   )r   )rH   rC   sizesr#   r#   r$   r^   
  r_   zInvalid operands: label r   z- corresponds to non-broadcastable dimensions.c                 S   s$   g | ]}t |d kr| ndqS )r   r   )r   rR   )rH   ra   r#   r#   r$   r^     s   $ c                 S   s   g | ]	}d d |D qS )c                 S   s   g | ]
}|d kp|dkqS )r   r   r#   )rH   rB   r#   r#   r$   r^         z1build_global_shape.<locals>.<listcomp>.<listcomp>r#   )rH   Z
view_shaper#   r#   r$   r^     s    )r<   rQ   	enumerate)rX   rM   Z	op_shapesZview_shapesZg_masksviewg_shapeZnon_bcastabler#   r]   r$   build_global_shape   s   rf   c                 C   s    |  dd} t| tt| kS )z7
    Returns True if there is any duplicate label.
    r   r   )r   r   r2   )rT   r#   r#   r$   has_duplicated_labels  s   rg   c                 C   s   t | rJ d| |fS )aE  
    Merges dimensions with duplicate labels. 
    
    For those dimensions with duplicate labels, merge them into one dimension
    which represents the diagonal elements. This requires the dimensions with
    duplicate labels are equal sized.
    
    Examples
    -------- 
    'ijj...i' would be merged into 'ij...'
    #Duplicate labels are not supported.)rg   )rT   r!   r#   r#   r$   diagonalize   s   
ri   c                    s2   d| } fdd}||g||f}|  | dS )z 
    Add reduce to the plan
    opc                    s   t | | dS )Nkeepdim
paddle_sum)vardimsrk   r#   r$   rN   8  s    zplan_reduce.<locals>.<lambda>Nadd_step)planrj   reduce_dimsrl   varnamefstepr#   rk   r$   plan_reduce2  s   
rx   c                 C   s8   d| d| g}dd }|||d f}|  | d S )Nrj   c                 S   s   t | | S rK   rm   )var1var2r#   r#   r$   rN   ?  s    z"plan_scalar_prod.<locals>.<lambda>r   rq   )rs   op1op2varnamesrv   rw   r#   r#   r$   plan_scalar_prod=  s   r~   c
           "         sx  d| d| }
}fdd||fD \fdd|D }fdd|D }t || |	  }t || |	  } fdd||fD \}}t dd t||D }t dd t||D }t ||}tt|||||	f\}}}}}t|t t|krt|
g|
t	|f}| 
| t|t t|krt|g|t	|f}| 
| || d	kr2|d	kr2d
t ||fvr2t	|| t || g t ||	 g }t	|| t || g t ||	 g }t|
g|
|f}| 
| t|g||f}| 
| t|
|g|ddf}| 
| t	||| |  }t|g||f}| 
| n<||  krD|  krDdkrVn nt|
|g|ddf}| 
| n|rrt	t|| || | }t|
g|
|f}| 
| |rt	t||| }t|g||f}| 
| |d	krt|
|g|f}| 
| n|| d	kr|dkrt|
g|
dgf}| 
| t|g|d
gf}| 
| t|
|g|f}| 
| t|g|d
dgf}| 
| n|| d	krRd
t ||	 ||	 fvrRt||	 ||	 ksJ t|
g|
t	|| dg t ||	 g f}| 
| t|g|t	|| dg t ||	 g f}| 
| t|
|g|ddf}| 
| t|g|d
dgf}| 
| nt|
|g|f}| 
| t	t| d	}t| ||dd || | D ]} ||  dkp||  d
k|| < qt|	D ]} d|| < qttD ]} d
| < qd	}!|| | D ]} |!|!d | < }!qt	|< dS )z
    plan matmul
    rj   c                       g | ]} | qS r#   r#   rH   rj   )rX   r#   r$   r^   M      zplan_matmul.<locals>.<listcomp>c                       g | ]
} | d kr|qS r   r#   rH   idx)op1_viewr#   r$   r^   O  rb   c                    r   r   r#   r   )op2_viewr#   r$   r^   P  rb   c                    r   r#   r#   r   )
g_supportsr#   r$   r^   W  r   c                 S      g | ]
\}}|r
|nd qS r`   r#   rH   rB   mr#   r#   r$   r^   X  rb   c                 S   r   r`   r#   r   r#   r#   r$   r^   Y  rb   r   r   FTr   rk   N)nparrayr<   maximumr,   r   anyZaranger   r+   rr   Zconcatenateprodr   r   r=   r   r	   r   allrx   )"rs   rX   r{   r|   r   re   IJ1J2Kry   rz   ZI1ZI2Zop1_dimsZop2_dimsop1_maskop2_maskZ
op1_vshapeZ
op2_vshapeZvshapei1i2Zj1Zj2krw   Z	op1_shapeZ	op2_shaper   fillrt   rC   rD   r#   )r   rX   r   r   r$   plan_matmulE  s   






(





"
"


"r   c                 C   sf  || || }}	|| || }
}t |}|t | }dg| | }tt|g g g f\}}}}tt||||d |	|d D ]W\}}}|dk|dkkr_|dkrY|| qB|| qB|dkrt|
| t||  }||kr||| kr|| ||  |8  < qB|| ||  t|d d8  < qB||d |dd< t| |||||||||
 dS )z)
    Plan various kinds of summation
    r   Nr   r   )r   r+   r=   r<   rQ   intmaxr   )rs   rX   r{   r|   r   re   rZ   n_bcastr   r   r   r   ndimnoutrU   r   r   r   r   rC   Zdim1Zdim2foldr#   r#   r$   plan_summation  s.   


r   c                 C   s\   g g }}t | D ]\}}|dk r|| q	|| q	tdd t |D r*g }||fS )Nr   c                 s   s    | ]	\}}||kV  qd S rK   r#   )rH   rF   rD   r#   r#   r$   rJ         zrearrange.<locals>.<genexpr>)rc   rQ   r   )Zaxespermr   rC   rD   r#   r#   r$   	rearrange  s   
r   c                    s   t |}dd t|D  tt||D ]*\}}t|\}} | }|r0t|g||f}	| |	 |r>t|g||f}	| |	 q fdd}
|
 df}	| |	 dS )z
    Plan broadcast across
    c                 S      g | ]}d | qS rj   r#   rH   rF   r#   r#   r$   r^          z"plan_broadcast.<locals>.<listcomp>c                     s   d  }t|tt | S )Nz * )rP   evaldictr<   )argsexprr}   r#   r$   rv     s   
zplan_broadcast.<locals>.fN)r   r=   r<   r   r   rr   r   )rs   r-   Znop_axesnoprF   Zop_axesr   r   ro   rw   rv   r#   r   r$   plan_broadcast  s   


r   c                   @   s<   e Zd Zdd Zdd Zdd Zdd Zd	d
 Zdd ZdS )Planc                 C   s   i | _ g | _d S rK   )envsteps)selfr#   r#   r$   __init__  s   
zPlan.__init__c                 C   s   | j | d S rK   )r   rQ   )r   rw   r#   r#   r$   rr     s   zPlan.add_stepc                 C   s   || j v r
| j | S d S rK   r   )r   ru   r#   r#   r$   get_var  s   zPlan.get_varc                 C   s   || j |< d S rK   r   )r   ru   ro   r#   r#   r$   set_var!  s   zPlan.set_varc                 C   s6   d }| j D ]^}}}}tt||g||R  q|S rK   )r   printreprr   resrv   Zin_varnamesZout_varnamer   r#   r#   r$   show$  s   z	Plan.showc                 C   sF   d }| j D ]^}}}}|g t| j||R  }|r | || q|S rK   )r   r,   r   r   r   r#   r#   r$   execute*  s   zPlan.executeN)	__name__
__module____qualname__r   rr   r   r   r   r   r#   r#   r#   r$   r     s    r   c              
   C   sJ  t | }t |d }|t | }t }	dd t|D }
tt|	j|
|  |s/t|	| | |	S t||D ]'\}}dd t||d ||d D }t|D ]\}}||  |8  < qNq4tt|||D ]`\}}}g }t||d ||d |D ]\}}}|	|r|dkr|nd qytt
dd	 |}|rt|	||d
d t|D ] \}}|| }|| o|dk||< ||  |dkrdnd8  < qqdt|D ]%}|dkrqt||d  st|	|d | qt|	||d ||||| qtdd ||d  |d D sJ |d }tdd t|d| D rdd |D }t||kr8d|d  }t|g||f}|	| d}g }t|D ]\}}|dkrR||d ||< }q@t|d| D ]\}}|dkrj|	| q\|rd|d  }t|g||f}|	| dd ||d D }|rd|d  }t|g||f}|	| |	S )zZ
    Plans the actual execution steps.
    Results
    -------
    the execution plan
    r   c                 S   r   r   r#   r   r#   r#   r$   r^   @  r   zplan_einsum.<locals>.<listcomp>c                 S   s"   g | ]\}}t |d  o| qS r`   )r   )rH   drB   r#   r#   r$   r^   K  s    Nr   r   c                 S   s   | dkS )Nr   r#   )xr#   r#   r$   rN   X  s    zplan_einsum.<locals>.<lambda>Trk   c                 s   s    | ]}| V  qd S rK   r#   )rH   maskedr#   r#   r$   rJ     s    zplan_einsum.<locals>.<genexpr>c                 s   s    | ]	\}}||kV  qd S rK   r#   )rH   rC   rD   r#   r#   r$   rJ     r   c                 S   s   g | ]}|d kr|qS r   r#   r\   r#   r#   r$   r^         rj   c                 S   s   g | ]}|d kr|qS )r   r#   r\   r#   r#   r$   r^     r   )r   r   r=   r+   r,   r   r   r<   rc   rQ   filterrx   r   r~   r   r   r4   r   rr   r   r   )r-   rX   re   r   rZ   r   r   r   r   rs   Zop_namesrd   ZsupportZ
down_countrF   rU   maskZ	to_reducerD   r   rt   r   rC   r   ru   rw   Zunsqueeze_dimsZsqueeze_dimsr#   r#   r$   plan_einsum3  s   &( 





r   c                 G   s  |  dd} t|}|dksJ d| |  d^}}t|dk s'J dt||}|r2|d nd}|du r<t|}t|d	t|ksYJ d
t| dt|d	 dd|v red|vreJ dtttt|d	dkrwJ dt|rJ d|||fS )zG
    check equation / raise error, default right labels generation
     r   r   z=Required at least one operand in Einsum API, but received %s ->r   +Invalid equation: multiple `->` were found.Nr&   r'   r(   r)   r   r0   rh   r1   )	r   r   lowerr*   r/   rhs_inferencer+   r   rg   )equationr-   r   lhsr5   rT   r#   r#   r$   
preprocess  s8   

r   c                    s.   t ddg  fdd}tt|||}|S )z 
    this shape is just used for operands planning. may differ with the original shape.
    for example: 
    ... is replaced by 1
    -1  is replaced by 1
    Results
    -------
    list of shape
    shapedr   c                    sv   t |jt | ksJ dt |jt | f dd tt| |jD }ttt|}d| v r7|| dd  |S )NzKlength of shape and length of label must be the same, but received %d != %dc                 S   s    g | ]\}\}}|d kr|qS )r   r#   )rH   rF   rI   rB   r#   r#   r$   r^     r_   z8parse_fake_shape.<locals>.fake_shape.<locals>.<listcomp>r   r   )	r   r   rc   r<   r+   r,   absinsertindex)labelrj   Zfakesr   r#   r$   
fake_shape  s   
z$parse_fake_shape.<locals>.fake_shape)collections
namedtupler+   r,   )r   r-   rT   r   outr#   r   r$   parse_fake_shape  s   
r   c                    sF    fdd}t |  d| v rdnd}|dt|t   }|S )Nc                    s     | dko
| dvS )Nr   )r   r&   )get)keycntr#   r$   is_free  s   zrhs_inference.<locals>.is_freer   r   )r   CounterrP   r   r4   elements)r   r   r5   r#   r   r$   r     s
   
r   c                 C   sR   dd }t | }||}|du rt| }| d|} |d|}| d | |fS )z8 
    1. gen rhs if rhs is None
    2. '...' -> 'A'
    c                 S   s0   t |  }tjD ]
}||vr|  S q	td)NzNYou have used all `a` - `z`, there can't find a unused for einsum optimization)r2   r   stringascii_lowercase
ValueError)counterusedr"   r#   r#   r$   get_used_label  s   
z2gen_equation_for_opteinsum.<locals>.get_used_labelNr   r   )r   r   r   r   )r   r5   r   r   broadcast_labelr#   r#   r$   gen_equation_for_opteinsum  s   
r   c                 G   s   t |}t| g|R  \}}}|dkrt|d | g|R  S t|||}t||\}}tj|g|R ddi\}	}
t|}|
D ]-}|^\}}}	}}||ksRJ d||||g}|	|d}|
t|g|R   q@t |dks|J dt | |d	 S )
z 
    einsum v2 implementation.
    1. Implement C++ EinsumOp.
    2. V2 create the EinsumOp to calculate, so just a little verifty work in python.
    3. V2 use opt_einsum.contract_path to optimize the multivariable einsum.
    r   r   Zeinsum_callTzUAssume the first var_idx is smaller than the second_idx. opt_einsum can guarantee it.r   r   z4There must be one elements in list, but received %d.r   )r   r   gen_einsum_opr   r   
opt_einsumZcontract_pathr+   rR   r   rQ   )r   r-   Zn_opr   r5   rT   ZshapesZopt_equationr   _ZconsZvar_listpathrV   rW   eq__Zvar_sr#   r#   r$   	einsum_v2  s0   r   c                    s  t dks
J dt rt| d S t r'tt t d| d S D ]}t|dddgd q)t| dtd t	di t
   jd jd	}t }| |d<  fd
dtt D } fddtt D } jddi|||d|d |S )z& 
    EinsumOp Python Interface: 
    r   z&Only support two operands in EinsumOp.r   r   dtypeZfloat32Zfloat64einsumr   c                       g | ]} j d  jdqS r   r   "create_variable_for_type_inferencer   r   helperr-   r#   r$   r^   4      z!gen_einsum_op.<locals>.<listcomp>c                    r   r   r   r   r   r#   r$   r^   8  r   ZOperands)ZOutZ
InnerCacheZXShape)typeZinputsZoutputsattrsN)r   )r   r   r   r   r   r   r   r   strr   localsr   r   r   r=   Z	append_op)r   r-   inpr   r   cachesZxshaper#   r   r$   r      s>   

r   c                 G   s  ddl }t|jddrt| g|R  S t|}|dks!J d|  ddd^}}t|d	k s8J d
|r>|d nd}t	||}t
ttt|| \}}ttdd |}t|||\}}	}
}t|	|dd |D \}}||	||||f}t| }| }|S )a_  
    einsum(equation, *operands)

    The current version of this API should be used in dygraph only mode.

    Einsum offers a tensor operation API which allows using the Einstein summation
    convention or Einstain notation. It takes as input one or multiple tensors and
    produces as output one tensor.

    Einsum is able to perform a variety of tensor operations. Following lists a few:

        - for single operand
            - trace
            - diagonal
            - transpose
            - sum
        - for double operands
            - dot
            - outer
            - broadcasting and elementwise multiply
            - matrix multiply
            - batched matrix multiply
        - for many operads
            - broadcasting multiply
            - chained matrix multiply
    
    **The summation notation**

        - The tensor dimensions are labeled using uncased English letters. E.g., `ijk`
        relates to a three dimensional tensor whose dimensions are labeled i, j, and k.
        - The equation is `,` separated into terms, each being a distinct input's
        dimension label string.
        - Ellipsis `...` enables broadcasting by automatically converting the unlabeled
        dimensions into broadcasting dimensions. 
        - Singular labels are called free labels, duplicate are dummy labels. Dummy labeled
        dimensions will be reduced and removed in the output.
        - Output labels can be explicitly specified on the right hand side of `->` or omitted.
        In the latter case, the output labels will be inferred from the input labels.
            - Inference of output labels
                - Broadcasting label `...`, if present, is put on the leftmost position.
                - Free labels are reordered alphabetically and put after `...`.
            - On explicit output labels
                - If broadcasting is enabled, then `...` must be present.
                - The output labels can be an empty, an indication to output as a scalar
                the sum over the original output.
                - Non-input labels are invalid.
                - Duplicate labels are invalid.
                - For any dummmy label which is present for the output, it's promoted to
                a free label.
                - For any free label which is not present for the output, it's lowered to
                a dummy label.
        - Examples
            - '...ij, ...jk', where i and k are free labels, j is dummy. The output label
            string is '...ik'
            - 'ij -> i', where i is a free label and j is a dummy label. 
            - '...ij, ...jk -> ...ijk', where i, j and k are all free labels.
            - '...ij, ...jk -> ij', an invalid equation since `...` is not present for
            the output.

    **The summation rule**

    The summation procedure can be outlined as follows, although the actual steps taken
    may vary significantly due to implementation specific optimization.

        - Step 1: preparation for broadcasting, that is, transposing and unsqueezing
        the input operands to have each resulting dimension identically labeled across
        all the input operands.
        - Step 2: broadcasting multiply all the resulting operands from step 1.
        - Step 3: reducing dummy labeled dimensions.
        - Step 4: transposing the result tensor to match the output labels.

    **On trace and diagonal**

    The trace and diagonal are planned yet unimplemented features. 

    Args:
        equation (`str`):
            The summation terms using the Einstein summation notation.
        operands (`list|Tensor`):
            The input tensors over which to compute the Einstein summation. The number of
            operands should equal the number of input terms in the equation.
    
    Returns:
        result (`Tensor`): the result tensor.
    
    Examples:
        .. code-block:: python

        import paddle
        paddle.seed(102)
        x = paddle.rand([4])
        y = paddle.rand([5])

        # sum
        print(paddle.einsum('i->', x))
        # Tensor(shape=[], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
        #   1.95791852)

        # dot
        print(paddle.einsum('i,i->', x, x))
        # Tensor(shape=[1], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
        #   [1.45936954])
        
        # outer
        print(paddle.einsum("i,j->ij", x, y))
        # Tensor(shape=[4, 5], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
        #   [[0.00079869, 0.00120950, 0.00136844, 0.00187187, 0.00192194],
        #    [0.23455200, 0.35519385, 0.40186870, 0.54970956, 0.56441545],
        #    [0.11773264, 0.17828843, 0.20171674, 0.27592498, 0.28330654],
        #    [0.32897076, 0.49817693, 0.56364071, 0.77099484, 0.79162055]])
        
        A = paddle.rand([2, 3, 2])
        B = paddle.rand([2, 2, 3])
        
        # transpose
        print(paddle.einsum('ijk->kji', A))
        #  Tensor(shape=[2, 3, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
        #   [[[0.95649719, 0.49684682],
        #     [0.80071914, 0.46258664],
        #     [0.49814570, 0.33383518]],
        #
        #    [[0.07637714, 0.29374704],
        #     [0.51470858, 0.51907635],
        #     [0.99066722, 0.55802226]]])
        
        # batch matrix multiplication
        print(paddle.einsum('ijk, ikl->ijl', A,B))
        # Tensor(shape=[2, 3, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
        #   [[[0.32172769, 0.50617385, 0.41394392],
        #     [0.51736701, 0.49921003, 0.38730967],
        #     [0.69078457, 0.42282537, 0.30161136]],
        #
        #    [[0.32043904, 0.18164253, 0.27810261],
        #     [0.50226176, 0.24512935, 0.39881429],
        #     [0.51476848, 0.23367381, 0.39229113]]])
        
        # Ellipsis transpose
        print(paddle.einsum('...jk->...kj', A))
        # Tensor(shape=[2, 2, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
        #   [[[0.95649719, 0.80071914, 0.49814570],
        #     [0.07637714, 0.51470858, 0.99066722]],
        #
        #    [[0.49684682, 0.46258664, 0.33383518],
        #     [0.29374704, 0.51907635, 0.55802226]]])
        
        # Ellipsis batch matrix multiplication
        print(paddle.einsum('...jk, ...kl->...jl', A,B))
        # Tensor(shape=[2, 3, 3], dtype=float32, place=CUDAPlace(0), stop_gradient=True,
        #   [[[0.32172769, 0.50617385, 0.41394392],
        #     [0.51736701, 0.49921003, 0.38730967],
        #     [0.69078457, 0.42282537, 0.30161136]],
        #
        #    [[0.32043904, 0.18164253, 0.27810261],
        #     [0.50226176, 0.24512935, 0.39881429],
        #     [0.51476848, 0.23367381, 0.39229113]]])
    r   NZFLAGS_new_einsum1z!At least one operand is expected.r   r   r   r   r   c                 S   s
   |  dS )Nr   )rU   )rB   r#   r#   r$   rN     rO   zeinsum.<locals>.<lambda>c                 S   s   g | ]}|j qS r#   )r   r   r#   r#   r$   r^     s    zeinsum.<locals>.<listcomp>)osr   environr   r   r   r   r   r*   r/   r+   r<   r,   ri   r   r[   rf   r   r   )r   r-   r  r   r   r5   r.   r6   rM   rX   rY   rZ   re   r   r   rs   resultr#   r#   r$   r   G  s,    
r   ):r>   numpyr   r8   Zlinalgr   r   r   Zmanipulationr   r   r   mathr	   r
   rn   Zfluid.frameworkr   Zpaddler   r   Zfluid.data_feederr   r   r   Zfluid.layer_helperr   r   r   r   r   r   Zpaddle.common_ops_importr   __all__r%   r/   r7   rG   r[   rf   rg   ri   rx   r~   r   r   r   r   r   r   r   r   r   r   r   r   r   r#   r#   r#   r$   <module>   sP   %7<*)p"'