o
    Mef                     @   sX  d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlm	Z	 d dl
Zd dlZd dlmZ ddlmZ ddlmZ dd	lmZ dd
lmZmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZm Z  ddl!m"Z"m#Z# e$d e$d ej$d G dd dZ%G dd dZ&G dd dZ'G dd de'Z(G dd dZ)dS )    N)reduce)chainproduct)OrderedDict)auto   )estimate_cost)DistributedOperator)_g_process_group_map)ProcessGroupget_process_group)is_elementwise_op)'get_distributed_operator_impl_container)+update_op_dims_mapping_by_default_dist_impl)4update_op_dims_mapping_by_elementwise_like_dist_impl) get_all_distributed_main_program)DistributedContextDistributedOperatorContext)OperatorDistributedAttributeTensorDistributedAttribute{   c                   @   s0   e Zd Zedd Zedd Zedd ZdS )
PlanFilterc                 C   st   d}t |t |ksJ t|D ]'\}}|dkr+|| | |  dks)||dkr+d}|dkr7| d dkr7d}q|S )NTr   r   F)len	enumeratecount)process_mesh_topologytensor_shapedims_mappingvalididxZdim_mapping r!   XD:\Projects\ConvertPro\env\Lib\site-packages\paddle/distributed/auto_parallel/planner.pycheck_dims_mapping_for_tensor-   s&   z(PlanFilter.check_dims_mapping_for_tensorc                 C   s   |j }|d usJ d| jD ]1}||}t|j|| j|s# dS || jr?t|dkr?|dd  D ]
}|dkr>  dS q4q| j	D ]}|
|}t|j|| j|sX dS qCdS )Nz$The process mesh should not be None.Fr   r   T)process_meshinput_arg_namesget_input_dims_mappingr   r#   topologyshapeis_datar   output_arg_namesget_output_dims_mapping)opop_dist_attrvarsr$   var_namer   dimr!   r!   r"   check_dims_mapping_for_op>   s,   



z$PlanFilter.check_dims_mapping_for_opc                 C   s   | j dks| j dks| j dkr;| jD ]}||D ]
}|dkr#  dS qq| jD ]}||D ]
}|dkr9  dS q/q(| j dkrZ| jD ]}|dkrY||D ]
}|dkrX  dS qNqCdS )	NZelementwise_addZ
layer_normsoftmax_with_cross_entropyr   FZlookup_table_v2Zpos_embeddingsT)typer%   r&   r*   r+   )r,   r-   r.   nameitemr!   r!   r"   !check_dims_mapping_for_special_opT   s,   



z,PlanFilter.check_dims_mapping_for_special_opN)__name__
__module____qualname__staticmethodr#   r1   r6   r!   r!   r!   r"   r   +   s    

r   c                   @   sP   e Zd Zg dZg dZedd Zedd Zedd Ze		dd
dZ	dS )	PlanSpace)Zcreate_py_readerZcreate_double_buffer_readerread)lod_tensor_blocking_queue_0Zcreate_py_reader_0Zdouble_buffer_0c              	   C   s   t tdt| }|t|kr=d}t|D ]\}}	|	dkr0|| | |	  dks.||	dkr0d}q|r;|t| dS tt|D ])}
||
 sl|
dkrQd||
< |||
  t	| |||d || d||
< |
  qCdS )zCEnumerate dims mapping of tensor by the given process_mesh_topologyr   Tr   r   FN)listranger   r   r   appendcopydeepcopyr;   _enum_dims_mappingpop)r   visitedpathdepthresr   numsr   r    r5   ir!   r!   r"   rC   o   s8   zPlanSpace._enum_dims_mappingc                 C   s  | dksJ dg }t d| d D ]}| | dkr|| qg }t t|d ddD ]}g }|||  |t|d krG|t| q+d}|t|k rt|dkr]|||  n`t|dkr| |d |d   dkr| |d |d   dkr|t| n?|| |d |d    |t| |d |d |d7 }n|d |d  | k r|d |d7 }nn|t|k sOq+|S )z6Enumerate all process meshes with the given processes.r   z0The processes must be number and greater than 0.r   r      )r?   r@   r   rA   rB   rD   )	processesZdivisorsrJ   resultsresultjr!   r!   r"   enum_process_mesh_topology   sD   





z$PlanSpace.enum_process_mesh_topologyc                    sR  |   j}t  g }t|j}t|j|jD ]0}dd tt	t
tdt	|jD }d}g }	g }
t|j||	||
|| j t|
 |< qt
t fdd  D  }|D ]}t }||_t
  }t|D ],\}}|| |jv r||| | qk|| |jv r||| | qktdjddd	t||}|d
u r0t|jrd}d}zt|}W n ty } zd}W Y d
}~nd
}~ww |r|st||j |rt!||j |rd|j _"d|j _#|$|j  qXd}d}zt%|}W n ty } zd}W Y d
}~nd
}~ww |r/|s/t||j |r/t!||j |r/d|j _"d|j _#|$|j  qX|j&}t|D ]$\}}|'|rZt||j |rZ|j(j|j _"||j _#|$|j  q7qX|st }||_t||}|jD ]}||| dd || jD  qn|jD ]}||| dd || jD  qd|j _"d|j _#|$|j  |S )zQEnumerate the valid distributed attribute for op based on the given process mesh.c                 S      g | ]}d qS Fr!   ).0_r!   r!   r"   
<listcomp>   s    z:PlanSpace._enum_valid_dist_attr_for_op.<locals>.<listcomp>r   r   c                    s   g | ]} | qS r!   r!   )rS   keyZdims_mapping_dictr!   r"   rU      s    z0The {varname} is not input or output of op {op}.zvar_names[idx]r,   )varnamer,   NTFZelementwisedefaultc                 S   rQ   r   r!   rS   rJ   r!   r!   r"   rU         c                 S   rQ   rZ   r!   r[   r!   r!   r"   rU     r\   ))global_blockr.   r   r   r3   r   r%   r*   r?   r   r>   r'   r;   rC   r(   rA   rB   r   keysr   r$   r   set_input_dims_mappingset_output_dims_mapping
ValueErrorformatr	   r   r   	Exceptionr   r1   Z	dist_attrr6   Z	impl_typeZimpl_idxr@   r   implsZis_auto_compatibleZ	serial_op)programr,   r$   r.   op_valid_dist_attrsZdist_op_impl_containerr/   rE   rG   rF   Zdims_mapping_listZcomposed_dims_mapping_listZcomposed_dims_mappingr-   Z	var_namesr    r   dist_opchangedr   erd   implr!   rW   r"   _enum_valid_dist_attr_for_op   s   





	


z&PlanSpace._enum_valid_dist_attr_for_opFc                    sT  t  }|  j}|  j}tdd |}dd t|D  d}d}|r[|d }	t||	 }
t|dkrK|dd ||	  fddt|	D }n+t|dkrZd	d t|	D }nt|dkrptjt	
 | d
}ntj d
}t|D ]\}}d}|}d}|dur||
 t|k r||
 n||
 d }|t|krt|d }|| }|jtjv rt }||_|jD ]}|tjv r||g  qdd || jD }||| q|jD ]}|tjv r||g  qdd || jD }||| q|g}|dkrdn|}nt| ||}|dusJ d|||g||j < qz|||fS )z>Enumerate valid distributed attributes for all ops in program.c                 S   s   | | S Nr!   )xyr!   r!   r"   <lambda>(  s    z<PlanSpace.enum_valid_dist_attr_for_program.<locals>.<lambda>c                 S   s   g | ]}|qS r!   r!   r[   r!   r!   r"   rU   )  r\   z>PlanSpace.enum_valid_dist_attr_for_program.<locals>.<listcomp>Nr   r   c              	      s<   g | ]}t jt | |d     dqS )r   Zmesh)r   ProcessMeshnparrayreshapetolistr[   Zglobal_groupZper_process_mesh_groupZprocess_mesh_shaper!   r"   rU   4  s    


c                 S   s   g | ]	}t j|gd qS )rp   )r   rq   r[   r!   r!   r"   rU   7  s    rp   c                 S   rQ   rZ   r!   r[   r!   r!   r"   rU   T  r\   c                 S   rQ   rZ   r!   r[   r!   r!   r"   rU   \  r\   r   z0Enumerate {} valid distributed attribute failed.)r   r]   opsr.   r   r?   r   r   rq   rr   rs   rt   ru   r   r3   r;   not_enum_opsr   r$   r%   special_varsr_   r(   r*   r`   rk   rb   descid)re   r   Zis_pipelinevalid_dist_attr_dictrw   r.   rL   global_process_meshpipeline_process_meshesZpipeline_stagesZop_count_per_stager    r,   rf   Zop_process_meshpipeline_stager-   r/   r   r!   rv   r"    enum_valid_dist_attr_for_program  s   








z*PlanSpace.enum_valid_dist_attr_for_programNrR   )
r7   r8   r9   rx   ry   r:   rC   rP   rk   r   r!   r!   r!   r"   r;   i   s    

)
kr;   c                   @   s(   e Zd Zdd Zedd Zdd ZdS )SearchAlgorithmc                 C   s
   || _ d S rl   )_name)selfr4   r!   r!   r"   __init__p     
zSearchAlgorithm.__init__c                 C   s
   t | _ d S rl   )r4   r   r!   r!   r"   r4   s  s   
zSearchAlgorithm.namec                 C   s   t d)Nz)Please Implement this method in subclass.)NotImplementedErrorr   r!   r!   r"   searchw  s   zSearchAlgorithm.searchN)r7   r8   r9   r   propertyr4   r   r!   r!   r!   r"   r   n  s
    
r   c                       s   e Zd Zd fdd	Zedd Zedd Zedd	 Zd
d Zdd Z		dddZ
dd Zdd Z	dddZ	dddZdd Z  ZS )MCMC   c                    s&   t t| d || _|| _|| _d S )Nmcmc)superr   r   _serial_program_info_max_search_times_parallelizer)r   serial_program_infoparallelizermax_search_times	__class__r!   r"   r   }  s   
zMCMC.__init__c                 C      | j S rl   r   r   r!   r!   r"   r        zMCMC.serial_program_infoc                 C   r   rl   r   r   r!   r!   r"   r     r   zMCMC.parallelizerc                 C   r   rl   )r   r   r!   r!   r"   r     r   zMCMC.max_search_timesc                 C   s   |j dkrm|jD ]f}|||}|||| jkrld}|D ]E}	||	jv ra||	j  d }
|
D ].}|	||kr`|
|	| |	jD ]}t }|j|_|	||_||| | qDd} nq2|re nq |sltdqd S d S )Nr2   Fr   Tz2Change softmax_with_cross_entropy dist attr failed)r3   r%   get_op_dist_attr_for_programr&    get_tensor_dist_attr_for_programr   r*   rz   r{   r+   set_op_dist_attr_for_programr   r$    set_tensor_dist_attr_for_programra   )r   r,   rw   r.   dist_contextr|   r/   r   Zhas_changedZ	search_opZop_dist_attr_listr-   r4   tensor_dist_attrr!   r!   r"   make_special_op_unshard  sf   



zMCMC.make_special_op_unshardc                 C   s*  |  j}|  j}t }|D ]j}||j  d }	tjt	|	}
|	|
 }|
|| |jD ]%}|dkr6q/||| d u rTt }|j|_|||_||| | q/|jD ]}t }|j|_|||_||| | qX| ||||| q|d ur|| |S |d ur|D ]}|| q|S )Nr   r=   )r]   rw   r.   r   rz   r{   rr   randomrandintr   r   r%   r   r   r$   r&   r   r   r*   r+   r   Zadd_process_mesh)r   r|   re   r~   r}   rw   r.   new_dist_contextr,   op_valid_dist_attr_listZrandom_op_dist_attrZinit_op_dist_attrr/   r   r$   r!   r!   r"   init_program  s\   




zMCMC.init_programNc                 C   s   d }t | j|| j}|d urdd |D nd }d}|D ] }d}| D ]}	|	jr6d|	jv r6|	jd }d} nq#|r; nqddlm}
 |
|}t	|d |||d	}|S )
Nc                 S   s   g | ]}|j qS r!   )rL   )rS   r$   r!   r!   r"   rU     s    z8MCMC.estimate_searched_strategy_cost.<locals>.<listcomp>r   Fz@RESHARDr   T)get_standalone_cost_data)clusterpipeline_configstandalone_cost_dataZ
batch_size)
r   r   r   Z	list_varsr)   r4   r(   utilsr   r   )r   r   r~   costZall_dist_main_programr   Zmicrobatch_sizere   Zsearched_batch_sizevarr   r   r!   r!   r"   estimate_searched_strategy_cost  s>   

z$MCMC.estimate_searched_strategy_costc                 C   s   |j D ]}|j}t }||_|||_||| | q|jD ]#}|| js,|| jrC|j}t }||_|	||_||| | q d S rl   )
r*   r$   r   r+   r   r   r%   is_parameterr)   r&   )r   r,   r-   r.   r   r/   r$   r   r!   r!   r"   set_tensor_dist_attr   s.   

zMCMC.set_tensor_dist_attrc                 C   s\   || |_|jD ]
}|||| _q	|jD ]}|| js#|| jr+|||| _qd S rl   )r   r$   r*   r   r%   r   r)   )r   r,   Zchanged_process_meshr.   r   r/   r!   r!   r"   change_process_mesh  s(   

zMCMC.change_process_meshc                 C   sd  |  j}g }|D ]}|jtjvr|| q	|sJ d|  j}t|}	t	 |	_
d }
tjt|}|| }||j  d }||j  d }tjt|}t|| }|d j }|dkrt|}
tjd}|dkr|	|| | ||||	 n	|dkr|d }|dks|t|d ks|d t|d kr|
||d  j  d |d kr|	|| | ||||	 n|| }||d  j }|
| d |d kr|d t|d kr||
| d< |
| d D ]}||_q| ||d  |||	 ||
|j  d< || }||_|
|j  d D ]}||_q|	|| | ||||	 t|d ddD ]K}|
|| j  d }|
|| j  d }|| }||d kr||
|| j  d< |D ]}||_ql||	|| _| || |||	 q< n|d }|t|ks|dks|
||d  j  d |d kr|dkr|	|| | ||||	 n|| }||d  j }|
| d |d kr|dkr||
| d< |
| d D ]}||_q| ||d  |||	 ||
|j  d< || }||_|
|j  d D ]}||_q|	|| | ||||	 t|d t|D ]K}|
|| j  d }|
|| j  d }|| }||d kr~||
|| j  d< |D ]}||_qc||	|| _| || |||	 q3 n|	|| | ||||	 |D ]}|jdkr| ||||	|  nq|
d u r||	fS |
|	fS )Nz2The ops of program have no distributed attributes.r   r   r      r2   )r]   rw   r3   r;   rx   r@   r.   rA   rB   r   _dist_op_contextrr   r   r   r   rz   r{   r   r   r$   r   r?   r   r   )r   re   r|   r   r~   Zraw_opsrw   r,   r.   r   Znew_valid_dist_attr_dictZrandom_selected_op_idxZselected_opr   r   Zrandom_selected_dist_attr_idxZselected_op_dist_attrZ	start_idxZchanged_modeZchanged_stageZselected_op_process_meshZ
next_op_idr-   Znew_process_meshr    ZstageZvalid_dist_attr_listZ	pre_op_idr!   r!   r"   search_once!  s  






6



.






zMCMC.search_oncec           
      C   s   d}|}|  ||j}|}|| jk r=|d7 }| | jj|||d }|  ||j}	||	 dkr8t|}|	}d}|| jk s||fS )Nr   r   )r   Zruntimer   r   r   train_programrA   rB   )
r   r|   init_dist_contextr~   timesbest_dist_contextr   min_costr   Zcur_costr!   r!   r"   _search_core  s6   


zMCMC._search_corec                 C   s  t d t }| jj}| jj}|d u rtj nt|	d}|dks(J dt
|}d }d }d }d }	|D ]H}
t d|
 t
||
d\}}}| ||||}| |||\}}t d||
 t |_|	d u rm|n|	}	|d u ru|n|}|	|kr|}|}	q7d }d }|D ]O}
t|
dkrqt d	|
 t
||
d
\}}}| ||||}| |||\}}t d||
 t |_|d u r|n|}|d u r|n|}||kr|}|}q||	kr|}|	}t d n|}|}td}|jD ]}||j qt }t d|||  ||fS )NzStart MCMC searching.ZGPUr   zGet process failed.z7MCMC search: search process mesh {} with pipeline mode.TzJMCMC search: the min cost is {} in the process mesh {} with pipeline mode.r   z:MCMC search: search process mesh {} without pipeline mode.FzMMCMC search: the min cost is {} in the process mesh {} without pipeline mode.zHBetter set FLAGS_benchmark=1 to avoid hang problem in the pipeline mode.zBEnd MCMC searching: the min cost is {} and the search time is {}s.)printtimer   r   r   paddledistributedZget_world_sizer   Zget_all_devicesr;   rP   rb   r   r   r   r   r   r   Z_process_meshesZ	add_ranksrL   )r   
start_timer   r   rL   Zprocess_mesh_topology_listZsearched_dist_contextr   Zsearched_pipeline_dist_contextZpipeline_min_costr   r|   r~   r}   r   r   r   Z"searched_non_pipeline_dist_contextZnon_pipeline_min_costZpg0r$   end_timer!   r!   r"   r     s   



zMCMC.search)r   rl   )r7   r8   r9   r   r   r   r   r   r   r   r   r   r   r   r   r   __classcell__r!   r!   r   r"   r   {  s(    


#/
!
 4
r   c                   @   sX   e Zd Z	dddZedd Zedd Zedd	 Zed
d Zdd Z	dd Z
dS )PlannerNc                 C   s"   || _ || _|| _| || _d S rl   )r   r   _algorithm_configcreate_algorithm_searcher_algorithm_searcher)r   r   r   algorithm_configr!   r!   r"   r   ?  s   
zPlanner.__init__c                 C   r   rl   r   r   r!   r!   r"   r   I  r   zPlanner.serial_program_infoc                 C   r   rl   )r   r   r!   r!   r"   r   M  r   zPlanner.algorithm_configc                 C   r   rl   )r   r   r!   r!   r"   algorithm_searcherQ  r   zPlanner.algorithm_searcherc                 C   r   rl   r   r   r!   r!   r"   r   U  r   zPlanner.parallelizerc                 C   sj   | dd }|d usJ dd }|dkr1| dd }|d ur(t| j| j|}|S t| j| j}|S td)Nr4   zInvalid algorithm config.r   r   z4Other search algorithms have not been supported now.)getr   r   r   r   )r   r   r4   r   r   r!   r!   r"   r   Y  s&   z!Planner.create_algorithm_searcherc                 C   s
   | j  S rl   )r   r   r   r!   r!   r"   r   k  r   zPlanner.searchrl   )r7   r8   r9   r   r   r   r   r   r   r   r   r!   r!   r!   r"   r   =  s    





r   )*rA   r   r   	functoolsr   	itertoolsr   r   collectionsr   numpyrr   r   Zpaddle.distributed.fleetr   Z
cost_modelr   rg   r	   Zprocess_groupr
   r   r   Zoperators.commonr   r   r   r   r   r   r   r   r   Zdist_attributer   r   seedr   r;   r   r   r   r!   r!   r!   r"   <module>   sB   

>     E