o
    Me                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dl	Z	d dl
mZ d dl	mZmZ d dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZ d dlmZ d dlm Z  d dl!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z*m+Z+ ddl,m-Z- ddl.m/Z/ ddl0m1Z1 ddl2m3Z3 ddl4m5Z5m6Z6 ddl7m8Z8m9Z9 ddl:m;Z;m<Z< ddl=m>Z> ddl?m@Z@mAZA ddlmBZBmCZCmDZDmEZE ddlmFZFmGZG ddlHmIZI dd lJmKZK G d!d" d"ZLdS )#    N)defaultdict)fluidstatic)Metric)	InputSpec)core)Variable)flatten)global_scope_to_name_str)Operator_non_static_mode)_current_expected_place)ParallelEnv)fleet   )config_callbacks)	Converter)ProgramHelper)Clusterget_default_cluster)Planner)Parallelizer)DistributedOperator)DistributedSaver)"DistributedDataLoaderFromGeneratorDistributedDataLoader)new_process_groupget_all_process_groups)DistributedContextget_default_distributed_context)Strategy)CollectionNamesget_collection)to_listget_dist_attrget_lrvalidate_opt)initialize_pg_in_full_modeget_input_split_info)get_cost_from_engine   )
get_loggerc                   @   sh  e Zd ZdZ						dbddZdd Zdd Zd	d
 Zdd Zdd Z							dbddZ
dd Zdd Zdd Zdd ZdcddZdd Zdd Z		 	 		!		 			 				"ddd#d$Z		 		!			"ded%d&Z		 				"dfd'd(Z	 				)	*	*	)		 		 	dgd+d,Z	-	*	*		*	 	 			 	dhd.d/Z							did0d1Zdjd2d3Z	*	 				)	*	*	)		 	dkd4d5Z		*	*			*	 	 		dld6d7Zdmd8d9Zd:d; Zd<d= Zd>d? Zd@dA ZdBdC Z dDdE Z!dFdG Z"dHdI Z#dJdK Z$dndLdMZ%dodNdOZ&dpdPdQZ'e(dRdS Z)e(dTdU Z*e(dVdW Z+e(dXdY Z,e(dZd[ Z-e(d\d] Z.e(d^d_ Z/e(d`da Z0dS )qEnginea
  
    An Engine object can provide the full power of auto parallel to users.
    With the help of it, users can easily obtain the abilities of the
    distributed training and inference. It also support the dynamic graph and
    static graph at the same time.

    Args:
        model (paddle.nn.Layer, optional): The model is an instance of
            paddle.nn.Layer.
        loss (Loss|Callable|None, optional): The loss can be a `paddle.nn.Layer`
            instance or any callable function taken the predicted values and
            ground truth values as input. It can be None when there is no loss.
            Default: None.
        optimizer (Optimizer|None, optional): The optimizer need to be set in training
            and should be None in eval and predict mode. Default: None.
        metrics (Metric|list[Metric]|None, optional): If metrics is set, all
            metrics will be calculated and output in train/eval mode. Default: None.
        cluster (Cluster|None, optional): The cluster represents the topology information
            about the used physical devices. Default: None. (Unused for now)
        strategy (Strategy|None, optional): The strategy is used to configure the
        parallelization and optimization behaviors. Default: None.

    Examples:

        .. code-block:: python

            import paddle
            import paddle.vision.transforms as T
            from paddle.distributed.fleet import auto
            from paddle.vision.datasets import MNIST

            transform = T.Compose([
                T.Transpose(),
                T.Normalize([127.5], [127.5])
            ])
            train_dataset = MNIST(mode='train', transform=transform)
            valid_dataset = MNIST(mode='test', transform=transform)

            model = paddle.vision.models.LeNet()
            loss = paddle.nn.CrossEntropyLoss()
            optimizer = paddle.optimizer.Adam(
                learning_rate=0.001, parameters=model.parameters())
            metrics = paddle.metric.Accuracy(topk=(1, 2))

            engine = auto.Engine(model, loss, optimizer, metrics)
            # fit
            engine.fit(train_dataset,
                       epochs=2,
                       batch_size=64)
            # evaluate
            engine.evaluate(valid_dataset,
                            batch_size=64)
            # predict
            engine.predict(valid_dataset,
                           batch_size=64)
            # save
            engine.save("./my_model")
            # load
            engine.load("./my_model")

    Nc                 C   s4  |rt |tjjst|std|| _|r't |tjjtfs't|s'td|| _|r<t |tj	j
tjj	j
fs<tdt|| _t| j| _|pKg }t|D ]}|rbt |tsbtd|jjqPt|| _|rst |tsstd|pwt | _|rt |tstd|pt | _ttj| _t !dr| j"d t#j$d	d
 d | _%tj&' | _(tj&) | _*t+ | _,t-. | _/t-0 | _1t2 | _3i | _4i | _5i | _6i | _7i | _8t9t:| _;t9t:| _<i | _=i | _>i | _?dddd| _@dddd| _Ag | _Bg | _Cg | _Dg | _Eg | _Fd | _Gd| _Hd| _Id | _Jd| _K| jjL| _Md | _Nd S )NzI'model must be sub classes of `paddle.nn.Layer` or any callable function.zW'loss' must be sub classes of `paddle.nn.Layer` or any callable function or a Variable.zg'optimizer' must be object of class `paddle.optimizer.Optimizer` or `paddle.fluid.optimizer.Optimizer`.z{} is not sub class of MetriczP'cluster' must be the object or class `paddle.distributed.auto_parallel.Cluster`zN'strategy' must be object of class `paddle.distributed.auto_parallel.Strategy`ZPOD_NAMEz0Distribute training by paddle.distributed.launchT)Zis_collectiveFtrainevalpredict)O
isinstancepaddlennLayercallable	TypeError_modelr   _loss	optimizerZ	Optimizerr   r'   
_optimizercopydeepcopyZ_orig_optimizerr$   r   format	__class____name___metricsr   r   _clusterr!   	_strategyr,   loggingINFO_loggerosgetenvinfor   init	_executordistributedZget_rank	_cur_rankZget_world_size_nranksr   _saverr   default_main_program_orig_main_progdefault_startup_program_orig_startup_progr    Z_orig_dist_context_dist_contexts_fwd_main_progs_fwd_dist_contexts_serial_main_progs_serial_startup_progsr   dict_dist_main_progs_dist_startup_progs
_feed_vars_fetch_vars	_planners_has_prepared_has_prepared_reader_inputs_spec_labels_spec_inputs_labels_losses_mode_skip_build_outside_dataloader_planned_mode_dygraph_modeZtuning_tuninghistory)selfmodellossr:   metricsclusterstrategymetric rt   WD:\Projects\ConvertPro\env\Lib\site-packages\paddle/distributed/auto_parallel/engine.py__init__|   s   








zEngine.__init__c                    s  g }g }t |tjjr+|d u rtt|\}}n@tt|}|d | }||d  }n-t |tjjrN|d u r=|d \}}n|d }|d | }||d  }n
tdt	|j
t|}t|}| jjjdd   fdd}	|d urt|D ]\}
}|d usJ ddt|
 }|	|||| qx|d urt|D ]\}
}|d usJ dd	t|
 }|	|||| q| |}| |}||fS )
Nr   z<Data should be a Dataset or IterableDatset, but received {}.c                 S   s6   | dkrt |jdkr|jd |  |jd< d S d S d S )Nr   r   )lenshape)
num_shardsspecrt   rt   ru   _adjust_item_spec  s   z4Engine._prepare_data_spec.<locals>._adjust_item_specc                    s   t | tjr&t| |}|d u r | || d S ||| d S t | ttj	tj
jfrQt| |} | |d u rG|| d S ||| d S t | tjre|t|gt| | d S tdt| j)NzYThe sample's dtype returned of dataset should be number, np.ndarray or Tensor, but got {})r2   npZndarrayr   Z
from_numpyappendbatchr   r   ZVarBaseeagerZTensorZfrom_tensornumbersNumbertyper7   r>   r@   )itemname
batch_sizespecsrz   r{   ry   rt   ru   _infer_item_spec	  s&   

z3Engine._prepare_data_spec.<locals>._infer_item_speczReceive None input.inputlabel)r2   r3   ioZIterableDatasetnextiterZDatasetr7   r>   r   r@   r$   rC   datasetry   	enumeratestr_validate_spec)rm   datasplitr   inputs_speclabels_specinputslabelssampler   ir   r   rt   r   ru   _prepare_data_spec   sJ   


zEngine._prepare_data_specc           	      C   s   t  s| jr
td|rKt|tsJ dt|t|ts(J dt|t|t|ks4J dt||D ]\}}|j	|j	krJ|j
|j	 q9|rt|ts[J dt|t|tsiJ dt|t|t|ksuJ dt||D ]\}}|j	|j	kr|j
|j	 qz||fS )NzOnly support static graph mode.z&inputs should be list, but received {}z:the number of `inputs_spec` should be equal to `inputs`'s.z&labels should be list, but received {}z:the number of `labels_spec` should be equal to `labels`'s.)r   rj   
ValueErrorr2   listr>   r   rw   ziprx   desc	set_shape)	rm   r   r   r   r   Z
input_specr   Z
label_specr   rt   rt   ru   _prepare_data_tensor0  s^   zEngine._prepare_data_tensorc                 C   s  | j | j | j }| j| j }| }g d}|jd jdkr7tt|D ]}|jd j|v r6|j	ddd q%|
  g }t|jD ]\}}|j|v rP|| qBg }	t|D ]'}|j }
|
|j| j t||
|
 d}|	| t|}|| qW|	D ]	}|jd| qtt|D ]}||  t|7  < qt|D ]}|j|}|j	||d  q|
  d| j| j< d S )	N)create_py_readerZcreate_double_buffer_readerreadr   r   F)Zsync)r   r   T)rZ   rf   rM   rT   global_blockopsr   rangerw   Z
_remove_opZ_sync_with_cppr   r}   reversedr   Z_prepend_opZ	copy_fromr   r   Zadd_dist_op_for_programinsertpopr`   )rm   dist_main_progdist_contextdist_main_blockZrelated_reader_opsr   Zreader_op_indicesidxopZnew_reader_opsZnew_op_descZnew_opdist_oprt   rt   ru   _prepare_readerU  sD   




zEngine._prepare_readerc                 C   s   i }|d urKt |ttfr1t|dkr*t |d tr*|d  D ]\}}|||< q n!td|t |trD| D ]\}}|||< q:ntd||d urkt |ts^J dt|j	| D ]\}}|||< qb|S )Nr   r   zUnsupported data {}z)user_feeds must be a dict, but receive {})
r2   r   tuplerw   rY   itemsr   r>   r   r@   )rm   r   Z
user_feedsmodeZfeedsr   rt   rt   ru   _prepare_feed  s0   



zEngine._prepare_feedc                    s   |d urt |tsJ dt|jg g   fdd}|dkr-|dj| d  |dkrJj| d }t|D ]\}}|dt| | q<|dkrX|dj| d  d	d
 tt	j
D }|peg |phg  }|d|  fS )Nz+user_fetches must be a list, but receive {}c                    s^   g }|D ]} |r t|}|vr| || q|s(g   | d S N)_is_local_varr   r}   index)
group_namevar_listZgroup_indicesvarvar_namefetch_indicesfetch_namesrm   rt   ru   _process_fetch_group  s   


z3Engine._prepare_fetch.<locals>._process_fetch_groupr1   ro   rp   Zmetrics_outputsc                 S   s   g | ]}|d  qS )r   rt   ).0r   rt   rt   ru   
<listcomp>      z)Engine._prepare_fetch.<locals>.<listcomp>fetches)r2   r   r>   r   r@   r]   r   r   r#   r"   FETCHES)rm   Zuser_fetchesr   r   rp   r   r   Zuser_fetches_collectionrt   r   ru   _prepare_fetch  s2   
zEngine._prepare_fetchc                 C   s  i }|d ur
||d< |d ur|d |d< |d ur||d< d}	|dkr||	 }
t |
dks.J |
D ]
}|| d |d< q0|	d7 }	| j| d }|r| jD ]6}||	 }g }|D ]	}|||  qU|r}|j|  | }tt|D ]\}}||| | < qp|	d7 }	qKn|dkr||	 }i }|D ]
}|| |d	| < q||d
< |	d7 }	t	t
j}i }|D ]\}}|j|v r||j}|| ||p|j< q||d< |S )Nepochr   steplrr   r1   ro   rp   zout%dr   r   )rw   r]   rA   r}   update
accumulater   r$   r   r#   r"   r   r   )rm   outsr   r   r   r   r   r   logsZ	group_idxZloss_indicesr   metric_varsrs   Zmetrics_indicesZ
metric_outresultsr   resZoutputs_indicesZlogs_outZcollect_fetchesZ
logs_fetchr   r   rt   rt   ru   _prepare_logger  sX   





zEngine._prepare_loggerc                 C   s6   |  | | | | | | | d| j|< d S )NT)_build_plan	_parallel_initializer_   rm   r   rt   rt   ru   _prepare_program  s
   



zEngine._prepare_programc           
   	   C   s  t  s| jrPt  d| _| jd t| j| j| j	| j
| j| _| j| | jj| _| jj}| jj}| jj| _| jj| _| jj}| jj| _| jj}t  n| j|d }|d ur]d S g }g }g | _| j }| j }| jst !||v t"j#$ a dd | j
D | _dd | jD | _t%| j| j }|dkr| jrt&| jtj'j(st)| jsJ dt%| j|| j  | _|dkr|s| jr| j	D ]}|*t%|j+|| j   qW d    n1 sw   Y  W d    n1 sw   Y  nt&| jt,sJ dt%| j| _t- }|j.st/t0t1| j2 d|_3| j| jd	}t4|| j|d
}	|dkr5|jdd}| 5  t6||| j7| j||	| j8| j9| j:|< t6||| j7| j||	| j8| j9| j;|< | j9j<| j:| _<| | j=|< d S )NTz'Building model with 'to_static' method.c                 S      g | ]}|  qS rt   Z_create_feed_layerr   srt   rt   ru   r   .  r   z!Engine._build.<locals>.<listcomp>c                 S   r   rt   r   r   rt   rt   ru   r   1  r   r1   zothe type of `loss` of the Engine arguments should be sub classes of `paddle.nn.Layer` or any callable function.z>the type of `loss` of the Engine arguments should be Variable.)r   r   )r   ro   rp   r/   )Zfor_test)>r   rj   r3   Zdisable_staticrF   rI   r   r8   r9   rA   ra   rb   program_helperZbuild_programZconcrete_programmain_programstartup_programZ
input_varsrc   Z
label_varsrd   Zoutput_varsZ	loss_varsre   r   Zenable_staticrW   getrQ   clonerS   rg   r   program_guardutilsZunique_nameguardr$   r2   r4   r5   r6   r}   Zcomputer   r    Zhas_annotationr   r   r   rN   Zdata_parallelr	   _set_recompute_ckptsr   r;   rB   rC   rT   rV   Zgradient_scalerU   )
rm   r   Zserial_main_progZserial_startup_progr   rp   rs   Zdefault_ctx	feed_vars
fetch_varsrt   rt   ru   r     s   








 




zEngine._buildc              	   C   s   | j jstd|dksJ | | | | | j|_| j|_ddl	m
} || j  | j| || j| j|| jd| _| j  | j jrO| j | j| _d S d S )Nz Please set `tuning.enable=True`.r/   r   )OptimizationTuner)r   Zrank)rk   enabler   r   r   _dp_world_sizesdp_world_size	_dp_ranksdp_rankZtuner.optimization_tunerr   to_dictrT   ra   rb   rM   Z_optimization_tunerZtuneZrun_after_tuningZget_best_configrC   )rm   r   r   r   r   rt   rt   ru   _optimization_tuningx  s2   



zEngine._optimization_tuningc           
      C   s   | j d u r	|| _ n| | t|| j| | j|< | j|   | j| jd }| j| jd }| j| j }g }|| D ]}|j	|j
v rO||j
|j	  q>g | _g | _|D ]}t| j|| j| \}}	| j| | j|	 qXd S )Nr   r   )ri   _init_dist_contextr   rT   r^   Zplanserial_feed_varsserial_main_programr   r   varsr}   r   r   r)   rM   )
rm   r   
inputs_var
labels_varblock	feed_listr   Zfeed_varr   r   rt   rt   ru   r     s,   

zEngine._planFc                 C   s:   t || j| j| j| }|s|| j d S |  d S r   )r   r^   Z	completerrT   ZparallelrM   Zparallel_all)rm   r   Z	all_ranksZparallelizerrt   rt   ru   r     s   zEngine._parallelc              	   C   s   | j | }|j}| j}| j | }|j}|j}t|jD ]2\}}	t|	jD ](\}
}|| j|
 }|j|jksAJ d||j||j||}|	|| q$qd S )Nz2'{}' mode op '{}' is different with '{}' op '{}'. )
rT   Z_original_serial_main_programri   blocksr   r   r   r>   Zget_op_dist_attr_for_programZset_op_dist_attr_for_program)rm   r   r   Zorigin_main_progZref_modeZref_dist_contextZref_origin_main_progZ
ref_blocksZibr   Ziopr   Zref_opZref_op_dist_attrrt   rt   ru   r     s&   

zEngine._init_dist_contextc                 C   sL  | j | j| j|< | j | j| j|< | j | j| j|< | j | j| j|< | j | j	| j
|< | j | j| j|< | j | j| _| jdkrat }| jjdkrQt|t n|D ]}| j|jvr\qS|  qSt }t|tjrqtt j}| jjrt| jj| j d   t!j"| jj| j d   t"| jj| j d   | j#r| j | }| j| | j }| j$%||| | j&d u r	tj'(|| _&g }| j| | j }|) D ]}	t* +|	j,}
|
r|
- . rq|/|	 q|r|0|}| j&1| t2| dr	t2| dr	| 3|| j4| j5| j6 | jj7r$| j89d | j| | j }| j&1| d S d S )Nr   fullr   _state_dict
_dist_attrz(NOTE: parameters will be re-initialized.):rT   r   rW   serial_startup_programrX   Zdist_main_programsrZ   Zdist_startup_programsr[   r   r\   Zserial_fetch_varsr]   _serial_optimizerr;   rN   r   rC   	auto_moder(   Zcur_rankrM   ZranksZinstantiate_get_devicer2   r   Z	CUDAPlacer   Zdev_idseedr3   r   r|   randomrj   r   rJ   rK   r   ExecutorZ	list_varsr
   Zfind_varr   Z
get_tensorZ_is_initializedr}   Z_prunerunhasattr_set_state_dict_strictr   r   ZreinitrF   rI   )rm   r   Zall_process_groupsprocess_groupZplacer   dist_main_programZuninitializeddist_startup_progr   Z	scope_varZprune_startup_progrt   rt   ru   r     s~   




zEngine._initializer   
   r+   c                 C   s  d| _ | |||\| _| _| j| j  s| | j  n| | j  | j|dd||||d}| jd| j d\}}t	|| |||j
|||||  | jd}|d t|D ]}i }|| t|D ]@\}}|d|| z| jj| j|| jj| jjd}W n tjy   Y  nw t| j}| ||||||| j }|d|| q`|	r|d	 | d
kr| |	|
||||||}dd | D }| | | d n| !  |"|| qS|#d| | j$S )a  
        Trains the model for a fixed number of epochs. If `valid_data` is set,
        evaluation will be done at the end of each epoch.

        Args:
            train_data (Dataset): An instance of paddle paddle.io.Dataset. Default: None.
            train_sample_split (int, optional): Each sample of the train dataset is assumed
                to be a (input, label) pair by default and has two items. If each sample has
                more than two items, train_sample_split specifies how to split these items into
                input and label. The items before it are input and the left are label. Default: None.
            batch_size (int, optional): The batch size of train_data and valid_data if provided.
                The user's data will be used directly without batching if set to None. Default: 1.
            epochs (int, optional): The number of epochs to train the model. Default: 1.
            steps_per_epoch (int, optional): The total number of steps (batches of samples)
                is executed in one epoch before stating the next one. If None, it is equal to
                the number samples in your dataset divided by the batch size. Default: None.
            valid_data (Dataset, optional): An instance of paddle paddle.io.Dataset used for
                evaluation at the end of epoch. No evaluation will be done if set to None.
                Default: None. (Unsupported for now)
            valid_freq (int, optional): Only relevant if valid_data is provided. This specifies
                how many training epochs before a new evaluation is performed. Default: 1.
            valid_sample_split (int, optional): Only relevant if valid_data is provided.
                Each sample of the valid dataset is assumed to be a (input, label) pair
                by default and has two items. If each sample has more than two items,
                valid_sample_split specifies how to split these items into input and label.
                The items before it are input and the left are label. Default: None.
            valid_steps (int, optional): Only relevant if valid_data is provided.
                It is the total number of steps (batches of samples) to draw before
                stopping validation at the end of every epoch. If None, validation will run until the
                `valid_data` dataset is exhausted. The validation will start from the
                beginning of the dataset at each epoch. Default: None.
            collate_fn(callable, optional): function to generate mini-batch data by merging
                the sample list, None for only stack each fields of sample in axis
                0. Default None.
            callbacks (Callback|None, optional): A list of `Callback` instances to apply
                during training. Default: None. (Unused for now)

        Returns:
            None

        Examples:

            .. code-block:: python

                import paddle
                import paddle.vision.transforms as T
                from paddle.distributed.fleet import auto
                from paddle.vision.datasets import MNIST

                transform = T.Compose([
                    T.Transpose(),
                    T.Normalize([127.5], [127.5])
                ])
                train_dataset = MNIST(mode='train', transform=transform)

                model = paddle.vision.models.LeNet()
                loss = paddle.nn.CrossEntropyLoss()
                optimizer = paddle.optimizer.Adam(
                    learning_rate=0.001, parameters=model.parameters())
                metrics = paddle.metric.Accuracy(topk=(1, 2))

                engine = auto.Engine(model, loss, optimizer, metrics)
                engine.fit(train_dataset,
                           epochs=2,
                           batch_size=64)
        r/   F   F)r   capacityiterabler   epochssteps_per_epoch
collate_fnNr   )
enginer   r  stepslog_freq	save_freqsave_dirverboserp   Zacc_step
fetch_listuse_program_cachereturn_numpyr   r   c                 S   s   i | ]	\}}d | |qS )Zval_rt   )r   r   valrt   rt   ru   
<dictcomp>  s    zEngine.fit.<locals>.<dictcomp>)%rf   r   ra   rb   r_   r   _switch_mode"_prepare_dataloader_from_generatorr   r   _steps_metrics_name_k_stepson_beginr   Zon_epoch_beginr   on_batch_beginrK   r  r   rC   	use_cacher  r   EOFExceptionr&   r;   r   on_batch_endevaluater   r   _reset_metricsZon_epoch_endon_endrl   )rm   Z
train_dataZtrain_sample_splitr   r  r  r  r  r  
valid_datavalid_sample_splitZ
valid_freqZvalid_stepsr  	callbacksr  Ztrain_dataloaderr   r   cbksr   r   r   _r   r   Zval_logsrt   rt   ru   fit  s   T




	

z
Engine.fitc	              
   C   s<  d| _ | |||\| _| _| j| j  s| | j  n| | j  | j|dd|||d}	| jd| j d\}
}t	|| |||| 
 d}|	j}|d|| 
 d i }t|	D ];\}}|d|| z| jj| j|
| jj| jjd	}W n tjy}   Y  nw | |d|d|
|| j }|d|| qV|d| |   |S )
a  
        Evaluate the loss and metrics of the model on evaluation data.

        Args:
            valid_data (Dataset): An instance of paddle paddle.io.Dataset. Default: None.
            valid_sample_split (int, optional): Each sample of the eval dataset is assumed
                to be a (input, label) pair by default and has two items. If each sample has
                more than two items, valid_sample_split specifies how to split these items into
                input and label. The items before it are input and the left are label. Default: None.
            batch_size (int, optional): The batch size of valid_data. The user's data will
                be used directly without batching if set to None. Default: 1.
            steps (int, optional): It is the total number of steps (batches of samples) to draw before
                stopping evaluation. If None, evaluation will run until the `valid_data` dataset is exhausted.
                The evaluation will start from the beginning of the dataset in each run. Default: None.
            collate_fn(callable, optional): function to generate mini-batch data by merging
                the sample list, None for only stack each fields of sample in axis
                0. Default None.
            callbacks (Callback|None, optional): A list of `Callback` instances to apply
                during evaluating. Default: None. (Unused for now)

        Returns:
            None

        Examples:

            .. code-block:: python

                import paddle
                import paddle.vision.transforms as T
                from paddle.distributed.fleet import auto
                from paddle.vision.datasets import MNIST

                transform = T.Compose([
                    T.Transpose(),
                    T.Normalize([127.5], [127.5])
                ])
                valid_dataset = MNIST(mode='test', transform=transform)

                model = paddle.vision.models.LeNet()
                loss = paddle.nn.CrossEntropyLoss()
                metrics = paddle.metric.Accuracy(topk=(1, 2))

                engine = auto.Engine(model, loss, metrics=metrics)
                engine.evaluate(valid_dataset, batch_size=64)

        r0   r	  Fr   r
  r  r   r  r  Nr  )r  r   r  r  rp   )r  rp   r  )rf   r   ra   rb   r_   r   r  r  r   r   r  r  r!  r   r"  rK   r  r   rC   r#  r  r   r$  r   r%  r(  r'  )rm   r)  r*  r   r  r  r  r+  r  Zvalid_dataloaderr   r   r,  Z
eval_stepsr   r   r-  r   rt   rt   ru   r&    s`   9		
zEngine.evaluatec              
   C   s>  d| _ | |||\| _| _| j| j  s| | j  n| | j  | j|dd|||d}| jd| j d\}	}
g }t	|| |d}|j
}|dd|i i }t|D ]F\}}|d|| z| jj| j|	| jj| jjd	}W n tjyw   Y  n w | |d|d|	|
| j }|d|| |t|d
   qP|d| |S )a  
        Compute the output predictions on testing data.

        Args:
            test_data (Dataset): An instance of paddle paddle.io.Dataset. Default: None.
            test_sample_split (int, optional): Each sample of the test dataset is assumed
                to be a (input, label) pair by default and has two items. If each sample has
                more than two items, test_sample_split specifies how to split these items into
                input and label. The items before it are input and the left are label. Default: None.
            batch_size (int, optional): The batch size of test_data. The user's data will
                be used directly without batching if set to None. Default: 1.
            steps (int, optional): It is the total number of steps (batches of samples) to draw before
                stopping predict. If None, predict will run until the `test_data` dataset is exhausted.
                The predict will start from the beginning of the dataset in each run. Default: None.
            collate_fn(callable, optional): function to generate mini-batch data by merging
                the sample list, None for only stack each fields of sample in axis
                0. Default None.
            callbacks (Callback|None, optional): A list of `Callback` instances to apply
                during testing. Default: None. (Unused for now)

        Returns:
            None

        Examples:

            .. code-block:: python

                import paddle
                import paddle.vision.transforms as T
                from paddle.distributed.fleet import auto
                from paddle.vision.datasets import MNIST

                transform = T.Compose([
                    T.Transpose(),
                    T.Normalize([127.5], [127.5])
                ])
                valid_dataset = MNIST(mode='test', transform=transform)

                model = paddle.vision.models.LeNet()

                engine = auto.Engine(model)
                engine.predict(valid_dataset, batch_size=64)
        r1   r	  Fr/  Nr  )r  r  r  r  r   )rf   r   ra   rb   r_   r   r  r  r   r   r  r!  r   r"  rK   r  r   rC   r#  r  r   r$  r   r%  r}   r   valuesr(  )rm   Z	test_dataZtest_sample_splitr   r  r  r+  r  Ztest_dataloaderr   r   r   r,  Z
test_stepsr   r   r-  r   rt   rt   ru   r1   3  sP   5	
zEngine.predictr   Tc                 C   sv   |d ur	|  | | |||\| _| _| j| j s!| | j n| | j | j|d||||||||	|
||d}|S )NF)return_listr   shuffle	drop_lastr  num_workersuse_buffer_readeruse_shared_memorytimeoutworker_init_fnr  r  )	to_moder   ra   rb   r_   rf   r   r  _prepare_dataloader)rm   r   r   r2  r3  r  r4  r5  r6  r7  r8  r  r  sample_splitr   
dataloaderrt   rt   ru   r<    s0   
zEngine.dataloaderr	  c                 C   sr   |d ur	|  | | |||\| _| _| j| j s!| | j n| | j | j||||d|||||	|
d}|S )NF)r   r
  use_double_bufferr  r1  use_multiprocessr3  r   r  r  r  )	r9  r   ra   rb   r_   rf   r   r  r  )rm   r   r
  r=  r  r>  r3  r   r  r  r  r;  r   r<  rt   rt   ru   dataloader_from_generator  s,   
z Engine.dataloader_from_generatorc                 C   sD  |d ur	|  | | jstd| j| j rd S | |}| |}| |}| |}|| _|| _|s6|rXd| _| 	||||\}}| jd u rMt
 | _| jd u rWt
 | _n&|s\|rtd| _| jd u rit
 | _| jd u rst
 | _n
| jrz| js~J d||| _| _||| _| _| j| j s| | j d S | | j d S )Nz7Please set mode to be prepared with `prepare(mode=...)`Tz;Please call the dataloader(...) before calling prepare(...))r9  rf   r   r_   r   _validate_varsrQ   rS   rg   r   r   rP   rR   rh   ra   rb   rc   rd   r   r  )rm   r   r   r   r   r   r   r   rt   rt   ru   prepare  sT   













zEngine.preparec           
   	   C   s   |d ur	|  | | ||| j}| || j\}}| jr'| j| j s'|   | jj| j	||| j
j| j
jd}| |d d d ||| j}	|	S )N)feedr  r  r  )r9  r   rf   r   rh   r`   r   rK   r  r   rC   r#  r  r   )
rm   r   rB  r  r   Z	feed_dictr   r   r   r   rt   rt   ru   r    s(   

z
Engine.runc                 C   s  | j jr|d ur|| j dksJ d|| j|| j }| j| j | j }| j| j | j }| }| j	| j d }| j	| j d }g }|| D ]'}|j
|jv r[||j|j
  qI|||j}|j|j  || qItj }t||F t|fi d|d|d|d|d	|d
|d|d|d|d|	d|
d|d|d|d| j jd| jd| j}W d    |S 1 sw   Y  |S )Nr   9Requires batch_size:[{}] to be divisible by k_steps:[{}].r   r   r   placesr1  r   r2  r3  r  r4  r5  r6  r7  r8  r  r  
split_datadata_parallel_world_sizedata_parallel_rank)rC   gradient_merger   r>   rZ   rf   rM   r[   r   r\   r   r   r}   _clone_variablepersistabler   set_original_idoriginal_idr3   r   cuda_placesr   r   rE  r   r   )rm   r   r1  r   r2  r3  r  r4  r5  r6  r7  r8  r  r  r   r  r   r   r   r   r   copy_varrD  r<  rt   rt   ru   r:  5  s|   

	

zEngine._prepare_dataloaderc                 C   s  | j jr|d ur|| j dksJ d|| j|| j }| j| j | j }| j| j | j }| }| j	| j d }| j	| j d }g }|| D ]'}|j
|jv r[||j|j
  qI|||j}|j|j  || qItj }t||A tdi d|d|d|d|d	|d
|d|d|d|d|d|	d|
d|d| j jd| jd| j}W d    n1 sw   Y  |   |S )Nr   rC  r   r   r   r   r
  r=  r  r1  r>  r3  rD  r   r  r  r  rE  rF  rG  rt   )rC   rH  r   r>   rZ   rf   rM   r[   r   r\   r   r   r}   rI  rJ  r   rK  rL  r3   r   rM  r   r   rE  r   r   r   )rm   r   r
  r=  r  r1  r>  r3  r   r  r  r  r   r  r   r   r   r   r   rN  rD  r<  rt   rt   ru   r  {  sr   

	
z)Engine._prepare_dataloader_from_generatorc                 C   s0   d| _ | |||\| _| _| | j || d S )Nr/   )rf   r   ra   rb   r   )rm   Z	tune_dataZtune_sample_splitr   rt   rt   ru   _tune  s
   zEngine._tunec                 C   s   t |}| jjj| _|d urXt|D ]E\}}t|tstd|j	d u r,t
d||| jdkrWt|j}|d | j dksKJ d|jd | j|d  | j  < ||_q|p[g S )Nz9'spec' must be object of class `paddle.static.InputSpec`.z<Requires Input[{}].name != None, but receive `None` with {}.r   r   z7Requires batch_size[{}] to be divisible by k_steps[{}].)r$   rC   rH  Zk_stepsr   r   r2   r   r7   r   r   r>   r   rx   )rm   r   r   rz   rx   rt   rt   ru   r     s2   



zEngine._validate_specc                 C   s<   t |}|d urt|D ]\}}t|tstdq|pg S )Nz'var' must be a `Variable`.)r$   r   r2   r   r7   )rm   r   r   r   rt   rt   ru   r@    s   
zEngine._validate_varsc                 C   s   t |}|| j jv S r   )r   r   r   r   )rm   r   r   rt   rt   ru   r     s   zEngine._is_local_varc                 C   s   | j j}t| jtjjr#t| jdr| jjj	dv r| jj
j}n|j}n|j}|jr@|d d  |_| jjj	|d}| j| d S d S )Ngpt)ZGPTForPretrainingZGPTForPretrainingAuto)zModel ClasszApplied Recompute ckpts)rC   	recomputer2   r8   r3   r4   r5   r  r?   r@   rP  Zcheckpointsr   rF   rI   )rm   rQ  Zexact_ckptsr   rt   rt   ru   r     s    zEngine._set_recompute_ckptsc                 C   s   | j D ]}|  qd S r   )rA   reset)rm   rs   rt   rt   ru   r'    s   

zEngine._reset_metricsc                 C   s2   | j rdgng }| jD ]}|t|  q|S )Nro   )r9   rA   extendr$   r   )rm   Zmetrics_namemrt   rt   ru   r  
  s   
zEngine._metrics_namec                 C   s4   || j v sJ d|| | | j| j| _d S )Nz5{} model is not ready, please call `prepare()` first.)rZ   r>   r9  rT   r   r;   r   rt   rt   ru   r    s
   
zEngine._switch_modec                 C   s    |dv sJ d ||| _d S )Nr.   z5mode {} should be one of ['train', 'eval', 'predict'])r>   rf   r   rt   rt   ru   r9    s   

zEngine.to_modec           	      C   sJ   | j | | j }| j| }t||}t|||}|j|d}|| d S )N)strict)rZ   rM   rT   r%   r   convertZset_state_dict)	rm   r   rU  Z
state_dictZ	dist_attrprogramr   Zcur_dist_attr	converterrt   rt   ru   r    s   

zEngine._set_state_dictc                 C   s   |r+| j | jv s
J | j| j  }| j| j  | j }| j| j  }| jj||||d dS d| jv s2J | jd d }| jd d }| jd | j }| jj	|||| j
|d dS )a  
        Saves the model, parameters, optimizer state to path.
        If `training` is set to False, only inference model will be saved.

        Args:
            path (str): The file prefix to save model. The format
                is 'dirname/file_prefix' or 'file_prefix'. if empty str.
                A exception will be raised.
            training (bool, optional): Whether to save for training. If not, save
                for inference only. If `training` is set to True, the optimizer state
                will be saved. Otherwise, only the model and parameters are saved.
                This function will silently overwrite existing file at the target
                location. Default: True.

        Returns:
            None

        Examples:

            .. code-block:: python
                import paddle
                import paddle.vision.transforms as T
                from paddle.distributed.fleet import auto
                from paddle.vision.datasets import MNIST

                transform = T.Compose([
                    T.Transpose(),
                    T.Normalize([127.5], [127.5])
                ])
                train_dataset = MNIST(mode='train', transform=transform)

                model = paddle.vision.models.LeNet()
                loss = paddle.nn.CrossEntropyLoss()
                optimizer = paddle.optimizer.Adam(
                    learning_rate=0.001, parameters=model.parameters())
                metrics = paddle.metric.Accuracy(topk=(1, 2))

                engine = auto.Engine(model, loss, optimizer, metrics)
                engine.fit(train_dataset,
                           epochs=1,
                           batch_size=64)
                engine.save("./my_model")

        )serial_programr  r   r1   r   r   )rW  N)rf   rW   rZ   rM   rT   rO   saver\   r]   Zsave_inference_modelrK   )rm   pathZtrainingrY  r   r   r   r   rt   rt   ru   rZ  '  s,   -

zEngine.savec                 C   s(   || _ | j||\| _| _| j| jfS )a  
        Load the stored model, parameters and optimizer states.

        Args:
            path (str): The prefix of files storing the model states and
                optimizer states.
            strict (bool, optional): Whether to skip the loading of mismatch
                parameter or raise an error when mismatch happens (not found
                the parameter in file storing model states of or receives a
                mismatch shape). Default: True.
            load_optimizer (bool, optional): If True, the stored optimizer
                states is restored. Otherwise, the optimizer states is initialized
                from scratch. Default: True.

        Returns:
            None

        Examples:

            .. code-block:: python
                import paddle
                import paddle.vision.transforms as T
                from paddle.distributed.fleet import auto
                from paddle.vision.datasets import MNIST

                transform = T.Compose([
                    T.Transpose(),
                    T.Normalize([127.5], [127.5])
                ])
                train_dataset = MNIST(mode='train', transform=transform)

                model = paddle.vision.models.LeNet()
                loss = paddle.nn.CrossEntropyLoss()
                optimizer = paddle.optimizer.Adam(
                    learning_rate=0.001, parameters=model.parameters())
                metrics = paddle.metric.Accuracy(topk=(1, 2))

                engine = auto.Engine(model, loss, optimizer, metrics)
                engine.fit(train_dataset,
                           epochs=1,
                           batch_size=64)
                engine.save("./my_model")
                engine.load("./my_model")

        )r  rO   loadr   r   )rm   r[  rU  Zload_optimizerrt   rt   ru   r\  l  s
   .zEngine.loadc                 C   s  | j jdkr| jd dS |dur|n| j}|dusJ d|| jvr1td|t| j	 | 
| |durV| j| sV| || _| || _| | | | n(t s\| jr`td| jd tj }| jru| js~| j| s~tdt| |\}}|j|fS )a  
        Get and Print cost, including memory of every rank,
        max memory among all ranks, and the global cost of one step based on
        communication cost(computation cost is 0 by default).
        In the future, the flops information of every rank and global cost including
        computation cost will be added.

        Args:
            inputs_spec(InputSpec): The specification of inputs. Default: None.
            labels_spec(InputSpec): The specification of labels. Default: None.
            mode (str): The engine mode must be in ["train", "predict", "eval"]. Default: None.

        Returns:
            Return the global execution time (ms) and max memory (B).

        r   zMThe cost will be calcudated in the search process when the auto mode is full.NzPlease set mode.z'The mode {} is not in accepted modes {}z\Please call `prepare()` or `fit()` or  `evaluate()` or  `predict()` before calling `cost()`.zThe program whose cost to be estimated must be static default program. Otherwise, please call `prepare()`before calling `cost()`.)rC   r   rF   rI   rf   r_   r   r>   r   keysr9  r   ra   rb   r   r   r   rj   r3   r   rP   r   r   r*   time)rm   r   r   r   rW  Zglobal_costZ
max_memoryrt   rt   ru   cost  sL   




zEngine.costc                 C      | j | j | j S r   )rZ   rf   rM   rm   rt   rt   ru   r        zEngine.main_programc                 C   r`  r   )r[   rf   rM   ra  rt   rt   ru   r     rb  zEngine.startup_programc                 C      | j | j S r   )rT   rf   ra  rt   rt   ru   r        zEngine.dist_contextc                 C   rc  r   )rW   rf   ra  rt   rt   ru   r     rd  zEngine.serial_main_programc                 C   rc  r   )rX   rf   ra  rt   rt   ru   r     rd  zEngine.serial_startup_programc                 C   rc  r   )r]   rf   ra  rt   rt   ru   r     rd  zEngine.fetch_varsc                 C      | j S r   )rc   ra  rt   rt   ru   r        zEngine.inputsc                 C   re  r   )rd   ra  rt   rt   ru   r     rf  zEngine.labels)NNNNNN)F)Nr   r   Nr  Nr   NNr   NNNr+   )Nr   Nr  NNr+   )Nr   NNNr+   )r   FFNr   TTr   Nr   Nr   N)r	  TTFTr   r   NNr   N)NNNNNNN)NNNN)Tr   FFNr   TTr   Nr   N)
NTTFFTr   r   NN)Nr   )T)TT)NNN)1r@   
__module____qualname____doc__rv   r   r   r   r   r   r   r   r   r   r   r   r   r   r.  r&  r1   r<  r?  rA  r  r:  r  rO  r   r@  r   r   r'  r  r  r9  r  rZ  r\  r_  propertyr   r   r   r   r   r   r   r   rt   rt   rt   ru   r-   =   s&   @
mG%-)
9t!
G
 .
q
d
/
*

7
I

A

E
4?






r-   )MrG   r<   rD   r   r   numpyr|   collectionsr   r3   Zpaddle.utilsr   r   r   Zpaddle.metricr   Zpaddle.staticr   Zpaddle.fluidr   r   Zpaddle.fluid.layers.utilsr	   Zpaddle.fluid.executorr
   r   Zpaddle.fluid.frameworkr   r   r   r   Zpaddle.fluid.dygraph.parallelr   Zpaddle.distributedr   r+  r   rX  r   helperr   rq   r   r   Z
planner_v2r   Zparallelizer_v2r   r   r   Z
dist_saverr   Zdist_loaderr   r   r  r   r   r   r   r    rr   r!   Z	interfacer"   r#   r$   r%   r&   r'   r(   r)   Zcost.estimate_costr*   Zutils.log_utilsr,   r-   rt   rt   rt   ru   <module>   sL   