o
    Nen                     @   s   d Z ddlZddlZddlZddlZddlmZ ddlm	Z	m
Z
mZ ddlmZ ddlmZ ddlmZ ddlmZ dd	lmZ ejjZej Zg d
ZdddZdddZdd ZdddZG dd deZG dd deZG dd deZ dS )z;
Functions for Auto SParsity (ASP) training and inference.
    N)dygraph_only)global_scopeprogram_guardlayers)ConstantInitializer)sparsity)core)#supported_layers_and_prune_func_map)_default_pruning)decorateprune_modelset_excluded_layersreset_excluded_layersc                 C   s$   |du r	t j }tj| |d dS )a  
    Set parameter name of layers which would not be pruned as sparse weights.

    Args:
        param_names (list of string): A list contains names of parameters.
        main_program (Program, optional): Program with model definition and its parameters.
                                          If None is given, then it would be set as `paddle.static.default_main_program().
                                          Default is None.
    Examples:
        1. Usage of Dynamic Graph

            .. code-block:: python

                import paddle

                class MyLayer(paddle.nn.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self.conv1 = paddle.nn.Conv2D(
                            in_channels=3, out_channels=4, kernel_size=3, padding=2)
                        self.linear1 = paddle.nn.Linear(4624, 100)

                    def forward(self, img):
                        hidden = self.conv1(img)
                        hidden = paddle.flatten(hidden, start_axis=1)
                        prediction = self.linear1(hidden)
                        return prediction

                my_layer = MyLayer()
                optimizer = paddle.optimizer.SGD(
                    learning_rate=0.01, parameters=my_layer.parameters())

                # Need to set excluded layers before calling decorate
                paddle.incubate.asp.set_excluded_layers([my_layer.linear1.full_name()])

                optimizer = paddle.incubate.asp.decorate(optimizer)

        2. Usage of Static Graph

            .. code-block:: python

                import paddle

                paddle.enable_static()

                class MyLayer(paddle.nn.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self.conv1 = paddle.nn.Conv2D(
                            in_channels=3, out_channels=4, kernel_size=3, padding=2)
                        self.linear1 = paddle.nn.Linear(4624, 100)

                    def forward(self, img):
                        hidden = self.conv1(img)
                        hidden = paddle.flatten(hidden, start_axis=1)
                        prediction = self.linear1(hidden)
                        return prediction

                main_program = paddle.static.Program()
                startup_program = paddle.static.Program()

                with paddle.static.program_guard(main_program, startup_program):
                    input_data = paddle.static.data(name='data', shape=[None, 3, 224, 224])
                    label = paddle.static.data(name='label', shape=[None, 100])
                    my_layer = MyLayer()
                    prob = my_layer(input_data)
                    loss = paddle.mean(paddle.nn.functional.square_error_cost(prob, label))

                    # Setup exluded layers out from ASP workflow.
                    # Please note, excluded_layers must be set before calling optimizer.minimize().
                    paddle.incubate.asp.set_excluded_layers([my_layer.linear1.full_name()], main_program)

                    optimizer = paddle.optimizer.SGD(learning_rate=0.1)
                    optimizer = paddle.static.amp.decorate(optimizer )
                    # Calling paddle.incubate.asp.decorate() to wrap minimize() in optimizer, which 
                    # will insert necessary masking operations for ASP workflow.
                    optimizer = paddle.incubate.asp.decorate(optimizer)
                    optimizer.minimize(loss, startup_program)
    Nparam_namesmain_program)paddlestaticdefault_main_program	ASPHelperr   r    r   QD:\Projects\ConvertPro\env\Lib\site-packages\paddle/fluid/contrib/sparsity/asp.pyr   '   s
   P

r   c                 C   s   t j| d dS )an  
    Reset exculded layers setting corresponding to :attr:`main_program`. If :attr:`main_program` 
    is None, then all configurations of excluded_layers would be cleaned.

    Args:
        main_program (Program, optional): Program with model definition and its parameters.
                                          If None is given, then this function would reset all excluded_layers.
                                          Default is None.
    Examples:
        1. Usage of Dynamic Graph

            .. code-block:: python

                import paddle

                class MyLayer(paddle.nn.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self.conv1 = paddle.nn.Conv2D(
                            in_channels=3, out_channels=4, kernel_size=3, padding=2)
                        self.linear1 = paddle.nn.Linear(4624, 100)

                    def forward(self, img):
                        hidden = self.conv1(img)
                        hidden = paddle.flatten(hidden, start_axis=1)
                        prediction = self.linear1(hidden)
                        return prediction

                my_layer = MyLayer()
                optimizer = paddle.optimizer.SGD(
                    learning_rate=0.01, parameters=my_layer.parameters())

                # Need to set excluded layers before calling decorate
                paddle.incubate.asp.set_excluded_layers([my_layer.linear1.full_name()])
                # Reset excluded_layers, all supported layers would be included into Automatic SParsity's workflow.
                # Please note, reset_excluded_layers also must be called before calling sparsity.decorate().
                paddle.incubate.asp.reset_excluded_layers()

                optimizer = paddle.incubate.asp.decorate(optimizer)

        2. Usage of Static Graph

            .. code-block:: python

                import paddle

                paddle.enable_static()

                class MyLayer(paddle.nn.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self.conv1 = paddle.nn.Conv2D(
                            in_channels=3, out_channels=4, kernel_size=3, padding=2)
                        self.linear1 = paddle.nn.Linear(4624, 100)

                    def forward(self, img):
                        hidden = self.conv1(img)
                        hidden = paddle.flatten(hidden, start_axis=1)
                        prediction = self.linear1(hidden)
                        return prediction

                main_program = paddle.static.Program()
                startup_program = paddle.static.Program()

                with paddle.static.program_guard(main_program, startup_program):
                    input_data = paddle.static.data(name='data', shape=[None, 3, 224, 224])
                    label = paddle.static.data(name='label', shape=[None, 100])
                    my_layer = MyLayer()
                    prob = my_layer(input_data)
                    loss = paddle.mean(paddle.nn.functional.square_error_cost(prob, label))

                    # Setup exluded layers out from ASP workflow.
                    # Please note, excluded_layers must be set before calling optimizer.minimize().
                    paddle.incubate.asp.set_excluded_layers([my_layer.linear1.full_name()], main_program)
                    # Reset excluded_layers, all supported layers would be included into Automatic SParsity's workflow.
                    # Please note, reset_excluded_layers also must be called before calling optimizer.minimize().
                    paddle.incubate.asp.reset_excluded_layers(main_program)

                    optimizer = paddle.optimizer.SGD(learning_rate=0.1)
                    optimizer = paddle.static.amp.decorate(optimizer )
                    # Calling paddle.incubate.asp.decorate() to wrap minimize() in optimizer, which 
                    # will insert necessary masking operations for ASP workflow.
                    optimizer = paddle.incubate.asp.decorate(optimizer)
                    optimizer.minimize(loss, startup_program)
    r   N)r   r   r   r   r   r   r   }   s   Vr   c                 C   s
   t | S )a  
    Wrap the given optimizer as a OptimizerWithSparsityGuarantee,
    If runnig with dynamic graph mode. ASP would creates mask variables for supported parameters.
    Else if in static graph mode, ASP would creates mask variables and inserts necessary ops 
    when calling minimize()

    Args:
        optimizer (Optimizer): A Optimizer used for training.
    Returns:
        OptimizerWithSparsityGuarantee: A wrapper for ASP to decorate `minimize` function of the given optimizer.
    Examples:
        1. Usage of Dynamic Graph

            .. code-block:: python

                import paddle

                class MyLayer(paddle.nn.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self.conv1 = paddle.nn.Conv2D(
                            in_channels=3, out_channels=4, kernel_size=3, padding=2)
                        self.linear1 = paddle.nn.Linear(4624, 32)
                        self.linear2 = paddle.nn.Linear(32, 32)
                        self.linear3 = paddle.nn.Linear(32, 10)

                    def forward(self, img):
                        hidden = self.conv1(img)
                        hidden = paddle.flatten(hidden, start_axis=1)
                        hidden = self.linear1(hidden)
                        hidden = self.linear2(hidden)
                        prediction = self.linear3(hidden)
                        return prediction

                my_layer = MyLayer()
                optimizer = paddle.optimizer.SGD(
                    learning_rate=0.01, parameters=my_layer.parameters())

                # Calling paddle.incubate.asp.decorate() to wrap step() in optimizer, which 
                # will apply necessary masking operations for ASP workflow.
                # In dynamic graph mode, ASP would create related mask variables during decoration.
                optimizer = paddle.incubate.asp.decorate(optimizer)

        2. Usage of Static Graph

            .. code-block:: python

                import paddle

                paddle.enable_static()

                class MyLayer(paddle.nn.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self.conv1 = paddle.nn.Conv2D(
                            in_channels=3, out_channels=4, kernel_size=3, padding=2)
                        self.linear1 = paddle.nn.Linear(4624, 100)

                    def forward(self, img):
                        hidden = self.conv1(img)
                        hidden = paddle.flatten(hidden, start_axis=1)
                        prediction = self.linear1(hidden)
                        return prediction

                main_program = paddle.static.Program()
                startup_program = paddle.static.Program()

                with paddle.static.program_guard(main_program, startup_program):
                    input_data = paddle.static.data(name='data', shape=[None, 3, 224, 224])
                    label = paddle.static.data(name='label', shape=[None, 100])
                    my_layer = MyLayer()
                    prob = my_layer(input_data)
                    loss = paddle.mean(paddle.nn.functional.square_error_cost(prob, label))

                    optimizer = paddle.optimizer.SGD(learning_rate=0.1)
                    # Calling paddle.incubate.asp.decorate() to wrap minimize() in optimizer, which 
                    # will insert necessary masking operations for ASP workflow.
                    # In static graph mode, ASP creates related mask variables 
                    # during minimize().
                    optimizer = paddle.incubate.asp.decorate(optimizer)
                    optimizer.minimize(loss, startup_program)
    )r   r   )	optimizerr   r   r   r      s   
Sr         mask_1dTc           
      C   s   t j }t |}tjjtjjtjjd}||v sJ dd}t	| t j
jr+tj}n3t	| t jjrUtj}t| drT| jd dkrTt j rTttjdd}	t |	}n	td	t| ||| |||| |d
S )a  
    Pruning parameters of supported layers in :attr:`model` via 
    specified mask generation function given by :attr:`mask_algo`. This 
    function supports both training and inference controlled by :attr:`with_mask`.
    If :attr:`with_mask` is True, it would also prune parameter related ASP mask Variables,
    else only prunes parameters.

    *Note*: (Static graph mode) If calling this function with :attr:`with_mask`, it should call `OptimizerWithSparsityGuarantee.minimize` 
    and initialization (`exe.run(startup_program`)) before (For successfully obtain mask Variable). 
    Typically set `with_mask` as true for training (have called `OptimizerWithSparsityGuarantee.minimize`) and false for 
    inference only. To obtain OptimizerWithSparsityGuarantee, please see `paddle.incubate.asp.decoreate()`.

    Args:
        model (Program|nn.Layer): Program with model definition and its parameters, or a object of `paddle.nn.Layer`.
        n (int, optional): n of `n:m` sparse pattern. Default is 2.
        m (int, optional): m of `n:m` sparse pattern. Default is 4.
        mask_algo (string, optional): The function name to generate spase mask. Default is `mask_1d`.
                                      The vaild inputs should be one of 'mask_1d', 'mask_2d_greedy' and 'mask_2d_best'.
        with_mask (bool, optional): To prune mask Variables related to parameters or not. Ture is purning also, False is not. Default is True.
    Returns:
        dictionary: A dictionary with key: `parameter name` (string) and value: its corresponding mask Variable.
    Examples:
        1. Usage of Dynamic Graph

            .. code-block:: python

                import paddle
                import numpy as np

                class MyLayer(paddle.nn.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self.conv1 = paddle.nn.Conv2D(
                            in_channels=3, out_channels=4, kernel_size=3, padding=2)
                        self.linear1 = paddle.nn.Linear(4624, 32)
                        self.linear2 = paddle.nn.Linear(32, 32)
                        self.linear3 = paddle.nn.Linear(32, 10)

                    def forward(self, img):
                        hidden = self.conv1(img)
                        hidden = paddle.flatten(hidden, start_axis=1)
                        hidden = self.linear1(hidden)
                        hidden = self.linear2(hidden)
                        prediction = self.linear3(hidden)
                        return prediction

                my_layer = MyLayer()
                loss_fn = paddle.nn.MSELoss(reduction='mean')

                optimizer = paddle.optimizer.SGD(
                    learning_rate=0.01, parameters=my_layer.parameters())

                # Calling paddle.incubate.asp.decorate() to wrap step() in optimizer, which 
                # will apply necessary masking operations for ASP workflow.
                # In dynamic graph mode, ASP would create related mask variables during decoration.
                optimizer = paddle.incubate.asp.decorate(optimizer)

                # Must call paddle.incubate.asp.decorate() first before calling paddle.incubate.asp.prune_model()
                paddle.incubate.asp.prune_model(my_layer, mask_algo='mask_2d_best')

                for i in range(10):
                    imgs = paddle.to_tensor(
                        np.random.randn(64, 3, 32, 32),
                        dtype='float32', stop_gradient=False)
                    labels = paddle.to_tensor(
                        np.random.randint(10, size=(64, 1)),
                        dtype='float32', stop_gradient=False)
                    output = my_layer(imgs)
                    loss = loss_fn(output, labels)
                    loss.backward()
                    optimizer.step()
                    optimizer.clear_grad()

        2. Usage of Static Graph

            .. code-block:: python

                import paddle
                import numpy as np

                paddle.enable_static()

                class MyLayer(paddle.nn.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self.conv1 = paddle.nn.Conv2D(
                            in_channels=3, out_channels=4, kernel_size=3, padding=2)
                        self.linear1 = paddle.nn.Linear(4624, 32)
                        self.linear2 = paddle.nn.Linear(32, 32)
                        self.linear3 = paddle.nn.Linear(32, 10)

                    def forward(self, img):
                        hidden = self.conv1(img)
                        hidden = paddle.flatten(hidden, start_axis=1)
                        hidden = self.linear1(hidden)
                        hidden = self.linear2(hidden)
                        prediction = self.linear3(hidden)
                        return prediction

                main_program = paddle.static.Program()
                startup_program = paddle.static.Program()

                with paddle.static.program_guard(main_program, startup_program):
                    input_data = paddle.static.data(name='data', shape=[None, 3, 32, 32])
                    label = paddle.static.data(name='label', shape=[None, 1])
                    my_layer = MyLayer()
                    prob = my_layer(input_data)
                    loss = paddle.mean(paddle.nn.functional.square_error_cost(prob, label))

                    optimizer = paddle.optimizer.SGD(learning_rate=0.1)
                    # Calling paddle.incubate.asp.decorate() to wrap minimize() in optimizer, which 
                    # will insert necessary masking operations for ASP workflow.
                    # In static graph mode, ASP creates related mask variables 
                    # during minimize().
                    optimizer = paddle.incubate.asp.decorate(optimizer)
                    optimizer.minimize(loss, startup_program)

                device = paddle.device.get_device()
                place = paddle.set_device(device)

                exe = paddle.static.Executor(place)
                exe.run(startup_program)

                # Must call exe.run(startup_program) first before calling paddle.asp.prune_model()
                paddle.incubate.asp.prune_model(my_layer, mask_algo='mask_2d_best')
                # it also be accepted to call 
                # paddle.incubate.asp.prune_model(main_program, mask_algo='mask_2d_best')

                for i in range(10):
                    imgs = np.random.randn(64, 3, 32, 32).astype('float32')
                    labels = np.random.randint(10, size=(64, 1)).astype('float32')
                    exe.run(main_program, feed={'data':imgs, 'label':labels})
    )r   Zmask_2d_greedyZmask_2d_bestzNThe "mask_algo" should be one of ["mask_1d", "mask_2d_greedy", "mask_2d_best"]Ndistributed_info_Zsharding_degree   ZFLAGS_selected_gpusr   zDmodel should be paddle.nn.Layer or paddle.static.Program, but got {}nm	mask_algo	with_mask)r   deviceZ
get_deviceZ
set_devicer   MaskAlgoMASK_1DZMASK_2D_GREEDYZMASK_2D_BEST
isinstancennZLayerr   prune_model_by_layerr   ZProgramprune_model_by_programhasattrr   fluidZis_compiled_with_cudaintosenvirongetZ	CUDAPlace	TypeErrorformattype)
modelr    r!   r"   r#   r$   placeZMaskAlgo_mapping
prune_funcZgpu_idr   r   r   r   ,  sB    





r   c                   @   s\   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d Ze	dd Z
e	dd Ze	dd ZdS )ProgramASPInfoa$  
    ProgramASPInfo is a container to keep ASP relevant information of Pragrom. It contains three inner-variables:
    1. __mask_vars (Dictionary): Key is parameter's name and vaule is its corresponding sparse mask Variable object, which is created by `ASPHelper.create_mask_variables`.
    2. __masks (Dictionary): Key is parameter's name and vaule is its corressponding sparse mask Numpy array, which is created by `ASPHelper.prune_model`.
    3. __excluded_layers (List): It stores name of layers which should not involve into ASP workflow.
    c                 C   s   i | _ i | _g | _d S N)_ProgramASPInfo__mask_vars_ProgramASPInfo__masks _ProgramASPInfo__excluded_layersselfr   r   r   __init__  s   
zProgramASPInfo.__init__c                 C      || j |< d S r8   r9   r=   
param_namevarr   r   r   update_mask_vars     zProgramASPInfo.update_mask_varsc                 C   r?   r8   r:   rA   r   r   r   update_masks  rE   zProgramASPInfo.update_masksc                 C   s   | j t| d S r8   )r;   extendcopydeepcopy)r=   r   r   r   r   update_excluded_layers  s   z%ProgramASPInfo.update_excluded_layersc                 C   s
   g | _ d S r8   r;   r<   r   r   r   r        
z$ProgramASPInfo.reset_excluded_layersc                 C      | j S r8   r@   r<   r   r   r   	mask_vars     zProgramASPInfo.mask_varsc                 C   rN   r8   rF   r<   r   r   r   masks  rP   zProgramASPInfo.masksc                 C   rN   r8   rL   r<   r   r   r   excluded_layers  rP   zProgramASPInfo.excluded_layersN)__name__
__module____qualname____doc__r>   rD   rG   rK   r   propertyrO   rQ   rR   r   r   r   r   r7     s    

r7   c                   @   s   e Zd ZdZdZdZi Zedd Zed$ddZ	e
d	d
 ZedddejjdfddZeddejjdfddZe
dd Ze
dd Zedd Zedd Zedd Ze				d%ddZeedd Zed d! Zed"d# ZdS )&r   a0  
    ASPHelper is a collection of Auto SParsity (ASP) functions to enable 

    1. training models with weights in 2:4 sparse pattern on FP16 or 1:2 sparse pattern on FP32 from scratch.
    2. pruning well-trained models into 2:4 sparse pattern on FP16 or 1:2 sparse pattern on FP32 for fine-tuning.
    Zasp_maskZw_c                 C   s   |  |}|| dS )z
        This is the implementation of `sparsity.set_excluded_layers`, for details please see explanation in `sparsity.set_excluded_layers`.
        N)_get_program_asp_inforK   )clsr   r   asp_infor   r   r   r     s   
zASPHelper.set_excluded_layersNc                 C   s8   |du r| j D ]	}| j |   qdS | |  dS )z
        This is the implementation of `sparsity.reset_excluded_layers`, for details please see explanation in `sparsity.reset_excluded_layers`.
        N)_ASPHelper__asp_infor   rX   )rY   r   progr   r   r   r     s
   
zASPHelper.reset_excluded_layersc                 C   s4   t  rt j }t j }t||| j t| S )z
        This is the implementation of `sparsity.decorate`, for details please see explanation in `sparsity.decorate`.
        )	r   in_dynamic_moder   r   default_startup_programr   _create_mask_variables_parameter_listOptimizerWithSparsityGuarantee)r   	main_progZstartup_progr   r   r   r     s   

zASPHelper.decorater   r   Tc                 C   s  |du r	t j }| |}|  D ]i}t||jr}t	 
|j }	t|	}
t|j}||
||||j\}}||
j}|	|| |rvt	 
t|j}|duscJ dt|j| }|t|j}||| ||j| q|j S )
        This is the implementation of `sparsity.prune_model`, for details please see explanation in `sparsity.prune_model`.
        NzCannot find {} variable, please call optimizer.minimize (paddle.sparsity.decorate(optimizer).minimize(loss) and initialization (exe.run(startup_program)) first!)r   r   r   rX   global_blockall_parametersr   _is_supported_layernamer   Zfind_varZ
get_tensornparray_get_prune_func_by_nameastypedtypeset_get_mask_namer2   rG   rQ   rI   )rY   r5   r   r    r!   r"   r#   rZ   paramZweight_tensorweight_nparrayr6   weight_pruned_nparrayweight_sparse_maskweight_mask_paramZweight_mask_tensorr   r   r   r*   *  s<   






z ASPHelper.prune_model_by_programc                 C   s  t  rft j }| |}| D ]N}	t||	jr`|		 }
t
|	j}||
||||	j\}}||
j}|	| |rY|j|	jd}|dusTJ dt|	j|| ||	j| q|j S d}| D ]}	|	jj}ql|dus{J dtj||||||dS )rc   NzZCannot find {} variable, please call sparsity.decorate() to decorate your optimizer first!z6Cannot get paddle.static.Program from Paddle.nn.Layer.r   )r   r]   r   r   rX   
parametersr   rf   rg   numpyrj   rk   rl   	set_valuerO   r0   r2   rn   rG   rQ   rI   blockprogramr*   )rY   r5   layerr    r!   r"   r#   r   rZ   ro   rp   r6   rq   rr   rs   Ztarget_programr   r   r   r)   U  sL   







zASPHelper.prune_model_by_layerc                 C   s   | d t j S )z
        Return mask name by given parameter name :attr:`param_name`.

        Args:
            param_name (string): The name of parameter.
        Returns:
            string: The mask name of :attr:`param_name`.
        .)r   MASK_APPENDDED_NAME)rB   r   r   r   rn     s   
zASPHelper._get_mask_namec                 C   s:   g }|    D ]}|jd}tj|vr|| q|S )aF  
        Get all parameters's Variables in :attr:`main_program` but excluded ASP mask Variables.

        Args:
            main_program (Program): Program with model definition and its parameters.
        Returns:
            list: A list of parameter Variables in :attr:`main_program` (excluded ASP mask Variables).
        rz   )rd   re   rg   splitr   r{   append)r   Zvar_listro   param_name_listr   r   r   _get_not_ASP_relevant_vars  s   


z$ASPHelper._get_not_ASP_relevant_varsc                 C   s    || j vrt | j |< | j | S r8   )r[   r7   )rY   r   r   r   r   rX     s   

zASPHelper._get_program_asp_infoc                 C   s   | d}tj|v rdS | |jD ]	}||v r dS q|tv r"dS t|dkr*dS |d }|d }|d|d }tj|vrBdS |tv sJ|tv rLdS dS )a  
        Verify if given :attr:`param_name` is supported by ASP.

        Args:
            param_name (string): The name of parameter.
        Returns:
            bool: True if it is supported, else False.
        Examples:
            .. code-block:: python

              from paddle.static.sparsity.asp import ASPHelper

              main_program = paddle.static.Program()
              startup_program = paddle.static.Program()

              with paddle.static.program_guard(main_program, startup_program):
                  input_data = paddle.static.data(name='data', shape=[None, 128])
                  fc = paddle.static.nn.fc(x=input_data, num_flatten_dims=-1, size=32, activation=None)

              for param in main_program.global_block().all_parameters():
                  ASPHelper._is_supported_layer(main_program, param.name)
              # fc_0.w_0 -> True
              # fc_0.b_0 -> False
        rz   FTr   r   N_)	r|   r   r{   rX   rR   r	   lenrfindPADDLE_WEIGHT_SUFFIX)rY   r   rB   r~   ry   param_name_no_weight_suffixZparam_type_suffix
layer_namer   r   r   rf     s,   


zASPHelper._is_supported_layerc                 C   sX   t |d }|dd }|d u rt |d }|d u r*|d |d }t |t}|S )Nrz   r   r   )r	   r0   r|   r   r
   )rY   rB   funcr   r   r   r   r   rj     s   z!ASPHelper._get_prune_func_by_namec           
      C   sh   |du r|j j}|du rtj }|j||||d\}}dd |D }	| |||	 | ||	 ||fS )a  
        This function is a decorator of `minimize` function in `Optimizer`.
        There are three steps:

        1. Call :attr:`optimizer`.minimize(:attr:`loss`)
        2. Create sparse mask Tensors according to supported layers in :attr:`main_program`.
        3. Insert masking ops in the end of parameters update.

        *Note*: Please use `ASP.decorate` instead when applying distributed training with `Fleet`. 
        (Due to there is a invisiable graphs optimization in `Fleet.minimize()` which make training graph 
        cannot be modified anymore.)

        Args:
            optimizer (Optimizer): A Optimizer used for training.
            loss (Variable): A Variable containing the value to minimize.
            main_program (Program, optional): Program with model definition and its parameters. Default is `loss.block.program`.
            startup_program (Program, optional): Program for initializing parameters in `parameter_list`. Default is `paddle.static.default_startup_program()`.
            parameter_list (Iterable, optional): Iterable of `Variable` or `Variable.name` to update to minimize `loss`. The default value is None, at this time all parameters will be updated.
            no_grad_set (set, optional): Set of `Variable  or `Variable.name` that don't need to be updated. The default value is None.
        Returns:
            list: operators from :attr:`optimizer`.minimize(:attr:`loss`).
            list: pairs of parameters and their gradients.
        N)no_grad_setc                 S   s   g | ]}|d  qS )r   r   ).0Zpgr   r   r   
<listcomp>  s    z'ASPHelper._minimize.<locals>.<listcomp>)rw   rx   r   r   r^   minimizer_   _insert_sparse_mask_ops)
rY   r   lossr   startup_programparameter_listr   Zoptimizer_opsZparams_and_gradsZparams_onlyr   r   r   	_minimize  s   

zASPHelper._minimizec                 C   sR   |   tj }tjj  t||j	 W d   dS 1 s"w   Y  dS a  
        This function is a decorator of `step` function in `Optimizer`.
        There are three steps:

        1. Call :attr:`optimizer`.step()
        2. Mask parameters with sparse masks.

        *Note*: Please use `ASP.decorate` instead when applying distributed training with `Fleet`. 
        (Due to there is a invisiable graphs optimization in `Fleet.minimize()` which make training graph 
        cannot be modified anymore.)

        Args:
            optimizer (Optimizer): A Optimizer used for training.
        N)
stepr   r   r   r,   ZdygraphZno_gradr   r   r`   )rY   r   rb   r   r   r   _step"  s   
"zASPHelper._stepc              	   C   s   |  |}t||: |D ].}t||jr;|j|jvr;tjt|j|j	|j
tddd}d|_d|_||j| qW d   dS 1 sGw   Y  dS )a  
        Create sparse mask Tensors according to supported layers in :attr:`main_program`.
        This function is called in second step of `ASPHelper._minimize`

        Args:
            main_program (Program): Program with model definition and its parameters.
            startup_program (Program): Program for initializing parameters.
            params (list): Variable parameters.
        g      ?)value)rg   shaperl   Zdefault_initializerTFN)rX   r   r   rf   rg   rO   r   Zcreate_parameterrn   r   rl   r   Zstop_gradientZ	trainablerD   )rY   r   r   paramsrZ   ro   Z
mask_paramr   r   r   r_   9  s"   

"z ASPHelper._create_mask_variablesc                 C   sb   |  }| |}|D ]#}|j|jv r.|jd||j|j dd|iddddtttjid qd	S )
a!  
        Insert masking ops in the end of parameters update.
        This function is called in third step of `ASPHelper._minimize`

        Args:
            main_program (Program): Program with model definition and its parameters.
            params (list): Variable parameters.
        Zelementwise_mul)XYZOutZaxisZ
use_mkldnnF)r3   ZinputsZoutputsattrsN)	rd   rX   rg   rO   Z	append_opOP_ROLE_KEYr-   OpRoleZOptimize)rY   r   r   rw   rZ   ro   r   r   r   r   R  s    



z!ASPHelper._insert_sparse_mask_opsr8   )NNNN)rS   rT   rU   rV   r{   r   r[   classmethodr   r   staticmethodr   r   r%   r&   r*   r)   rn   r   rX   rf   rj   r   r   r   r_   r   r   r   r   r   r     s^    


*5



6
,
r   c                   @   sT   e Zd ZdZdd Zdd Z			dddZed	d
 Zedd Z	edd Z
dS )ra   a  
    OptimizerWithSparsityGuarantee is a wrapper to decorate `minimize` function of given optimizer by `_minimize` of ASPHelper.
    The decorated `minimize` function would do three things (exactly same as `ASPHelper._minimize`):
    1. Call `minimize` function of given optimizer.
    2. Call `ASPHelper._create_mask_variables` to create mask Variables.
    3. Call `ASPHelper._insert_sparse_mask_ops` to insert weight masking ops in the end of `loss`'s Program.
    c                 C   s
   || _ d S r8   )
_optimizer)r=   r   r   r   r   r>   v  rM   z'OptimizerWithSparsityGuarantee.__init__c                 C   s   t | j|S r8   )getattrr   )r=   itemr   r   r   __getattr__y  s   z*OptimizerWithSparsityGuarantee.__getattr__Nc                 C   s   t j| j||||dS )a0  
        This function is to call `ASPHelper.minimize()` and return its return

        Args:
            loss (Variable): A Variable containing the value to minimize.
            startup_program (Program, optional): Program for initializing parameters in `parameter_list`. Default is `paddle.static.default_startup_program()`.
            parameter_list (Iterable, optional): Iterable of `Variable` or `Variable.name` to update to minimize `loss`. The default value is None, at this time all parameters will be updated.
            no_grad_set (set, optional): Set of `Variable  or `Variable.name` that don't need to be updated. The default value is None.
        Returns:
            list: operators from :attr:`optimizer`.minimize(:attr:`loss`).
            list: pairs of parameters and their gradients.
        )r   r   r   )r   r   r   )r=   r   r   r   r   r   r   r   r   |  s   z'OptimizerWithSparsityGuarantee.minimizec                 C   s   t | j dS r   )r   r   r   r<   r   r   r   r     s   z#OptimizerWithSparsityGuarantee.stepc                 C   sF   | j  }ttj }|j D ]\}}|	t
||i q|S )z
        This function is a decorator of `state_dict` function in `Optimizer`.

        Returns:
            state_dict(dict) : dict contains all the Tensor used by optimizer
        )r   
state_dictr   rX   r   r   r   rO   itemsupdatern   )r=   r   rZ   rB   rC   r   r   r   r     s   
z)OptimizerWithSparsityGuarantee.state_dictc                 C   sn   t tj }|j D ]#\}}t |}||v s!J d||	||  |
||  q| j|S )z
        This function is a decorator of `set_state_dict` function in `Optimizer`.
        Args: 
            state_dict(dict) : Dict contains all the Tensor needed by optimizer
        Return:
            None
        zThe {} is not found.)r   rX   r   r   r   rO   r   rn   r2   rv   rG   ru   r   set_state_dict)r=   r   rZ   rB   rC   Zparam_mask_namer   r   r   r     s   	

z-OptimizerWithSparsityGuarantee.set_state_dict)NNN)rS   rT   rU   rV   r>   r   r   r   r   r   r   r   r   r   r   ra   m  s    


ra   r8   )r   r   r   T)!rV   r.   rI   ru   rh   r   Zpaddle.fluid.frameworkr   Zpaddle.fluidr   r   r   Zpaddle.fluid.initializerr   Zpaddle.fluid.contribr   r   Z2paddle.fluid.contrib.sparsity.supported_layer_listr	   r
   Zop_proto_and_checker_makerr   ZkOpRoleAttrNamer   __all__r   r   r   r   objectr7   r   ra   r   r   r   r   <module>   s2   


VY
V )&  u