o
    Ne-                     @   s   d dl mZ d dlZddlmZ ddlmZmZmZ ddlmZ d dl	m
Z
mZ g dZG d	d
 d
eZG dd deZG dd deZeZeZdS )    )print_functionN   )	framework)_non_static_mode_varbase_creatorin_dygraph_mode)core)_C_ops_legacy_C_ops)L1DecayL2DecayL1DecayRegularizerL2DecayRegularizerc                   @   s(   e Zd ZdZdd Zdd Zdd ZdS )	WeightDecayRegularizera  Base class for weight decay regularizers

    Defines the common interface of weight-decay regularizers.
    Weight-decay regularizers are added only during the backward
    pass for faster regularization. They add operations to the network
    that correspond to gradient of the regularization function.
    Users should not use this class directly, but need to use one
    of its implementations
    c                 C   s   d S N selfr   r   HD:\Projects\ConvertPro\env\Lib\site-packages\paddle/fluid/regularizer.py__init__%   s   zWeightDecayRegularizer.__init__c                 C      t  )zAAdd corresponding weight decay operations to the network
        NotImplementedError)r   paramgradblockr   r   r   __call__(      zWeightDecayRegularizer.__call__c                 C   r   )zDebug string
        r   r   r   r   r   __str__-   r   zWeightDecayRegularizer.__str__N)__name__
__module____qualname____doc__r   r   r   r   r   r   r   r      s
    
r   c                       2   e Zd ZdZd	 fdd	Zdd Zdd Z  ZS )
r   a:   
    Implement the L2 Weight Decay Regularization, which helps to prevent the model over-fitting.

    It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_fluid_optimizer_SGDOptimizer` ). 
    When set in ``ParamAttr`` , it only takes effect for trainable parameters in this layer. When set in 
    ``optimizer`` , it takes effect for all trainable parameters. When set together, ``ParamAttr`` has 
    higher priority than ``optimizer`` .
    
    In the implementation, the formula of L2 Weight Decay Regularization is as follows:

    .. math::

        L2WeightDecay = reg\_coeff * parameter

    Args:
        regularization_coeff(float, optional): regularization coeff. Default:0.0

    Examples:
        .. code-block:: python

            # Example1: set Regularizer in optimizer
            import paddle.fluid as fluid

            main_prog = fluid.Program()
            startup_prog = fluid.Program()
            with fluid.program_guard(main_prog, startup_prog):
                data = fluid.layers.data(name='image', shape=[3, 28, 28], dtype='float32')
                label = fluid.layers.data(name='label', shape=[1], dtype='int64')
                hidden = fluid.layers.fc(input=data, size=128, act='relu')
                prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
                loss = fluid.layers.cross_entropy(input=prediction, label=label)
                avg_loss = fluid.layers.mean(loss)
            optimizer = fluid.optimizer.Adagrad(
                learning_rate=1e-4,
                regularization=fluid.regularizer.L2Decay(
                    regularization_coeff=0.1))
            optimizer.minimize(avg_loss)


            # Example2: set Regularizer both in ParamAttr and optimizer
            import paddle.fluid as fluid

            l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
            l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1)
            x = fluid.layers.uniform_random([3,4])
            
            # set L1 regularization in fluid.ParamAttr
            w_param = fluid.ParamAttr(regularizer=l1)
            hidden1 = fluid.layers.fc(x, 8, param_attr=w_param)  # fc_0.w_0(L1), fc_0.b_0
            hidden2 = fluid.layers.fc(hidden1, 16, param_attr=w_param)   # fc_1.w_0(L1), fc_1.b_0
            predict = fluid.layers.fc(hidden2, 32)    # fc_3.w_0, fc_3.b_0
            avg_loss = fluid.layers.mean(predict)

            # set L2 regularization in optimizer
            optimizer = fluid.optimizer.SGD(learning_rate=1e-4, regularization=l2)
            optimizer.minimize(avg_loss)
            
            # it will Print Message:
            # Regularization of [fc_0.w_0, fc_1.w_0] have been set by ParamAttr or WeightNormParamAttr already. 
            # So, the Regularization of Optimizer will not take effect for these parameters!

            c                    $   |d usJ t t|   || _d S r   )superr   r   _regularization_coeffr   Zregularization_coeff	__class__r   r   r   s      
zL2DecayRegularizer.__init__c                 C   s   t |tjsJ t |tjsJ t r)t r!t|| jddS t	|d| jS |j
|j|j|jd}|jdd|id|id| jid |S )aM  Add L2 weight decay ops to network

        Adds L2 weight decay ops.
        L2WeightDecay = reg_coeff * parameter

        Args:
            param: parameter variable for which regularization is applied
            block: block in which variable is to be created

        Returns:
            new variable for weight decay
        r$   Tscaledtypeshape	lod_levelXOuttypeinputsoutputsattrs)
isinstancer   VariableBlockr   r   r	   r,   r'   r
   
create_varr.   r/   r0   	append_op)r   r   r   r   decayr   r   r   r   x   s(   zL2DecayRegularizer.__call__c                 C   
   d| j  S )Nz L2Decay, regularization_coeff=%fr'   r   r   r   r   r         
zL2DecayRegularizer.__str__r$   r   r    r!   r"   r   r   r   __classcell__r   r   r)   r   r   3   s
    ?$r   c                       r#   )
r   aL  
    Implement the L1 Weight Decay Regularization, which encourages the weights to be sparse.
    
    It can be set in :ref:`api_fluid_ParamAttr` or ``optimizer`` (such as :ref:`api_fluid_optimizer_SGDOptimizer` ). 
    When set in ``ParamAttr`` , it only takes effect for trainable parameters in this layer. When set in 
    ``optimizer`` , it takes effect for all trainable parameters. When set together, ``ParamAttr`` has 
    higher priority than ``optimizer`` .
    
    In the implementation, the formula of L1 Weight Decay Regularization is as follows:
	
    .. math::

        L1WeightDecay = reg\_coeff * sign(parameter)

    Args:
        regularization_coeff(float, optional): regularization coeff. Default:0.0.
	
    Examples:
        .. code-block:: python

            # Example1: set Regularizer in optimizer
            import paddle.fluid as fluid

            main_prog = fluid.Program()
            startup_prog = fluid.Program()
            with fluid.program_guard(main_prog, startup_prog):
                data = fluid.layers.data(name='image', shape=[3, 28, 28], dtype='float32')
                label = fluid.layers.data(name='label', shape=[1], dtype='int64')
                hidden = fluid.layers.fc(input=data, size=128, act='relu')
                prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
                loss = fluid.layers.cross_entropy(input=prediction, label=label)
                avg_loss = fluid.layers.mean(loss)
            optimizer = fluid.optimizer.Adagrad(
                learning_rate=1e-4,
                regularization=fluid.regularizer.L1DecayRegularizer(
                    regularization_coeff=0.1))
            optimizer.minimize(avg_loss)
 

            # Example2: set Regularizer both in ParamAttr and optimizer
            import paddle.fluid as fluid

            l1 = fluid.regularizer.L1Decay(regularization_coeff=0.1)
            l2 = fluid.regularizer.L2Decay(regularization_coeff=0.1)
            x = fluid.layers.uniform_random([3,4])
            
            # set L1 regularization in fluid.ParamAttr
            w_param = fluid.ParamAttr(regularizer=l1)
            hidden1 = fluid.layers.fc(x, 8, param_attr=w_param)  # fc_0.w_0(L1), fc_0.b_0
            hidden2 = fluid.layers.fc(hidden1, 16, param_attr=w_param)  # fc_1.w_0(L1), fc_1.b_0
            predict = fluid.layers.fc(hidden2, 32)   # fc_3.w_0, fc_3.b_0
            avg_loss = fluid.layers.mean(predict)

            # set L2 regularization in optimizer
            optimizer = fluid.optimizer.SGD(learning_rate=1e-4, regularization=l2)
            optimizer.minimize(avg_loss)
            
            # it will Print Message:
            # Regularization of [fc_0.w_0, fc_1.w_0] have been set by ParamAttr or WeightNormParamAttr already. 
            # So, the Regularization of Optimizer will not take effect for these parameters!

    r$   c                    r%   r   )r&   r   r   r'   r(   r)   r   r   r      r+   zL1DecayRegularizer.__init__c                 C   s   t |tjsJ t |tjsJ t r'|j|j|jd}|j|j|jd}n|j|j|j|jd}|j|j|j|jd}t	 rNt
|}t
|| jddS |jdd|id|id |jd	d|id|id	| jid
 |S )aS  Add L1 weight decay ops to network

        Adds L1 weight decay ops.
        L1WeightDecay = reg_coeff * sign(parameter)

        Args:
            param: parameter variable for which regularization is applied
            block: block in which variable is to be created

        Returns:
            new variable for weight decay
        )r.   r/   r-   r$   Tsignr1   r2   )r4   r5   r6   r,   r3   )r8   r   r9   r:   r   r;   r.   r/   r0   r   r	   rD   r,   r'   r<   )r   r   r   r   rD   r=   r   r   r   r      s.   
zL1DecayRegularizer.__call__c                 C   r>   )Nz L1Decay, regularization_coeff=%fr?   r   r   r   r   r     r@   zL1DecayRegularizer.__str__rA   rB   r   r   r)   r   r      s
    ?)r   )
__future__r   logging r   r   r   r   r   Zpaddler	   r
   __all__objectr   r   r   r   r   r   r   r   r   <module>   s   m{