o
    Qe-!                     @   st   d dl mZ ddlmZ ddlmZ ddlmZmZ ddlmZ ddl	m
Z
mZ dd	lmZ g ZG d
d deZdS )   )	Optimizer   )core)	framework)Variable
name_scope)in_dygraph_mode    )_C_ops_legacy_C_ops)no_gradc                       sP   e Zd ZdZdZdZ							d fdd		Zd
d Zdd Zdd Z	  Z
S )Adadeltaa  
    **Notes: This API does not support sparse parameter optimization.**

    Adadelta Optimizer. Please refer to this for details:
    `ADADELTA: AN ADAPTIVE LEARNING RATE METHOD <https://arxiv.org/abs/1212.5701>`_.

    The update is done as follows:

    .. math::

        E(g_t^2) &= \rho * E(g_{t-1}^2) + (1-\rho) * g^2

        learning\_rate &= \sqrt{ ( E(dx_{t-1}^2) + \epsilon ) / ( E(g_t^2) + \epsilon ) }

        E(dx_t^2) &= \rho * E(dx_{t-1}^2) + (1-\rho) * (-g*learning\_rate)^2

    Args:
        learning_rate (float|Tensor|LearningRateDecay, optional): The learning rate used to update ``Parameter``.
            It can be a float value, a ``Tensor`` with a float type or a LearningRateDecay. The default value is 0.001.
        epsilon (float): a small float number for numeric stability. Default 1.0e-6.
        rho (float): a floating point value indicating the decay rate. Default 0.95.
        parameters (list|tuple, optional): List/Tuple of ``Tensor`` to update to minimize ``loss``. \
            This parameter is required in dygraph mode. And you can specify different options for \
            different parameter groups such as the learning rate, weight decay, etc, \
            then the parameters are list of dict. Note that the learning_rate in paramter groups \
            represents the scale of base learning_rate. \
            The default value is None in static mode, at this time all parameters will be updated.
        weight_decay (float|WeightDecayRegularizer, optional): The strategy of regularization. \
            It canbe a float value as coeff of L2 regularization or \
            :ref:`api_fluid_regularizer_L1Decay`, :ref:`api_fluid_regularizer_L2Decay`.
            If a parameter has set regularizer using :ref:`api_fluid_ParamAttr` already, \
            the regularization setting here in optimizer will be ignored for this parameter. \
            Otherwise, the regularization setting here in optimizer will take effect. \
            Default None, meaning there is no regularization. 
        grad_clip (GradientClipBase, optional): Gradient cliping strategy, it's an instance of
            some derived class of ``GradientClipBase`` . There are three cliping strategies
            ( :ref:`api_fluid_clip_GradientClipByGlobalNorm` , :ref:`api_fluid_clip_GradientClipByNorm` ,
            :ref:`api_fluid_clip_GradientClipByValue` ). Default None, meaning there is no gradient clipping.
        name (str, optional): The default value is None. Normally there is no need for user
                to set this property. For more information, please refer to
                :ref:`api_guide_Name` .

    Examples:
        .. code-block:: python
	
            import paddle

            inp = paddle.uniform([10, 10], dtype="float32", min=-0.1, max=0.1)
            linear = paddle.nn.Linear(10, 10)
            out = linear(inp)
            loss = paddle.mean(out)
            beta1 = paddle.to_tensor([0.9], dtype="float32")
            beta2 = paddle.to_tensor([0.99], dtype="float32")
            adadelta = paddle.optimizer.Adadelta(learning_rate=0.1, parameters=linear.parameters(), weight_decay=0.01)
            back = out.backward()
            adadelta.step()
            adadelta.clear_grad()

            #Note that the learning_rate of linear_2 is 0.01.
            linear_1 = paddle.nn.Linear(10, 10)
            linear_2 = paddle.nn.Linear(10, 10)
            inp = paddle.uniform(shape=[10, 10], min=-0.1, max=0.1)
            out = linear_1(inp)
            out = linear_2(out)
            loss = paddle.mean(out)
            adadelta = paddle.optimizer.Adadelta(
                learning_rate=0.1,
                parameters=[{
                    'params': linear_1.parameters()
                }, {
                    'params': linear_2.parameters(),
                    'weight_decay': 0.001,
                    'learning_rate': 0.1,
                }],
                weight_decay=0.01)                   
            out.backward()
            adadelta.step()
            adadelta.clear_grad()

    Z_avg_squared_gradZ_avg_squared_updateMbP?ư>ffffff?Nc                    sl   |d u rt d|d u rt d|d u rt dtt| j|||||d d| _|| _|| _||d| _d S )Nzlearning_rate is not set.zepsilon is not set.zrho is not set.)learning_rate
parametersweight_decay	grad_clipnameZadadeltaepsilonrho)
ValueErrorsuperr   __init__type_epsilon_rho_default_dict)selfr   r   r   r   r   r   r   	__class__ ID:\Projects\ConvertPro\env\Lib\site-packages\paddle/optimizer/adadelta.pyr   o   s&   

zAdadelta.__init__c                 C   sR   t |tjs
tdt |tr|d}|D ]}| | j| | | j| qd S )N)block is not instance of framework.Block.params)	
isinstancer   Block	TypeErrordictgetZ_add_accumulator_avg_squared_grad_acc_str_avg_squared_update_acc_str)r    blockr   pr#   r#   r$   _create_accumulators   s   

zAdadelta._create_accumulatorsc              	   C   s   t |tr
| |}| | j|d }| | j|d }t rEt  t	|d |d ||| j
| j W d    d S 1 s>w   Y  d S t |tjsOtd|j| j|d |d ||d|d ||d| j| j
ddd}|S )	Nr	   r   r%   )ParamZGradZAvgSquaredGradZAvgSquaredUpdate)ZParamOutZAvgSquaredGradOutZAvgSquaredUpdateOutr   T)r   ZinputsZoutputsattrsZstop_gradient)r'   r*   _update_param_groupZ_get_accumulatorr,   r-   r   r   r
   Z	adadelta_r   r   r   r(   r)   Z	append_opr   )r    r.   Zparam_and_gradZavg_squared_grad_accZavg_squared_update_accZadadelta_opr#   r#   r$   _append_optimize_op   sN   




		zAdadelta._append_optimize_opc                 C   s6   | d| jd | _| d| jd | _| d}|S )Nr   r   r&   )r+   r   r   r   )r    r   r#   r#   r$   r3      s   
zAdadelta._update_param_group)r   r   r   NNNN)__name__
__module____qualname____doc__r,   r-   r   r0   r4   r3   __classcell__r#   r#   r!   r$   r      s    Q
.r   N)Z	optimizerr   Zfluidr   r   Zfluid.frameworkr   r   r   Zpaddler
   r   Zfluid.dygraphr   __all__r   r#   r#   r#   r$   <module>   s   