o
    NeU                     @   sv   d Z ddlm  mZ ddlmZ ddlmZ ddl	m
Z
 e e
ddg dG d	d
 d
ejZej deje_ dS )zSGD optimizer implementation.    N)	optimizer)register_keras_serializable)keras_exportz!keras.optimizers.experimental.SGDzkeras.optimizers.SGD)v1c                       s\   e Zd ZdZ													 d fdd		Z fd
dZdd Z fddZ  ZS )SGDa  Gradient descent (with momentum) optimizer.

    Update rule for parameter `w` with gradient `g` when `momentum` is 0:

    ```python
    w = w - learning_rate * g
    ```

    Update rule when `momentum` is larger than 0:

    ```python
    velocity = momentum * velocity - learning_rate * g
    w = w + velocity
    ```

    When `nesterov=True`, this rule becomes:

    ```python
    velocity = momentum * velocity - learning_rate * g
    w = w + momentum * velocity - learning_rate * g
    ```

    Args:
      learning_rate: A `Tensor`, floating point value, or a schedule that is a
        `tf.keras.optimizers.schedules.LearningRateSchedule`, or a callable
        that takes no arguments and returns the actual value to use. The
        learning rate. Defaults to 0.001.
      momentum: float hyperparameter >= 0 that accelerates gradient descent in
        the relevant direction and dampens oscillations. Defaults to 0, i.e.,
        vanilla gradient descent.
      nesterov: boolean. Whether to apply Nesterov momentum.
        Defaults to `False`.
      {{base_optimizer_keyword_args}}

    Usage:

    >>> opt = tf.keras.optimizers.experimental.SGD(learning_rate=0.1)
    >>> var = tf.Variable(1.0)
    >>> loss = lambda: (var ** 2)/2.0         # d(loss)/d(var1) = var1
    >>> opt.minimize(loss, [var])
    >>> # Step is `- learning_rate * grad`
    >>> var.numpy()
    0.9

    >>> opt = tf.keras.optimizers.experimental.SGD(0.1, momentum=0.9)
    >>> var = tf.Variable(1.0)
    >>> val0 = var.value()
    >>> loss = lambda: (var ** 2)/2.0         # d(loss)/d(var1) = var1
    >>> # First step is `- learning_rate * grad`
    >>> opt.minimize(loss, [var])
    >>> val1 = var.value()
    >>> (val0 - val1).numpy()
    0.1
    >>> # On later steps, step-size increases because of momentum
    >>> opt.minimize(loss, [var])
    >>> val2 = var.value()
    >>> (val1 - val2).numpy()
    0.18

    Reference:
        - For `nesterov=True`, See [Sutskever et al., 2013](
          http://jmlr.org/proceedings/papers/v28/sutskever13.pdf).
    {Gz?        FNGz?Tc                    sl   t  jd||||||	|
||d	| | || _|| _|| _t|ttfr2|dk s.|dkr4t	dd S d S )N)	nameweight_decayclipnorm	clipvalueglobal_clipnormuse_emaema_momentumema_overwrite_frequencyjit_compiler      z"`momentum` must be between [0, 1]. )
super__init__Z_build_learning_rate_learning_ratemomentumnesterov
isinstanceintfloat
ValueError)selflearning_rater   r   Zamsgradr   r   r   r   r   r   r   r   r
   kwargs	__class__r   [D:\Projects\ConvertPro\env\Lib\site-packages\keras/optimizers/optimizer_experimental/sgd.pyr   _   s*   
zSGD.__init__c                    sP   t  | t| dr| jrdS g | _|D ]}| j| j|dd qd| _dS )zInitialize optimizer variables.

        SGD optimizer has one variable `momentums`, only set if `self.momentum`
        is not 0.

        Args:
          var_list: list of model variables to build SGD variables on.
        _builtNm)Zmodel_variableZvariable_nameT)r   buildhasattrr$   	momentumsappendZadd_variable_from_reference)r   Zvar_listvarr!   r   r#   r&      s   	
z	SGD.buildc                 C   s"  t | j|j}d}| |}t | j|j}| j| j|  }t|t j	r_t 	|j
 | |j}|durX|||  || | jrQ|| |||  dS || dS || dS |dur|| | ||   | jr|| | ||   dS || dS || |  dS )z=Update step given gradient and the associated model variable.N)tfcastr   ZdtypeZ_var_keyr   r(   Z_index_dictr   ZIndexedSlicesvaluesindicesZassignZscatter_addr   Z
assign_add)r   Zgradientvariablelrr%   Zvar_keyr   Z	add_valuer   r   r#   update_step   s.   


zSGD.update_stepc                    s,   t   }|| | j| j| jd |S )N)r   r   r   )r   
get_configupdateZ_serialize_hyperparameterr   r   r   )r   configr!   r   r#   r2      s   
	zSGD.get_config)r   r   FFNNNNFr	   NTr   )	__name__
__module____qualname____doc__r   r&   r1   r2   __classcell__r   r   r!   r#   r      s&    B%#r   z{{base_optimizer_keyword_args}})r8   Ztensorflow.compat.v2compatv2r+   Z'keras.optimizers.optimizer_experimentalr   Z keras.saving.object_registrationr   Z tensorflow.python.util.tf_exportr   Z	Optimizerr   replaceZbase_optimizer_keyword_argsr   r   r   r#   <module>   s    .
