o
    Ne                    @   s  d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	Z	d dlm
Z
 d dlZd dlZd dlmZ d dlmZ ddlmZ ddlmZ d dlmZ dd	lmZ dd
lmZmZmZ ddlmZmZmZm Z  d dlm!Z! ddl"m#Z# d dl$m%Z%m&Z& d dl'm(Z(m)Z)m*Z* d dl'm+Z+m,Z, d dl'm-Z. d dl/m0Z0 d dl1m2Z2 d dl3m4  m5Z5 dgZ6e7dZ8e7dZ9dd Z:dd Z;G dd de<Z=G dd de<Z>dS )    N)deepcopy)in_profiler_mode   )parallel_helper   )unique_name)core)LayerObjectHelper)record_program_ops_pre_hook!set_op_customized_attrs_post_hookLayerOpsRecoder)program_desc_tracing_guardparam_guardin_declarative_mode_convert_into_variable)	framework)	ParamAttr)Executorglobal_scope)_non_static_modeconvert_np_dtype_to_dtype_in_dygraph_mode)Programprogram_guard)_current_expected_place)VarDesc)no_gradLayerz(.)([A-Z][a-z]+)z([a-z])([A-Z])c                 C   s   t d| }td| S )Nz\1_\2)_first_cap_resub_all_cap_relower)names1 r$   KD:\Projects\ConvertPro\env\Lib\site-packages\paddle/fluid/dygraph/layers.py_convert_camel_to_snake5   s   r&   c                 C   sd   |  d}t|dkr| S g }t|D ]\}}|dkr&|t|d |  q|d d d| S )N
r   r    )splitlen	enumerateappendstrjoin)stringindentr#   s2idxliner$   r$   r%   
_addindent:   s   
r4   c                   @   s$   e Zd ZdZdZdd Zdd ZdS )HookRemoveHelperz5 A HookRemoveHelper that can be used to remove hook. r   c                 C   s&   t || _tj| _t jd7  _d S )Nr   )weakrefref
_hooks_refr5   next_hook_id_hook_idselfhooksr$   r$   r%   __init__J   s   zHookRemoveHelper.__init__c                 C   s.   |   }|d ur| j|v r|| j= d S d S d S N)r8   r:   r;   r$   r$   r%   removeO   s   zHookRemoveHelper.removeN)__name__
__module____qualname____doc__r9   r>   r@   r$   r$   r$   r%   r5   E   s
    r5   c                   @   s  e Zd ZdZdiddZdd Zdd	 Zd
d Zdd Zdd Z	dd Z
				djddZedddddkddZdkddZdlddZd d! Zd"d# Zdmd$d%Zdnd'd(Zdod)d*Zdld+d,Zdld-d.Zdnd/d0Zd1d2 Zd3d4 Zd5d6 Zd7d8 Zd9d: Zd;d< Zd=d> Zd?d@ ZdAdB Z dCdD Z!dEdF Z"dGdH Z#dIdJ Z$dKdL Z%dMdN Z&dOdP Z'dQdR Z(dSdT Z)			&dpdUdVZ*			&		dqdWdXZ+			&	drdYdZZ,			&	drd[d\Z-e.j/dld]d^Z0dkd_d`Z1dldadbZ2dcdd Z3					dsdedfZ4dgdh Z5e0Z6e0Z7dS )tr   a  
    Dynamic graph Layer based on OOD, includes the parameters of the layer, the structure of the forward graph and so on.

    Parameters:
        name_scope (str, optional): prefix name used by the layer to name parameters.
            If prefix is "my_layer", parameter name in MyLayer
            can be "my_layer_0.w_n", where "w" is the parameter
            base name and "n" is an unique suffix auto-generated.
            If None, prefix name will be snake cased class name. Default: None.
        dtype(str, optional): data type of this parameter.
                If set str, it can be "bool",  "float16", "float32", "float64",
                "int8", "int16", "int32", "int64", "uint8" or "uint16".
                Default: "float32"

    Returns:
        None

    Examples:
        .. code-block:: python

            import paddle
            class MyLayer(paddle.nn.Layer):
                def __init__(self):
                    super(MyLayer, self).__init__()
                    self._linear = paddle.nn.Linear(1, 1)
                    self._dropout = paddle.nn.Dropout(p=0.5)
                def forward(self, input):
                    temp = self._linear(input)
                    temp = self._dropout(temp)
                    return temp
            x = paddle.randn([10, 1], 'float32')
            mylayer = MyLayer()
            mylayer.eval()  # set mylayer._dropout to eval mode
            out = mylayer(x)
            mylayer.train()  # set mylayer._dropout to train mode
            out = mylayer(x)
    Nfloat32c                 C   s   d| _ |d u rt| jj}t|| _t| j| _d| _	|| _
t | _t | _t | _t | _t | _t | _tg g d| _i | _t | _t | _d| _t | _t | _d S )NTF)opsr=   )trainingr&   	__class__rA   r   generate
_full_namer	   _helper_built_dtyper   r   Z_init_in_dynamic_modecollectionsOrderedDict_parameters_buffersset!_non_persistable_buffer_names_set_sub_layers_loaddict_holderr   _op_recorder_customized_attrs_forward_pre_hooks_forward_post_hooksZ_casted_by_pure_fp16_state_dict_hooksZ_original_funcs)r<   Z
name_scopedtyper$   r$   r%   r>   |   s(   







zLayer.__init__c                 C   0   t  r	t   d| _|  D ]}d|_qdS )a  
        Sets this Layer and all its sublayers to training mode.
        This only effects certain modules like `Dropout` and `BatchNorm`.

        Returns:
            None

        Example::
            .. code-block:: python

                import paddle

                class MyLayer(paddle.nn.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self._linear = paddle.nn.Linear(1, 1)
                        self._dropout = paddle.nn.Dropout(p=0.5)

                    def forward(self, input):
                        temp = self._linear(input)
                        temp = self._dropout(temp)
                        return temp

                x = paddle.randn([10, 1], 'float32')
                mylayer = MyLayer()
                mylayer.eval()  # set mylayer._dropout to eval mode
                out = mylayer(x)
                mylayer.train()  # set mylayer._dropout to train mode
                out = mylayer(x)

        TN)r   r   _dygraph_tracerZ
train_moderG   	sublayersr<   layerr$   r$   r%   train   s   #zLayer.trainc                 C   r\   )a  
        Sets this Layer and all its sublayers to evaluation mode.
        This only effects certain modules like `Dropout` and `BatchNorm`.

        Returns:
            None

        Example::
            .. code-block:: python

                import paddle

                class MyLayer(paddle.nn.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self._linear = paddle.nn.Linear(1, 1)
                        self._dropout = paddle.nn.Dropout(p=0.5)

                    def forward(self, input):
                        temp = self._linear(input)
                        temp = self._dropout(temp)
                        return temp

                x = paddle.randn([10, 1], 'float32')
                mylayer = MyLayer()
                mylayer.eval()  # set mylayer._dropout to eval mode
                out = mylayer(x)
                print(out)

        FN)r   r   r]   Z	eval_moderG   r^   r_   r$   r$   r%   eval   s   "z
Layer.evalc                 C   s$   |   D ]}|| q||  | S )a  
        Applies ``fn`` recursively to every sublayer (as returned by ``.sublayers()``)
        as well as self. Typical use includes initializing the parameters of a model.

        Parameters:
            fn (function): a function to be applied to each sublayer

        Returns:
            Layer: self

        Example::
            .. code-block:: python

              import paddle
              import paddle.nn as nn

              net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))

              def init_weights(layer):
                  if type(layer) == nn.Linear:
                      print('before init weight:', layer.weight.numpy())
                      new_weight = paddle.full(shape=layer.weight.shape, dtype=layer.weight.dtype, fill_value=0.9)
                      layer.weight.set_value(new_weight)
                      print('after init weight:', layer.weight.numpy())

              net.apply(init_weights)

              print(net.state_dict())
        )childrenapply)r<   fnr`   r$   r$   r%   rd      s   zLayer.applyc                 C      | j S )a  Full name for this layer, composed by name_scope + "/" + MyLayer.__class__.__name__

        Returns:
            str: full name of this layer.

        Example::
            .. code-block:: python

                import paddle

                class LinearNet(paddle.nn.Layer):
                    def __init__(self):
                        super(LinearNet, self).__init__(name_scope = "demo_linear_net")
                        self._linear = paddle.nn.Linear(1, 1)

                    def forward(self, x):
                        return self._linear(x)

                linear_net = LinearNet()
                print(linear_net.full_name())   # demo_linear_net_0

        )rJ   r<   r$   r$   r%   	full_name  s   zLayer.full_namec                 C      t | j}|| j|j< |S )a  Register a forward post-hook for Layer. The hook will be called after `forward` function has been computed.

        It should have the following form, `input` and `output` of the `hook` is `input` and `output` of the `Layer` respectively.
        User can use forward post-hook to change the output of the Layer or perform information statistics tasks on the Layer.

        hook(Layer, input, output) -> None or modified output

        Parameters:
            hook(function): a function registered as a forward post-hook

        Returns:
            HookRemoveHelper: a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .

        Examples:
            .. code-block:: python

                import paddle
                import numpy as np

                # the forward_post_hook change the output of the layer: output = output * 2
                def forward_post_hook(layer, input, output):
                    # user can use layer, input and output for information statistis tasks

                    # change the output
                    return output * 2

                linear = paddle.nn.Linear(13, 5)

                # register the hook
                forward_post_hook_handle = linear.register_forward_post_hook(forward_post_hook)

                value1 = np.arange(26).reshape(2, 13).astype("float32")
                in1 = paddle.to_tensor(value1)

                out0 = linear(in1)

                # remove the hook
                forward_post_hook_handle.remove()

                out1 = linear(in1)

                # hook change the linear's output to output * 2, so out0 is equal to out1 * 2.
                assert (out0.numpy() == (out1.numpy()) * 2).any()
        )r5   rY   r:   r<   hookZhook_remove_helperr$   r$   r%   register_forward_post_hook+  s   
-z Layer.register_forward_post_hookc                 C   ri   )a  Register a forward pre-hook for Layer. The hook will be called before `forward` function has been computed.

        It should have the following form, `input` of the `hook` is `input` of the `Layer`,
        hook can either return a tuple or a single modified value in the hook. We will wrap the value into a tuple if
        a single value is returned(unless that value is already a tuple).
        User can use forward pre-hook to change the input of the Layer or perform information statistics tasks on the Layer.

        hook(Layer, input) -> None or modified input

        Parameters:
            hook(function): a function registered as a forward pre-hook

        Returns:
            HookRemoveHelper: a HookRemoveHelper object that can be used to remove the added hook by calling `hook_remove_helper.remove()` .

        Examples:
            .. code-block:: python

                import paddle
                import numpy as np

                # the forward_pre_hook change the input of the layer: input = input * 2
                def forward_pre_hook(layer, input):
                    # user can use layer and input for information statistis tasks

                    # change the input
                    input_return = (input[0] * 2)
                    return input_return

                linear = paddle.nn.Linear(13, 5)

                # register the hook
                forward_pre_hook_handle = linear.register_forward_pre_hook(forward_pre_hook)

                value0 = np.arange(26).reshape(2, 13).astype("float32")
                in0 = paddle.to_tensor(value0)
                out0 = linear(in0)

                # remove the hook
                forward_pre_hook_handle.remove()

                value1 = value0 * 2
                in1 = paddle.to_tensor(value1)
                out1 = linear(in1)

                # hook change the linear's input to input * 2, so out0 is equal to out1.
                assert (out0.numpy() == out1.numpy()).any()
        )r5   rX   r:   rj   r$   r$   r%   register_forward_pre_hook\  s   
1zLayer.register_forward_pre_hookFc                 C   s6   t |}t|tjr|dkrd}| j|||||S )aY  Create parameters for this layer.

        Parameters:
            shape(list): Shape of the parameter.
            attr(ParamAttr, optional): Parameter attribute of weight. Please refer to :ref:`api_paddle_ParamAttr`. Default: None.
            dtype(str, optional): Data type of this parameter.
                If set str, it can be "bool",  "float16", "float32", "float64",
                "int8", "int16", "int32", "int64", "uint8" or "uint16". Default: "float32".
            is_bias(bool, optional): if this is a bias parameter. Default: False.
            default_initializer(Initializer, optional): the default initializer for this parameter.
                If set None, default initializer will be set to paddle.nn.initializer.Xavier and paddle.nn.initializer.Constant
                for non-bias and bias parameter, respectively. Default: None.

        Returns:
            :Tensor, created parameter.

        Examples:
            .. code-block:: python

                import paddle

                class MyLayer(paddle.nn.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self._linear = paddle.nn.Linear(1, 1)
                        w_tmp = self.create_parameter([1,1])
                        self.add_parameter("w_tmp", w_tmp)

                    def forward(self, input):
                        return self._linear(input)

                mylayer = MyLayer()
                for name, param in mylayer.named_parameters():
                    print(name, param)      # will print w_tmp,_linear.weight,_linear.bias

         N)copyr   
isinstancesixstring_typesrK   create_parameter)r<   shapeattrr[   Zis_biasZdefault_initializerZ	temp_attrr$   r$   r%   rs     s   
*zLayer.create_parameterz2.0.0zpaddle.nn.Layer.create_tensorz(New api in create_tensor, easier to use.)ZsinceZ	update_toreasonc                 C   P   |durd | j|g}ntd | jdg}| jj j|||tj	j
jdS )a  

        Create Tensor for this layer.

        Parameters:
            name(str, optional): name of the tensor. Please refer to :ref:`api_guide_Name` . Default: None

            persistable(bool, optional): if set this tensor persistable. Default: False

            dtype(str, optional): data type of this parameter. If set str, it can be "bool", "float16", "float32", "float64","int8", "int16", "int32", "int64", "uint8" or "uint16". If set None, it will be "float32". Default: None

        Returns:
            Tensor, created Tensor.

        Examples:
            .. code-block:: python

                import paddle

                class MyLinear(paddle.nn.Layer):
                    def __init__(self,
                                in_features,
                                out_features):
                        super(MyLinear, self).__init__()
                        self.linear = paddle.nn.Linear( 10, 10)

                        self.back_var = self.create_variable(name = "linear_tmp_0", dtype=self._dtype)

                    def forward(self, input):
                        out = self.linear(input)
                        paddle.assign( out, self.back_var)

                        return out

        N._generated_varr"   persistabler[   typer.   rJ   r   rI   rK   Zmain_programZcurrent_blockZ
create_varr   r   VarTypeZ
LOD_TENSORr<   r"   r{   r[   var_namer$   r$   r%   create_variable  s   'zLayer.create_variablec                 C   rw   )a  

        Create Tensor for this layer.

        Parameters:
            name(str, optional): name of the tensor. Please refer to :ref:`api_guide_Name` . Default: None
            persistable(bool, optional): if set this tensor persistable. Default: False
            dtype(str, optional): data type of this parameter.
                If set str, it can be "bool",  "float16", "float32", "float64",
                "int8", "int16", "int32", "int64", "uint8" or "uint16".
                If set None, it will be "float32". Default: None

        Returns:
            Tensor, created Tensor.

        Examples:
            .. code-block:: python

                import paddle

                class MyLinear(paddle.nn.Layer):
                    def __init__(self,
                                in_features,
                                out_features):
                        super(MyLinear, self).__init__()
                        self.linear = paddle.nn.Linear( 10, 10)

                        self.back_var = self.create_tensor(name = "linear_tmp_0", dtype=self._dtype)

                    def forward(self, input):
                        out = self.linear(input)
                        paddle.assign( out, self.back_var)

                        return out

        Nrx   ry   rz   r}   r   r$   r$   r%   create_tensor  s   %zLayer.create_tensorTc                 C      dd | j |dD }|S )aa  Returns a list of all Parameters from current layer and its sub-layers.

        Returns:
            list of Tensor : a list of Parameters.

        Examples:
            .. code-block:: python

            import paddle

            linear = paddle.nn.Linear(1,1)
            print(linear.parameters())  # print linear_0.w_0 and linear_0.b_0

        c                 S      g | ]\}}|qS r$   r$   ).0_paramr$   r$   r%   
<listcomp>5      z$Layer.parameters.<locals>.<listcomp>include_sublayers)named_parametersr<   r   retr$   r$   r%   
parameters&  s   zLayer.parametersc                 c   s    |   D ]\}}|V  qdS )aJ  Returns an iterator over immediate children layers.

        Yields:
            Layer: a child layer

        Examples:
            .. code-block:: python

                import paddle

                linear1 = paddle.nn.Linear(10, 3)
                linear2 = paddle.nn.Linear(3, 10, bias_attr=False)
                model = paddle.nn.Sequential(linear1, linear2)

                layer_list = list(model.children())

                print(layer_list)   # [<paddle.nn.layer.common.Linear object at 0x7f7b8113f830>, <paddle.nn.layer.common.Linear object at 0x7f7b8113f950>]

        N)named_children)r<   r   r`   r$   r$   r%   rc   ;  s   zLayer.childrenc                 c   sD    t  }| j D ]\}}|dur||vr|| ||fV  q	dS )a  Returns an iterator over immediate children layers, yielding both
        the name of the layer as well as the layer itself.

        Yields:
            (string, Layer): Tuple containing a name and child layer

        Examples:
            .. code-block:: python

                import paddle

                linear1 = paddle.nn.Linear(10, 3)
                linear2 = paddle.nn.Linear(3, 10, bias_attr=False)
                model = paddle.nn.Sequential(linear1, linear2)
                for prefix, layer in model.named_children():
                    print(prefix, layer)
                    # ('0', <paddle.nn.layer.common.Linear object at 0x7fb61ed85830>)
                    # ('1', <paddle.nn.layer.common.Linear object at 0x7fb61ed85950>)

        N)rR   rT   itemsadd)r<   memor"   r`   r$   r$   r%   r   R  s   

zLayer.named_childrenc                 C   r   )a  Returns a list of sub layers.

        Parameters:
            include_self(bool, optional): Whether return self as sublayers. Default: False

        Returns:
            list of Layer : a list of sub layers.

        Examples:
            .. code-block:: python

                import paddle

                class MyLayer(paddle.nn.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self._linear = paddle.nn.Linear(1, 1)
                        self._dropout = paddle.nn.Dropout(p=0.5)

                    def forward(self, input):
                        temp = self._linear(input)
                        temp = self._dropout(temp)
                        return temp

                mylayer = MyLayer()
                print(mylayer.sublayers())  # [<paddle.nn.layer.common.Linear object at 0x7f44b58977d0>, <paddle.nn.layer.common.Dropout object at 0x7f44b58978f0>]

        c                 S   r   r$   r$   )r   r   r`   r$   r$   r%   r     s    z#Layer.sublayers.<locals>.<listcomp>)include_self)named_sublayers)r<   r   r   r$   r$   r%   r^   m  s   
zLayer.sublayersrn   c                 c       t  }|r| j|ddnt|g| g}|D ]-\}}|j }|D ]!\}}	|	du s-|	|v r.q!||	 ||r8dnd | }
|
|	fV  q!qdS )a?  
        Returns an iterator over all parameters in the Layer, yielding tuple of name and parameter.

        Parameters:
            prefix(str, optional): Prefix to prepend to all parameter names. Default: ''.
            include_sublayers(bool, optional): Whether include the parameters of sublayers.
                If True, also include the named parameters from sublayers. Default: True.

        Yields:
            (string, Parameter): Tuple of name and Parameter

        Examples:
            .. code-block:: python

                import paddle

                fc1 = paddle.nn.Linear(10, 3)
                fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
                model = paddle.nn.Sequential(fc1, fc2)
                for name, param in model.named_parameters():
                    print(name, param)

        Tprefixr   Nrx   rn   )rR   r   ziprP   r   r   )r<   r   r   Z
params_setr   layer_prefixsublayerparamskeyr   r"   r$   r$   r%   r     s(   

zLayer.named_parametersc           	      c   s    |du rt  }|r| |vr||  || fV  | j D ]%\}}|du r&q||r+dnd | }|j|d|dD ]	\}}||fV  q8qdS )au  
        Returns an iterator over all sublayers in the Layer, yielding tuple of name and sublayer.
        The duplicate sublayer will only be yielded once.

        Parameters:
            prefix(str, optional): Prefix to prepend to all parameter names. Default: ''.
            include_self(bool, optional): Whether include the Layer itself. Default: False.
            layers_set(set, optional): The set to record duplicate sublayers. Default: None.

        Yields:
            (string, Layer): Tuple of name and Layer

        Examples:
            .. code-block:: python

                import paddle

                fc1 = paddle.nn.Linear(10, 3)
                fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
                model = paddle.nn.Sequential(fc1, fc2)
                for prefix, layer in model.named_sublayers():
                    print(prefix, layer)

        Nrx   rn   T)r   r   
layers_set)rR   r   rT   r   r   )	r<   r   r   r   r   r`   r   plr$   r$   r%   r     s"   

zLayer.named_sublayersc                 C   s   d| j vr	tdt|tjstdt|jd|v r!t	d|dkr)t	dt
| |r:|| jvr:t	d||d	urWt|tjksWt|tjjksWtd
t|j|| j|< |rf| j| d	S | j| d	S )aO  
        Registers a tensor as buffer into the layer.

        `buffer` is a non-trainable tensor and will not be updated by optimizer,
        but is necessary for evaluation and inference. For example, the mean and variance in BatchNorm layers.
        The registered buffer is persistable by default, and will be saved into
        `state_dict` alongside parameters. If set persistable=False, it registers
        a non-persistable buffer, so that it will not be a part of `state_dict` .

        Buffers can be accessed as attributes using given names.

        Parameters:
            name (string): name of the buffer. The buffer can be accessed
                from this layer using the given name
            tensor (Tensor): the tensor to be registered as buffer.
            persistable (bool): whether the buffer is part of this layer's
                state_dict.

        Returns:
            None

        Examples:
            .. code-block:: python

                import numpy as np
                import paddle

                linear = paddle.nn.Linear(10, 3)
                value = np.array([0]).astype("float32")
                buffer = paddle.to_tensor(value)
                linear.register_buffer("buf_name", buffer, persistable=True)

                # get the buffer by attribute.
                print(linear.buf_name)

        rQ   8super(YourLayer, self).__init__() should be called firstz7The name of buffer should be a string, but received {}.rx   zThe name of buffer can not contain `.`, because when you access the newly added buffer in the form of `self.**.**`, it will cause AttributeError.rn   z$The name of buffer can not be empty.zattribute '{}' already exists.NzAThe registered buffer should be a Paddle.Tensor, but received {}.)__dict__
ValueErrorrp   rq   rr   	TypeErrorformatr|   rA   KeyErrorhasattrrQ   r   VarBaseeagerTensorrS   discardr   )r<   r"   Ztensorr{   r$   r$   r%   register_buffer  s6   
&
zLayer.register_bufferc                 C   r   )a  
        Returns a list of all buffers from current layer and its sub-layers.

        Parameters:
            include_sublayers(bool, optional): Whether include the buffers of sublayers. If True, also include the buffers from sublayers. Default: True

        Returns:
            list of Tensor : a list of buffers.

        Examples:
            .. code-block:: python

                import numpy as np
                import paddle

                linear = paddle.nn.Linear(10, 3)
                value = np.array([0]).astype("float32")
                buffer = paddle.to_tensor(value)
                linear.register_buffer("buf_name", buffer, persistable=True)

                print(linear.buffers())     # == print([linear.buf_name])

        c                 S   r   r$   r$   )r   r   bufferr$   r$   r%   r   6  r   z!Layer.buffers.<locals>.<listcomp>r   )named_buffersr   r$   r$   r%   buffers  s   zLayer.buffersc                 c   r   )aJ  
        Returns an iterator over all buffers in the Layer, yielding tuple of name and Tensor.

        Parameters:
            prefix(str, optional): Prefix to prepend to all buffer names. Default: ''.
            include_sublayers(bool, optional): Whether include the buffers of sublayers.
                If True, also include the named buffers from sublayers. Default: True.

        Yields:
            (string, Tensor): Tuple of name and tensor

        Examples:
            .. code-block:: python

                import numpy as np
                import paddle

                fc1 = paddle.nn.Linear(10, 3)
                buffer1 = paddle.to_tensor(np.array([0]).astype("float32"))
                # register a tensor as buffer by specific `persistable`
                fc1.register_buffer("buf_name_1", buffer1, persistable=True)

                fc2 = paddle.nn.Linear(3, 10)
                buffer2 = paddle.to_tensor(np.array([1]).astype("float32"))
                # register a buffer by assigning an attribute with Tensor.
                # The `persistable` can only be False by this way.
                fc2.buf_name_2 = buffer2

                model = paddle.nn.Sequential(fc1, fc2)

                # get all named buffers
                for name, buffer in model.named_buffers():
                    print(name, buffer)

        Tr   Nrx   rn   )rR   r   r   rQ   r   r   )r<   r   r   Zbuffers_setr   r   r   r   r   r   r"   r$   r$   r%   r   <  s(   $

zLayer.named_buffersc                 C   s    |   D ]	}|jr|  qdS )a  
        Clear the gradients of all parameters for this layer.

        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle
                import numpy as np

                value = np.arange(26).reshape(2, 13).astype("float32")
                a = paddle.to_tensor(value)
                linear = paddle.nn.Linear(13, 5)
                adam = paddle.optimizer.Adam(learning_rate=0.01,
                                            parameters=linear.parameters())
                out = linear(a)
                out.backward()
                adam.step()
                linear.clear_gradients()

        N)r   Z	trainableZclear_gradient)r<   r   r$   r$   r%   clear_gradientsm  s
   zLayer.clear_gradientsc                 O   s   d S r?   r$   )r<   argskwargsr$   r$   r%   _build_once  s   zLayer._build_oncec                 O   s"  | j  D ]}|| |}|d urt|ts|f}|}q| jsMtd  | j|i | t r;t	
 r;t| j  W d    n1 sEw   Y  d| _t rst| jjtjj | j|i |}W d    n1 smw   Y  n| j|i |}| j D ]}|| ||}|d ur|}q|S )NFT)rX   valuesrp   tuplerL   r   r   r   Z_is_data_parallel_modepaddleZis_compiled_with_xpuZ_broadcast_parametersrP   r   profilerZRecordEventrH   rA   ZTracerEventTypeForwardforwardrY   )r<   inputsr   Zforward_pre_hookhook_resultZoutputsZforward_post_hookr$   r$   r%   _dygraph_call_func  sB   



zLayer._dygraph_call_funcc                 O   sT   t  s"| js"| js"| js"t r"t s"| j|i | | j|i |S | j|i |S r?   )	r   rX   rY   rL   r   r   r   r   r   r<   r   r   r$   r$   r%   __call__  s   zLayer.__call__c                 O   s   t )z
        Defines the computation performed at every call.
        Should be overridden by all subclasses.

        Parameters:
            *inputs(tuple): unpacked tuple arguments
            **kwargs(dict): unpacked dict arguments
        )NotImplementedErrorr   r$   r$   r%   r     s   	zLayer.forwardc                 G   s   t d)Nz"Layer shouldn't implement backward)r   )r<   r   r$   r$   r%   backward  s   zLayer.backwardc                 C   s$   t |ts|dksJ || j|< |S )a1  Adds a sub Layer instance.

        Added sublayer can be accessed by self.name

        Parameters:
            name(str): name of this sublayer.
            sublayer(Layer): an instance of Layer.
        Returns:
            Layer: the sublayer passed in.

        Examples:
            .. code-block:: python

                import paddle

                class MySequential(paddle.nn.Layer):
                    def __init__(self, *layers):
                        super(MySequential, self).__init__()
                        if len(layers) > 0 and isinstance(layers[0], tuple):
                            for name, layer in layers:
                                self.add_sublayer(name, layer)
                        else:
                            for idx, layer in enumerate(layers):
                                self.add_sublayer(str(idx), layer)

                    def forward(self, input):
                        for layer in self._sub_layers.values():
                            input = layer(input)
                        return input

                fc1 = paddle.nn.Linear(10, 3)
                fc2 = paddle.nn.Linear(3, 10, bias_attr=False)
                model = MySequential(fc1, fc2)
                for prefix, layer in model.named_sublayers():
                    print(prefix, layer)
        N)rp   r   rT   )r<   r"   r   r$   r$   r%   add_sublayer  s   %
zLayer.add_sublayerc                 C   s   d| j vr	tdt|tjstdt|jd|v r!t	d|dkr)t	dt
| |r:|| jvr:t	d||d	urNt|tjsNtd
t|j|d	u rWd	| j|< t| jdkru|j| jv slJ d|j|| j|j  || j|< |S )a  Adds a Parameter instance.

        Added parameter can be accessed by self.name

        Parameters:
            name(str): name of this sublayer.
            parameter(Parameter): an instance of Parameter.
        Returns:
            Parameter: the parameter passed in.
        Examples:
            .. code-block:: python

                import paddle

                class MyLayer(paddle.nn.Layer):
                    def __init__(self):
                        super(MyLayer, self).__init__()
                        self._linear = paddle.nn.Linear(1, 1)
                        w_tmp = self.create_parameter([1,1])
                        self.add_parameter("w_tmp", w_tmp)

                    def forward(self, input):
                        return self._linear(input)

                mylayer = MyLayer()
                for name, param in mylayer.named_parameters():
                    print(name, param)      # will print w_tmp,_linear.weight,_linear.bias

        rP   z;super(YourLayer, self).__init__() should be called firstly.z:The name of parameter should be a string, but received {}.rx   zThe name of parameter can not contain `.`, because when you access the newly added parameter in the form of `self.**.**`, it will cause AttributeError.rn   z'The name of parameter can not be empty.z"The parameter '{}' already exists.NzAThe parameter to be added should be a Parameter, but received {}.r   8Parameter not found, Can't not find [ {} ] in state_dict)r   RuntimeErrorrp   rq   rr   r   r   r|   rA   r   r   rP   r   	Parameterr*   rU   r"   	set_value)r<   r"   Z	parameterr$   r$   r%   add_parameter  sB   


zLayer.add_parameterc                    s    fdd}t |tstdt|j j| |dds4 t	}t
 jjdks/J |g j_|ddsa t}t
 jdkrN jj|jdd	 t
 jjdksXJ  jj| d
S d
S )aY  
        Add customized attribute while append_op. In case of quantization, we want to save
        some attributes into op_desc while exporting inference model by @to_static.

        Arguments:
            attrs(dict): customized attributes that will be added into op_descs.

        NOTE: The interface is only exposed to developers.
        c                    s@   | r j n j}| rtnt}d}|rtt|}|| |k}|S )NF)rX   rY   r
   r   nextreversed)is_pre_hookZlayers_hooksZcandidate_hookZalready_registedlast_keyrg   r$   r%   is_already_registered;  s   z2Layer._set_op_attrs.<locals>.is_already_registeredz+attrs should be type(dict), but received {}T)r   r   Fr   )lastN)rp   dictr   r   r|   rA   rW   updaterm   r
   r*   rV   r=   rl   r   rY   move_to_endr:   r,   )r<   attrsr   Zpre_hook_helperZpost_hook_helperr$   rg   r%   _set_op_attrs0  s2   




zLayer._set_op_attrsc                 C   rf   r?   )r   rg   r$   r$   r%   __getstate__a  s   zLayer.__getstate__c                 C   s   | j | d S r?   )r   r   )r<   stater$   r$   r%   __setstate__d  s   zLayer.__setstate__c                 C   s   d| j v r| j d }|| jv rt rt| j| S | j| S d| j v r2| j d }|| jv r2| j| S d| j v rM| j d }||v rMt rIt|| S || S t| |S )NrP   rT   rQ   )r   rP   r   r   rT   object__getattribute__)r<   r"   rP   rT   rQ   r$   r$   r%   __getattr__g  s"   









zLayer.__getattr__c                    s   fdd}t tt|  d trt|  | | jdd }t |tj	rZ|d u r-t
dt| jdkrK|j| jv sBJ d|j|| j|j  || j| j| j || < d S |d urw |v rw|d urqtd t|jd | < d S | jdd }t |tr|d u rt
d|| j| j| j || < d S |d ur |v r|d urtd	 t|jd | < d S | jd
d }t |tjtjjfr|d u rt
d|| j| j| j  | jvr| j  |jstd  |_|| < d S |d urY |v rYt|tjkrCddlm } t! r|  d u rt"d |  d u s1tt|  tjkr9||| < d S ||t|   d S |d urStd t|jd | < d S t|  | d S )Nc                     s   | D ]	} |v r| = qd S r?   r$   )Zdictsdr"   r$   r%   _remove_if_exist|  s
   z+Layer.__setattr__.<locals>._remove_if_existrP   r   r   r   zNassignment to parameter '{}' should be of type Parameter or None, but got '{}'rT   zIassignment to sublayer '{}' should be of type Layer or None, but got '{}'rQ   Z	_buffers_)assignziIn Dy2stat, self.{0} is a buffer and self.{0} is not allowed to be set to Variable when self.{0} is None.zOassignment to buffers '{}' should be of type core.VarBase or None, but got '{}')#rp   getattrr|   propertyr   __setattr__r   getr   r   r   r*   rU   r"   r   r   rQ   rT   r   rA   r   rP   r   r   r   r   rS   r   r   rI   Variabler   r   r   r   )r<   r"   valuer   r   ZlayersrQ   r   r$   r   r%   r   z  s   



zLayer.__setattr__c                 C   s^   || j v r| j |= d S || jv r| j|= d S || jv r'| j|= | j| d S t| | d S r?   )rP   rT   rQ   rS   r   r   __delattr__)r<   r"   r$   r$   r%   r     s   


zLayer.__delattr__c                 C   sZ   t | j}t| j }t| j }t| j }t| j }|| | | | }|S )a  
        Return a list. Get all parameters, buffers(non-parameter tensors), sublayers, method and attr of Layer.

        Examples:
            .. code-block:: python
                import paddle
                import numpy as np

                class Mylayer(paddle.nn.Layer):
                    def __init__(self):
                        super(Mylayer, self).__init__()
                        self.linear1 = paddle.nn.Linear(10, 10)
                        self.linear2 = paddle.nn.Linear(5, 5)
                        self.conv2d = paddle.nn.Conv2D(3, 2, 3)
                        self.embedding = paddle.nn.Embedding(128, 16)
                        self.h_0 = paddle.to_tensor(np.zeros([10, 10]).astype('float32'))

                mylayer = Mylayer()
                print(dir(mylayer))
                # only parts are shown, because of list have too much content
                # ['__call__', '__class__',  ... , 'conv2d', 'embedding', 'h_0', 'linear1', 'linear2', ... , 'sublayers', 'train']

        )dirrH   listr   keysrP   rT   rQ   )r<   methodr   r   r^   r   r   r$   r$   r%   __dir__  s   
zLayer.__dir__c                 C   s   dS )zs
        Extra representation of this layer, you can have custom implementation
        of your own layer.
        rn   r$   rg   r$   r$   r%   
extra_repr  s   zLayer.extra_reprc                 C   s   g }|   }|d}g }| j D ]\}}t|}t|d}|d| d |  q| jjd }|rQt	|dkrE|dd
| d 7 }nt	|dkrQ||d 7 }|r^|dd
| d 7 }|d7 }|S )	Nr'   r   (z): r   z
  r   ))r   r)   rT   r   reprr4   r,   rH   rA   r*   r.   )r<   extra_linesr   Zsublayer_linesr"   r`   Zsublayer_strZ	final_strr$   r$   r%   __repr__  s$   

zLayer.__repr__c                 C   ri   r?   )r5   rZ   r:   rj   r$   r$   r%   register_state_dict_hook"  s   
zLayer.register_state_dict_hookc           
   	   C   s   |du rt  }| j D ]\}}|dur|||| < q| j D ]\}}|dur4|| jvr4|||| < q!|rY| j D ]\}}|durX| }	|	|	|	||| d  |	}q<|S )z
        The difference from state_dict() is that state_dict_hook will not be called, 
        but the original types of parameters and buffers will be maintained.
        Nrx   )
rN   rO   rP   r   rQ   rS   rT   ro   r   _obtain_parameters_buffers)
r<   destinationr   structured_name_prefixr"   datar   
layer_name
layer_itemdestination_tempr$   r$   r%   r   '  s.   
z Layer._obtain_parameters_buffersc              
   C   s   |du rt  }| j D ]\}}|dur|||| < q| j D ] \}}|s7|dur6|| jvr6|||| < q!|durA|||| < q!|rh| j D ]\}	}
|
durg| }||
	||||	 d || |}qI|r|| j
 D ]}||}|dur{|}qo|S )a  
        Get all parameters and persistable buffers of current layer and its sub-layers. And set them into a dict

        Parameters:
            destination(dict, optional) : If provide, all the parameters and persistable buffers will be set to this dict . Default: None
            include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
            include_non_persistable_buffer(bool, optional): If true, include non persistable buffers of current layer and its sub-layers, it is used in pure fp16 and jit.save. Default: False
            use_hook(bool, optional) : If true, the operations contained in _state_dict_hooks will be appended to the destination. Default: True
        Nrx   )rN   rO   rP   r   rQ   rS   rT   ro   r   _state_dict_implrZ   r   )r<   r   r   r   include_non_persistable_bufferuse_hookr"   r   r   r   r   r   Zstate_dict_hookr   r$   r$   r%   r   C  sD   
zLayer._state_dict_implc                 C      | j |||d|dS )a  
        Get all parameters and buffers of current layer and its sub-layers. And set them into a dict

        Parameters:
            destination(dict, optional) : If provide, all the parameters and persistable buffers will be set to this dict . Default: None
            include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
            use_hook(bool, optional) : If true, the operations contained in _state_dict_hooks will be appended to the destination. Default: True

        Retruns:
            dict: a dict contains all the parameters and persistable buffers.

        Examples:
            .. code-block:: python

                import paddle

                emb = paddle.nn.Embedding(10, 10)

                state_dict = emb.to_static_state_dict()
                paddle.save( state_dict, "paddle_dy.pdparams")

        Tr   r   r   r   r   r   r<   r   r   r   r   r$   r$   r%   to_static_state_dictr     zLayer.to_static_state_dictc                 C   r   )a  
        Get all parameters and persistable buffers of current layer and its sub-layers. And set them into a dict

        Parameters:
            destination(dict, optional) : If provide, all the parameters and persistable buffers will be set to this dict . Default: None
            include_sublayers(bool, optional) : If true, also include the parameters and persistable buffers from sublayers. Default: True
            use_hook(bool, optional) : If true, the operations contained in _state_dict_hooks will be appended to the destination. Default: True

        Retruns:
            dict: a dict contains all the parameters and persistable buffers.

        Examples:
            .. code-block:: python

                import paddle

                emb = paddle.nn.Embedding(10, 10)

                state_dict = emb.state_dict()
                paddle.save( state_dict, "paddle_dy.pdparams")

        Fr   r   r   r$   r$   r%   
state_dict  r   zLayer.state_dictc                    s    fdd}g }| j dd D ]5\}}|r|n|j}z|||}|| W q tyE }	 ztd|t|	  W Y d}	~	qd}	~	ww t	 rW|D ]	\}}
|
|
 qKdS dd }tt j}td	d
 |D t | |D ]	\}}
|||
 qpdS )a[  
        Set parameters and persistable buffers from state_dict. All the parameters and buffers will be reset by the tensor in the state_dict

        Parameters:
            state_dict(dict) : Dict contains all the parameters and persistable buffers.
            use_structured_name(bool, optional) : If true, use structured name as key, otherwise, use parameter or buffer name as key.
                                                  Default: True
        Returns:
            None

        Examples:
            .. code-block:: python

                import paddle

                emb = paddle.nn.Embedding(10, 10)

                state_dict = emb.state_dict()
                paddle.save(state_dict, "paddle_dy.pdparams")
                para_state_dict = paddle.load("paddle_dy.pdparams")
                emb.set_state_dict(para_state_dict)

        c                    s     | d }|d u rtd| t|tst|tr4t|t|kr0td| t|t|||fS t|j	r>|	 n|j	}t|t|j	krXtd| t|t|j	||fS )Nz%{} is not found in the provided dict.z;{} receieves the length of {}, but the expected shape is {}z5{} receives a shape {}, but the expected shape is {}.)
r   r   r   rp   r   r   r*   inspectismethodrt   )r   r   r   Zstate_shaper   r$   r%   _check_match  s0   z*Layer.set_state_dict.<locals>._check_matchF)r   zSkip loading for {}. Nc                 S   s   t  | j }| }| rt }n2| rt	 }n)|
 r6t }||  t| }nt }||  t| }||| d S r?   )r   Zfind_varr"   
get_tensorZ_placeZis_cpu_placer   CPUPlaceZis_cuda_pinned_placeCUDAPinnedPlaceZis_xpu_placeZPlaceZ	set_placeXPUPlaceZxpu_device_id	CUDAPlaceZgpu_device_idrR   )varZndarraytr   placer$   r$   r%   _set_var  s   

z&Layer.set_state_dict.<locals>._set_varc                 S   s   g | ]\}}|qS r$   r$   )r   r   r   r$   r$   r%   r     s    z(Layer.set_state_dict.<locals>.<listcomp>)r   r   r"   r,   r   warningswarnr   r-   r   r   r   _get_device_default_executorr   Z_create_loaded_parameterr   )r<   r   Zuse_structured_namer  Zmatched_param_stater   r   Zkey_nameZ	match_reserrr   r  executorr$   r  r%   set_state_dict  s0   
$zLayer.set_state_dictc                 C   s   | j |||dddS )a  
        Cast the parameters and buffers of Layer by the give device, dtype and blocking.

        Parameters:
            device(str|paddle.CPUPlace()|paddle.CUDAPlace()|paddle.CUDAPinnedPlace()|paddle.XPUPlace()|None, optional): The device of the Layer which want to be stored.
            If None, the device is the same with the original Tensor. If device is string, it can be ``cpu``, ``gpu:x`` and ``xpu:x``, where ``x`` is the
            index of the GPUs or XPUs. Default: None.

            dtype(str|numpy.dtype|paddle.dtype|None, optional): The type of the data. If None, the dtype is the same with the original Tensor. Default: None.

            blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
              asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None.
            
        Returns:
            self

        Examples:
            .. code-block:: python

                # required: skip
                import paddle

                linear=paddle.nn.Linear(2, 2)
                linear.weight
                #Parameter containing:
                #Tensor(shape=[2, 2], dtype=float32, place=CUDAPlace(0), stop_gradient=False,
                #       [[-0.32770029,  0.38653070],
                #        [ 0.46030545,  0.08158520]])

                linear.to(dtype='float64')
                linear.weight
                #Tenor(shape=[2, 2], dtype=float64, place=CUDAPlace(0), stop_gradient=False,
                #       [[-0.32770029,  0.38653070],
                #        [ 0.46030545,  0.08158520]])

                linear.to(device='cpu')
                linear.weight
                #Tensor(shape=[2, 2], dtype=float64, place=CPUPlace, stop_gradient=False,
                #       [[-0.32770029,  0.38653070],
                #        [ 0.46030545,  0.08158520]])
                linear.to(device=paddle.CUDAPinnedPlace(), blocking=False)
                linear.weight
                #Tensor(shape=[2, 2], dtype=float64, place=CUDAPinnedPlace, stop_gradient=False,
                #       [[-0.04989364, -0.56889004],
                #        [ 0.33960250,  0.96878713]])

        TF)devicer[   blockingr   floating_only)_to_impl)r<   r  r[   r  r$   r$   r%   to  s   0zLayer.toc              	   C   s   |r|   D ]}|||||| q| j D ]C\}}|d urZt  |||||}	W d    n1 s4w   Y  |jd urZt  || |||}
W d    n1 sUw   Y  q| j D ]\}}|d urr|||||| j|< q`|| _d S r?   )	rc   _applyrP   r   r   ZgradZ
_grad_ivarrQ   rM   )r<   funcr  r[   r  r   r`   r   r   Zparam_appliedZgrad_appliedbufr$   r$   r%   r  C  s(   

zLayer._applyc                 C   sD  |d u r|j }|d u r|j}t|tjurt|}|j  rOt|}t	
|j| d d d d }t }||k rL|t |}|    n|}n|}|d urz||jkrztjjj|j d |j|d}	W d    n1 stw   Y  n|}	|d ur|	j |s|	||}
n|	}
|  }|
  }|| |S )N   r   g333333?)r  )r[   )r  r[   r|   r   r~   r   Zis_gpu_placer   Zsize_of_dtypenpprodrt   gpu_memory_availableZ_copy_tor   r  r   r  _clearZfluidr   Z_dygraph_place_guardcastZ_equalsZ_share_data_with)r<   r
  r  r[   r  Z
size_dtypeZwaiting_alloc_memoryr  Zt_usedZt_castedZnew_tZ
dst_tensorZ
src_tensorr$   r$   r%   
_transformX  sF   



zLayer._transformc                    s   |du r|du r|du rS |dur5t |trtj|}nt |tjtjtjtj	fr,n	t
dt|j |du r<d}n	t |tsEJ d fdd}t  tjdtd ||||| W d   n1 skw   Y  |_S )	a  
        Cast the parameters and buffers of Layer by the give device, dtype and blocking.

        Parameters:
            device(str|paddle.CPUPlace()|paddle.CUDAPlace()|paddle.CUDAPinnedPlace()|paddle.XPUPlace()|None, optional): The device of the Layer which want to be stored.
            If None, the device is the same with the original Tensor. If device is string, it can be ``cpu``, ``gpu:x`` and ``xpu:x``, where ``x`` is the
            index of the GPUs or XPUs. Default: None.

            dtype(str|numpy.dtype|paddle.dtype|None, optional): The type of the data. If None, the dtype is the same with the original Tensor. Default: None.

            blocking(bool|None, optional): If False and the source is in pinned memory, the copy will be
              asynchronous with respect to the host. Otherwise, the argument has no effect. If None, the blocking is set True. Default: None.
            
            include_sublayers(bool|True, optional): If True, deal with self and all sublayers parameters and buffers, if not only deal with self parameters and buffers. Default: True.

            floating_only(bool|False, optional): If True, only cast all floating point parameters and buffers of Layer by the give device, dtype and blocking.

        Returns:
            self

        Nzdevice value error, must be str, paddle.CPUPlace(), paddle.CUDAPlace(), paddle.CUDAPinnedPlace() or paddle.XPUPlace(), but the type of device is Tz5blocking value error, must be the True, False or Nonec                    s"    r	t | s	| S | |||S r?   )r   Zis_floating_pointr"  )r
  r  r[   r  r  r<   r$   r%   	transform  s   z!Layer._to_impl.<locals>.transformignore)category)rp   r-   r   r  Z_convert_to_placer   r  r  r  r  r   r|   rA   boolr  catch_warningsfilterwarningsUserWarningr  rM   )r<   r  r[   r  r   r  r$  r$   r#  r%   r    s:   

zLayer._to_implc                 C   s&   t  }|  D ]	}||  q|S )z
        Return starup program containing initialization operations of all parameters.

        NOTE(dev): This is a very low level API and only for inner developer.
        )r   r   Z_create_init_opZglobal_block)r<   Zstartup_programr   r$   r$   r%   _startup_program  s   zLayer._startup_program)NrE   )NNFN)NNN)T)F)rn   T)rn   FN)NTrn   )NTrn   FT)NTrn   T)NNNTF)8rA   rB   rC   rD   r>   ra   rb   rd   rh   rl   rm   rs   
deprecatedr   r   r   rc   r   r^   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   Zdeprecate_stat_dictr  r  r  r"  r  r+  Zset_dict	load_dictr$   r$   r$   r%   r   U   s    
&*)%17
0
1
1

#
%
'
B
1$*@1^"

0
#
"
V
63
=)?rN   
contextlibsysnumpyr  rq   rero   r6   r  r   r   r   Zpaddle.profilerr   Zpaddle.profiler.utilsr   rn   r   r   Zpaddle.fluidr   Zlayer_object_helperr	   Zlayer_hooksr
   r   r   baser   r   r   r   r   Z
param_attrr   Zpaddle.fluid.executorr   r   Zpaddle.fluid.frameworkr   r   r   r   r   r   r  Zpaddle.fluid.corer   Zpaddle.fluid.dygraphr   Zpaddle.utils.deprecatedutilsr,  __all__compiler   r    r&   r4   r   r5   r   r$   r$   r$   r%   <module>   sH   

