o
    Ne%M                     @   sh   d dl mZ ddlmZmZmZmZmZmZm	Z	m
Z
 ddlZddgZG dd deZG d	d deZdS )
   )Layer   )sigmoidtanhconcatfill_constantmatmulelementwise_addelementwise_mulsplit    NLSTMCellGRUCellc                       s8   e Zd ZdZ							d
 fdd	Zdd	 Z  ZS )r   a  
    LSTMCell implementation using basic operators.
    There are two LSTMCell version, the default one is compatible with CUDNN LSTM implementation.
    The algorithm can be described as the equations below.

        .. math::

            i_t &= sigmoid(W_{ix}x_{t} + W_{ih}h_{t-1} + bx_i + bh_i)

            f_t &= sigmoid(W_{fx}x_{t} + W_{fh}h_{t-1} + bx_f + bh_f)

            o_t &= sigmoid(W_{ox}x_{t} + W_{oh}h_{t-1} + bx_o + bh_o)

            \\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + bx_c + bh_c)

            c_t &= f_t \\odot c_{t-1} + i_t \\odot \\tilde{c_t}

            h_t &= o_t \\odot tanh(c_t)

    The other LSTMCell version is compatible with the BasicLSTMUnit used in static graph.
    The algorithm can be described as the equations below.

        .. math::

            i_t &= sigmoid(W_{ix}x_{t} + W_{ih}h_{t-1} + b_i)

            f_t &= sigmoid(W_{fx}x_{t} + W_{fh}h_{t-1} + b_f + forget_bias )

            o_t &= sigmoid(W_{ox}x_{t} + W_{oh}h_{t-1} + b_o)

            \\tilde{c_t} &= tanh(W_{cx}x_t + W_{ch}h_{t-1} + b_c)

            c_t &= f_t \\odot c_{t-1} + i_t \\odot \\tilde{c_t}

            h_t &= o_t \\odot tanh(c_t)

    Args:
        hidden_size (integer): The hidden size used in the Cell.
        input_size (integer): The input size used in the Cell.
        param_attr(ParamAttr|None): The parameter attribute for the learnable
            weight matrix. Note:
            If it is set to None or one attribute of ParamAttr, LSTMCell will
            create ParamAttr as param_attr. If the Initializer of the param_attr
            is not set, the parameter is initialized with Xavier. Default: None.
        bias_attr (ParamAttr|None): The parameter attribute for the bias
            of LSTMCell.
            If it is set to None or one attribute of ParamAttr, LSTMCell will 
            create ParamAttr as bias_attr. If the Initializer of the bias_attr
            is not set, the bias is initialized as zero. Default: None.
        gate_activation (function|None): The activation function for gates (actGate).
                                  Default: 'fluid.layers.sigmoid'
        activation (function|None): The activation function for cells (actNode).
                             Default: 'fluid.layers.tanh'
        forget_bias(float|1.0): forget bias used when computing forget gate. This 
            is not used in default LSTMCell implementation (CUDNN compatiable)
        use_cudnn_impl(bool|True): whether to use CUDNN compatible LSTMCell
        dtype(string): data type used in this cell
    
    Returns:
        None

    Examples:

        .. code-block:: python

            from paddle import fluid
            import paddle.fluid.core as core
            from paddle.fluid.dygraph import LSTMCell
            import numpy as np
            batch_size = 64
            input_size = 128
            hidden_size = 256
            step_input_np = np.random.uniform(-0.1, 0.1, (
                batch_size, input_size)).astype('float64')
            pre_hidden_np = np.random.uniform(-0.1, 0.1, (
                batch_size, hidden_size)).astype('float64')
            pre_cell_np = np.random.uniform(-0.1, 0.1, (
                batch_size, hidden_size)).astype('float64')
            if core.is_compiled_with_cuda():
                place = core.CUDAPlace(0)
            else:
                place = core.CPUPlace()
            with fluid.dygraph.guard(place):
                cudnn_lstm = LSTMCell(hidden_size, input_size)
                step_input_var = fluid.dygraph.to_variable(step_input_np)
                pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np)
                pre_cell_var = fluid.dygraph.to_variable(pre_cell_np)
                new_hidden, new_cell = cudnn_lstm(step_input_var, pre_hidden_var, pre_cell_var) 

    N      ?Tfloat64c
                    s  t t| |	 || _|| _|| _|| _|	| _|pt| _	|pt
| _|| _| jr| jd urM| jjd urMt| j}
t| j}|
 jd7  _| jd7  _n| j}
| j}| jd ury| jjd uryt| j}t| j}| jd7  _| jd7  _n| j}| j}| j|
d| j | jg| jd| _| j|d| j | jg| jd| _| j|d| j g| jdd| _| j|d| j g| jdd| _d S td	g|	|d
| _d| j_| j| j| j| j d| j g|	d| _| j| jd| j g|	dd| _d S )N
_weight_ih
_weight_hh_bias_ih_bias_hh   attrshapedtypeTr   r   r   Zis_biasr   )r   valueF)superr   __init___hidden_size_input_size_param_attr
_bias_attr_dtyper   _gate_activationr   _activation_use_cudnn_implnamecopydeepcopycreate_parameterr   r   r   r   r   _forget_biasZstop_gradient_weight_bias)selfhidden_size
input_size
param_attr	bias_attrgate_activation
activationZforget_biasuse_cudnn_implr   weight_ih_param_attrweight_hh_param_attrbias_ih_param_attrbias_hh_param_attr	__class__ HD:\Projects\ConvertPro\env\Lib\site-packages\paddle/fluid/dygraph/rnn.pyr   r   sx   





zLSTMCell.__init__c                 C   sv  | j rxt|| jdd}t|| j}t|| jdd}t|| j}t|ddd}t|ddd}t|d |d }| |}t|d |d }	| |	}	t|d |d }
| 	|
}
t|d	 |d	 }| |}|	| ||
  }|| 	| }||fS t
||gd}t|| jd
}t|| j}t|ddd\}}}}tt|| t|| jtt|t|}| 	|| | }||fS )NTytranspose_yr?   r   r   Znum_or_sectionsdimr   r      xr>   )r%   r   r   r	   r   r   r   r   r#   r$   r   r+   r,   r
   r*   r   r   )r-   input
pre_hiddenZpre_celligateshgateschunked_igateschunked_hgatesZingateZ
forgetgateZcellgateZoutgateZnew_cell
new_hiddenconcat_input_hidden
gate_inputijfor;   r;   r<   forward   sB   



zLSTMCell.forward)NNNNr   Tr   __name__
__module____qualname____doc__r   rT   __classcell__r;   r;   r9   r<   r      s    ^Oc                       s6   e Zd ZdZ						d	 fdd	Zdd Z  ZS )
r   aY  
    GRU implementation using basic operators.
    There are two GRUCell version, the default one is compatible with CUDNN GRU implementation.
    The algorithm can be described as the equations below.

        .. math::

            u_t & = sigmoid(W_{ux} x_{t} + b_ux + W_{uh} h_{t-1} + b_uh)

            r_t & = sigmoid(W_{rx} x_{t} + b_rx + W_{rh} h_{t-1} + b_rh)

            \\tilde{h_{t}} & = tanh(W_{cx} x_{t} + b_cx + r_t \\odot (W_{ch} h_{t-1} + b_ch))

            h_t & = u_t h_{t-1} + (1-u_t) \\tilde{h_{t}}

    The other LSTMCell version is compatible with the BasicGRUUnit used in static graph.
    The algorithm can be described as the equations below.

        .. math::

            u_t & = sigmoid(W_{ux} x_{t} + W_{uh} h_{t-1} + b_u)

            r_t & = sigmoid(W_{rx} x_{t} + W_{rh} h_{t-1} + b_r)

            \\tilde{h_{t}} & = tanh(W_{cx} x_{t} + W_{ch} \\odot(r_t, h_{t-1}) + b_m)

            h_t & = u_t h_{t-1} + (1-u_t) \\tilde{h_{t}}

    Args:
        hidden_size (integer): The hidden size used in the Cell.
        input_size (integer): The input size used in the Cell.
        param_attr(ParamAttr|None): The parameter attribute for the learnable
            weight matrix. Note:
            If it is set to None or one attribute of ParamAttr, GRUCell will
            create ParamAttr as param_attr. If the Initializer of the param_attr
            is not set, the parameter is initialized with Xavier. Default: None.
        bias_attr (ParamAttr|None): The parameter attribute for the bias
            of GRUCell.
            If it is set to None or one attribute of ParamAttr, GRUCell will 
            create ParamAttr as bias_attr. If the Initializer of the bias_attr
            is not set, the bias is initialized zero. Default: None.
        gate_activation (function|None): The activation function for gates (actGate).
                                  Default: 'fluid.layers.sigmoid'
        activation (function|None): The activation function for cell (actNode).
                             Default: 'fluid.layers.tanh'
        use_cudnn_impl(bool|True): whether to use CUDNN compatible LSTMCell
        dtype(string): data type used in this cell
    
    Returns:
        None

    Examples:

        .. code-block:: python

            from paddle import fluid
            import paddle.fluid.core as core
            from paddle.fluid.dygraph import GRUCell
            import numpy as np
            batch_size = 64
            input_size = 128
            hidden_size = 256
            step_input_np = np.random.uniform(-0.1, 0.1, (
            batch_size, input_size)).astype('float64')
            pre_hidden_np = np.random.uniform(-0.1, 0.1, (
            batch_size, hidden_size)).astype('float64')
            if core.is_compiled_with_cuda():
                place = core.CUDAPlace(0)
            else:
                place = core.CPUPlace()
            with fluid.dygraph.guard(place):
                cudnn_gru = GRUCell(hidden_size, input_size)
                step_input_var = fluid.dygraph.to_variable(step_input_np)
                pre_hidden_var = fluid.dygraph.to_variable(pre_hidden_np)

    NTr   c	                    s  t t|   || _|| _|| _|| _|| _|pt| _	|pt
| _|| _| jr| jd urL| jjd urLt| j}	t| j}
|	 jd7  _|
 jd7  _n| j}	| j}
| jd urx| jjd urxt| j}t| j}| jd7  _| jd7  _n| j}| j}| j|	d| j | jg| jd| _| j|
d| j | jg| jd| _| j|d| j g| jdd| _| j|d| j g| jdd| _d S | jd ur| jjd urt| j}t| j}| jd	7  _| jd
7  _n| j}| j}| jd ur| jjd urt| j}t| j}| jd7  _| jd7  _n| j}| j}| j|| j| j d| j g|d| _| j|| j| j | jg|d| _| j|d| j g|dd| _| j|| jg|dd| _d S )Nr   r   r   r   rC   r   Tr   _gate_weight_candidate_weight
_gate_bias_candidate_biasr   )r   r   r   r   r   r    r!   r"   r   r#   r   r$   r%   r&   r'   r(   r)   r   r   r   r   r[   r\   r]   r^   )r-   r.   r/   r0   r1   r2   r3   r4   r   r5   r6   r7   r8   Zgate_weight_param_attrZcandidate_weight_param_attrZgate_bias_param_attrZcandidate_bias_param_attrr9   r;   r<   r   ;  s   	




zGRUCell.__init__c                 C   sZ  | j ret|| jdd}t|| j}t|| jdd}t|| j}t|ddd}t|ddd}t|d |d }| |}t|d |d }| |}||d  }	t|d |	}
| 	|
}
||
 | |
 }|S t
||gd}t|| jd	}t|| j}| |}t|ddd\}}|| }tt
||gd| j}t|| j}| 	|}|| d| |  }|S )
NTr=   r@   rC   r   rA   r   r   rD   )r%   r   r   r	   r   r   r   r   r#   r$   r   r[   r]   r\   r^   )r-   rG   rH   rI   rJ   rK   rL   Z
reset_gateZ
input_gate_tempZnew_gaterM   rN   rO   ruZr_hidden	candidatecr;   r;   r<   rT     s:   




zGRUCell.forward)NNNNTr   rU   r;   r;   r9   r<   r      s    Pf) r   Zlayersr   r   r   r   r   r	   r
   r   r'   __all__r   r   r;   r;   r;   r<   <module>   s   ( X