o
    MeD%                     @   sv   d dl mZ d dlmZ d dlmZmZmZ d dlm	Z	 d dl
mZmZ dd Zdd	 ZdddZdd Zdd ZdS )    )core)LayerHelper)_non_static_mode_in_legacy_dygraphin_dygraph_mode)check_variable_and_dtype)_C_ops_legacy_C_opsc                 C   sp   t  r
t| d|S t rtj| d|S d}t|fi t }|j| j	d}|j
|d| id|id|id |S )a  
    calculate the expert count according to the gate index.
    Args:
        numbers (Tensor): Tensor. The input gate index whose data type should be int32 or int64.
        upper_range (int): The number of the experts.
    Returns:
        out (Tensor): The output expert count.
    Examples:
        .. code-block:: python
            # required: distributed
            import paddle

            numbers = [
                [0, 2],
                [0, 2]
            ]
            upper_range = 6
            numbers = paddle.to_tensor(numbers, dtype="int32")
            number_count = paddle.distributed.utils.number_count(numbers, upper_range)
            print(number_count) # the result: [2, 0, 2, 0, 0, 0]
    upper_rangenumber_countdtypenumbersOuttypeinputsoutputsattrs)r   r	   r   r   r   opsr   locals"create_variable_for_type_inferencer   	append_op)r   r
   op_typehelperout r   SD:\Projects\ConvertPro\env\Lib\site-packages\paddle/distributed/models/moe/utils.py_number_count   s   r   c                 C   s   t  rt| ||d S t rtj| ||d S d}t|fi t }|j|j	d}|j
|| g|g|d gdd|gid |S )a  
    Assign pos decides which tokens should be fetched belong to 
    specially expert orderingly.
    
    Args:
        x (Tensor): Tensor. Every element in the list must be a Tensor whose data type
            should be float16, float32, float64, int32 or int64.
        cum_count (Tensor): The cumulative sum tokens of counters. Every element in the list must be a Tensor whose 
            data type should be int64.
  
    Returns:
        out (Tensor): Assemble numbers in the order of counters. 
    
    Examples:
        .. code-block:: python

            # required: distributed
            import paddle
            number_count = [2, 0, 2, 0]
            numbers = [
                [0, 2],
                [0, 2]
            ]
            number_count = paddle.to_tensor(number_count)
            numbers = paddle.to_tensor(numbers, dtype="int32")
            num_cum = paddle.cumsum(number_count)
            pos = paddle.distributed.utils.assign_pos(x=numbers, cum_count=num_cum)
            print(pos) # the result: (2, 0, 3, 1)
    
assign_posr   )X	cum_countZeff_num_lenr   )r   r   r   )r   r	   r    r   r   r   r   r   r   r   r   )xr"   r   r   r   r   r   r   _assign_pos=   s   r$      c                 C   sB   |dkrt  rt||| S t rtj||| S tdtd)a  
        random routing topk gate idx
        ```
            out = topk_idx
            for i in len(topk_idx):
                if topk * value[i][topk-1] < prob[i]:
                    out[i][topk-1] = -1
        ```
        Args:
            topk_idx: gate idx, shape=(N, topk)
            topk_value: values, shape = topk_idx.shape
            prob: random prob, shape=(topk_idx.shape[0],)
    r%   zNot supporting static mode nowzonly topk=2 is supported now)r   r	   Zrandom_routingr   r   r   RuntimeError)Ztopk_idxZ
topk_valueZprobZtopkr   r   r   _random_routingo   s   r'   c                 C   sv   t  rt| |d|S t rtj| |d|S d}t|fi t }|j| j	d}|j
|| |dd|id|id |S )a  
    limit the expert count by capacity.
    Args:
        expert_count (Tensor): Tensor. The input expert count whose data type should be int32 or int64.
        capacity (Tensor): Tensor. The input capacity whose data type should be int32 or int64 and the elements of capacity should be the same with expert_count.numel()/n_work.
        n_work (int): The number of the works.
    Returns:
        out (Tensor): The output expert count limit by capacity.
    Examples:
        .. code-block:: python
            # required: distributed
            import paddle
            expert_count = [1, 2, 2, 8, 3, 6]
            capacity = [5, 5, 5]
            n_work = 2
            expert_count = paddle.to_tensor(expert_count, dtype="int32")
            capacity = paddle.to_tensor(capacity, dtype="int32")
            out = paddle.distributed.utils.limit_by_capacity(expert_count, capacity, n_work)
            print(out) # the result: [1, 2, 2, 4, 3, 3]
    n_workerlimit_by_capacityr   )expert_countcapacityr   r   )r   r	   r)   r   r   r   r   r   r   r   r   )r*   r+   r(   r   r   r   r   r   r   _limit_by_capacity   s*   r,   c                 C   s   t  rt| |d|d|S t rtj| |d|d|S t| dddgd t|dddgd tdi t }|j	| j
d	}|jd| |d
d|i||dd |S )u)  
    prune gate by capacity(only support CUDA)

    Args:
        gate_idx (Tensor): Represents the gate_id sequence corresponding to the input data with type int32, int64.
        expert_count (Tensor): The quantity value counted on the gate_id sequence of the input data with type int32, int64.
        n_worker(int，optional): The number of workers on the trainer with type int64.
  
    Returns:
        new_gate_idx (Tensor): The gate_id sequence corresponding to the new input data after passing through prune.
    
    Examples:
        .. code-block:: python

            import paddle
            gate_idx = paddle.to_tensor([1, 3, 3, 3, 3, 2, 1, 1], dtype='int32')
            expert_count = paddle.to_tensor([0, 3, 1, 3, 0, 0, 0, 0], dtype='int32')
            n_worker = 1
            new_gate_id = paddle.distributed.utils.prune_gate_by_capacity(gate_idx, expert_count, n_expert, n_worker)
            print(new_gate_id)
            # Tensor(shape=[8], dtype=int32, place=CUDAPlace(0), stop_gradient=True,
              [1, 3, 3, 3, -1, 2, 1, 1])
    n_expertr(   GateIdxZint32Zint64z/paddle.distributed.utils.prune_gate_by_capacityExpertCountprune_gate_by_capacityr   )r.   r/   Z
NewGateIdx)r-   r(   r   N)r0   )r   r	   r0   r   r   r   r   r   r   r   r   r   )Zgate_idxr*   r-   r(   r   Znew_gate_idxr   r   r   _prune_gate_by_capacity   s<   
r1   N)r%   )Zpaddle.fluidr   Zpaddle.fluid.layer_helperr   Zpaddle.fluid.frameworkr   r   r   Zpaddle.fluid.data_feederr   Zpaddler   r	   r   r$   r'   r,   r1   r   r   r   r   <module>   s   '
2,