o
    Ner4                     @   s   d dl mZ d dl mZ d dlZd dlZddlmZmZm	Z	 ddl
mZmZ ddgZG d	d deZG d
d deZG dd deZdS )    )print_function)divisionN   )SamplerSequenceSamplerRandomSampler)DatasetIterableDatasetBatchSamplerDistributedBatchSamplerc                   @   s4   e Zd ZdZ					dddZdd Zd	d
 ZdS )r
   a
  
    A base implement of batch sampler used by `paddle.io.DataLoader`
    which yield mini-batch indices(a list/tuple with length as
    mini-batch size and holds sample indices) iterably.

    Batch sampler used by :code:`paddle.io.DataLoader` should be a subclass
    of :code:`paddle.io.BatchSampler`, BatchSampler subclasses should
    implement following methods:

    :code:`__iter__`: return mini-batch indices iterably.

    :code:`__len__`: get mini-batch number in an epoch.


    Args:
        dataset(Dataset): this could be a :code:`paddle.io.Dataset` 
                implement or other python object which implemented
                :code:`__len__` for BatchSampler to get indices as the
                range of :attr:`dataset` length. Default None.
        sampler (Sampler): this could be a :code:`paddle.io.Dataset`
                instance which implemented :code:`__iter__` to yield
                sample indices. :attr:`sampler` and :attr:`dataset`
                can not be set in the same time.  If :attr:`sampler`
                is set, :attr:`shuffle` should not be set. Default None.
        shuffle(bool): whether to shuffle indices order before genrating
                batch indices. Default False.
        batch_size(int): sample indice number in a mini-batch indices.
        drop_last(bool): whether drop the last incomplete batch dataset size
            is not divisible by the batch size. Default False

    Returns:
        BatchSampler: an iterable object for indices iterating

    Examples:
        
        .. code-block:: python
            
            from paddle.io import RandomSampler, BatchSampler, Dataset

            # init with dataset
            class RandomDataset(Dataset):
                def __init__(self, num_samples):
                    self.num_samples = num_samples
            
                def __getitem__(self, idx):
                    image = np.random.random([784]).astype('float32')
                    label = np.random.randint(0, 9, (1, )).astype('int64')
                    return image, label
                
                def __len__(self):
                    return self.num_samples
            
            bs = BatchSampler(dataset=RandomDataset(100),
                              shuffle=False,
                              batch_size=16,
                              drop_last=False)

            for batch_indices in bs:
                print(batch_indices)

            # init with sampler
            sampler = RandomSampler(RandomDataset(100))
            bs = BatchSampler(sampler=sampler,
                              batch_size=8,
                              drop_last=True)

            for batch_indices in bs:
                print(batch_indices)


    see `paddle.io.DataLoader`

    NFr   c                 C   s   |d u r$|d usJ dt |tsJ dt||r J d|| _n,t |tr-J d|d u s5J dt |tsCJ dt||rKt|| _nt|| _t |t	rY|dks`J d||| _
t |tsqJ d	t||| _d S )
Nz'either dataset or sampler should be setz1sampler should be a paddle.io.Sampler, but got {}z+shuffle should be False when sampler is setz1dataset should not be a paddle.io.IterableDatasetz'should not set both dataset and samplerz-shuffle should be a boolean value, but got {}r   z3batch_size should be a positive integer, but got {}z/drop_last should be a boolean value, but got {})
isinstancer   formattypesamplerr	   boolr   r   int
batch_size	drop_last)selfdatasetr   shuffler   r    r   UD:\Projects\ConvertPro\env\Lib\site-packages\paddle/fluid/dataloader/batch_sampler.py__init__f   s:   



zBatchSampler.__init__c                 c   sX    g }| j D ]}|| t|| jkr|V  g }q| js(t|dkr*|V  d S d S d S )Nr   )r   appendlenr   r   )r   batch_indicesidxr   r   r   __iter__   s   


zBatchSampler.__iter__c                 C   s.   t | j}|t| j | jd  7 }|| j S Nr   )r   r   r   r   r   r   num_samplesr   r   r   __len__   s   

zBatchSampler.__len__)NNFr   F)__name__
__module____qualname____doc__r   r   r"   r   r   r   r   r
      s    K
 
c                   @   s   e Zd ZdddZdd ZdS )_InfiniteIterableSamplerr   c                 C   s"   t |ts	J d|| _|| _d S )Nz:dataset should be an instance of paddle.io.IterableDataset)r   r	   r   r   )r   r   r   r   r   r   r      s   
z!_InfiniteIterableSampler.__init__c                 c   s    	 d g| j  V  q)N)r   r   r   r   r   r      s   z!_InfiniteIterableSampler.__iter__N)r   )r#   r$   r%   r   r   r   r   r   r   r'      s    
r'   c                   @   s:   e Zd ZdZ				dddZdd Zdd	 Zd
d ZdS )r   a  Sampler that restricts data loading to a subset of the dataset.

    In such case, each process can pass a DistributedBatchSampler instance 
    as a DataLoader sampler, and load a subset of the original dataset that 
    is exclusive to it.

    .. note::
        Dataset is assumed to be of constant size.
        
    Args:
        dataset(paddle.io.Dataset): this could be a `paddle.io.Dataset` implement
                     or other python object which implemented
                     `__len__` for BatchSampler to get sample
                     number of data source.
        batch_size(int): sample indice number in a mini-batch indices.
        num_replicas(int, optional): porcess number in distributed training.
            If :attr:`num_replicas` is None, :attr:`num_replicas` will be
            retrieved from :code:`paddle.distributed.ParallenEnv`.
            Default None.
        rank(int, optional): the rank of the current process among :attr:`num_replicas`
            processes. If :attr:`rank` is None, :attr:`rank` is retrieved from
            :code:`paddle.distributed.ParallenEnv`. Default None.
        shuffle(bool): whther to shuffle indices order before genrating
            batch indices. Default False.
        drop_last(bool): whether drop the last incomplete batch dataset size
            is not divisible by the batch size. Default False

    Examples:
        .. code-block:: python

            import numpy as np

            from paddle.io import Dataset, DistributedBatchSampler

            # init with dataset
            class RandomDataset(Dataset):
                def __init__(self, num_samples):
                    self.num_samples = num_samples
            
                def __getitem__(self, idx):
                    image = np.random.random([784]).astype('float32')
                    label = np.random.randint(0, 9, (1, )).astype('int64')
                    return image, label
                
                def __len__(self):
                    return self.num_samples
  
            dataset = RandomDataset(100)
            sampler = DistributedBatchSampler(dataset, batch_size=64)

            for data in sampler:
                # do something
                break
    NFc                 C   s  || _ t|tr|dksJ d|| _t|tsJ d|| _t|ts(J dddlm} |d urCt|tr;|dks?J d|| _n| j| _|d ur]t|trU|dksYJ d|| _	n| j	| _	|| _
d| _ttt| j d | j | _| j| j | _d S )	Nr   z'batch_size should be a positive integerz!shuffle should be a boolean valuez$drop_last should be a boolean number)ParallelEnvz)num_replicas should be a positive integerz%rank should be a non-negative integerg      ?)r   r   r   r   r   r   Zpaddle.fluid.dygraph.parallelr)   nranks
local_rankr   epochmathceilr   r!   
total_size)r   r   r   Znum_replicasZrankr   r   r)   r   r   r   r      s:   

 z DistributedBatchSampler.__init__c                 #   s    t  j}t| }||d  jt |  7 }t | jks#J  jr7tj j	|   j	d7  _	 fdd} j
dkrF||}t | jksOJ t|}g }|D ]}|| t | jkrj|V  g }qW jsyt |dkr{|V  d S d S d S )Nr   c                    s   g } j  j j  }| j dksJ | j }t j j t| |  j j D ]}|| || j   q+| t| | d  } ||  j|  jd |   |S )Nr   r   )r/   r   r*   ranger+   r   extend)indicesZsubsampled_indicesZlast_batch_sizeZlast_local_batch_sizeir(   r   r   _get_indices_by_batch_size  s(   


zDDistributedBatchSampler.__iter__.<locals>._get_indices_by_batch_sizer   )r   r   npZarangetolistr/   r   randomZRandomStater,   r*   r!   iterr   r   r   )r   r!   r2   r4   Z_sample_iterr   r   r   r(   r   r     s.   



z DistributedBatchSampler.__iter__c                 C   s*   | j }|t| j | jd  7 }|| j S r   )r!   r   r   r   r    r   r   r   r"   .  s   
zDistributedBatchSampler.__len__c                 C   s
   || _ dS )aL  
        Sets the epoch number. When :attr:`shuffle=True`, this number is used
        as seeds of random numbers. By default, users may not set this, all
        replicas (workers) use a different random ordering for each epoch.
        If set same number at each epoch, this sampler will yield the same
        ordering at all epoches.

        Arguments:
            epoch (int): Epoch number.

        Examples:
            .. code-block:: python
    
                import numpy as np
    
                from paddle.io import Dataset, DistributedBatchSampler
    
                # init with dataset
                class RandomDataset(Dataset):
                    def __init__(self, num_samples):
                        self.num_samples = num_samples
                
                    def __getitem__(self, idx):
                        image = np.random.random([784]).astype('float32')
                        label = np.random.randint(0, 9, (1, )).astype('int64')
                        return image, label
                    
                    def __len__(self):
                        return self.num_samples
      
                dataset = RandomDataset(100)
                sampler = DistributedBatchSampler(dataset, batch_size=64)
    
                for epoch in range(10):
                    sampler.set_epoch(epoch)
        N)r,   )r   r,   r   r   r   	set_epoch3  s   
%z!DistributedBatchSampler.set_epoch)NNFF)r#   r$   r%   r&   r   r   r"   r9   r   r   r   r   r      s    :
'+)
__future__r   r   numpyr5   r-   r   r   r   r   r   r   r	   __all__r
   objectr'   r   r   r   r   r   <module>   s   {