o
    Me&                     @   s2   d dl mZ d dlmZ dgZG dd deZdS )    )data_feed_pb2)text_formatDataFeedDescc                   @   s8   e Zd ZdZdd Zdd Zdd Zdd	 Zd
d ZdS )r   ag  
    :api_attr: Static Graph
    
    Datafeed descriptor, describing input training data format. This class is
    currently only used for AsyncExecutor (See comments for class AsyncExecutor
    for a brief introduction)

    DataFeedDesc shall be initialized from a valid protobuf message from disk.

    See :code:`paddle/fluid/framework/data_feed.proto` for message definition.
    A typical message might look like:

    .. code-block:: python

      import paddle.fluid as fluid
      f = open("data.proto", "w")
      print >> f, 'name: "MultiSlotDataFeed"'
      print >> f, 'batch_size: 2'
      print >> f, 'multi_slot_desc {'
      print >> f, '    slots {'
      print >> f, '         name: "words"'
      print >> f, '         type: "uint64"'
      print >> f, '         is_dense: false'
      print >> f, '         is_used: true'
      print >> f, '     }'
      print >> f, '     slots {'
      print >> f, '         name: "label"'
      print >> f, '         type: "uint64"'
      print >> f, '         is_dense: false'
      print >> f, '         is_used: true'
      print >> f, '    }'
      print >> f, '}'
      f.close()
      data_feed = fluid.DataFeedDesc('data.proto')

    However, users usually shouldn't care about the message format; instead,
    they are encouraged to use :code:`Data Generator` as a tool to generate a
    valid data description, in the process of converting their raw log files to
    training files acceptable to AsyncExecutor.

    DataFeedDesc can also be changed during runtime. Once you got familiar with
    what each field mean, you can modify it to better suit your need. E.g.:

    .. code-block:: python

      import paddle.fluid as fluid
      data_feed = fluid.DataFeedDesc('data.proto')
      data_feed.set_batch_size(128)
      data_feed.set_dense_slots('wd')  # The slot named 'wd' will be dense
      data_feed.set_use_slots('wd')    # The slot named 'wd' will be used

    Finally, the content can be dumped out for debugging purpose:

    .. code-block:: python

      print(data_feed.desc())

    Args:
        proto_file(string): Disk file containing a data feed description.

    c                 C   s|   t  | _d| j_t|d}t| | j W d    n1 s"w   Y  | jjdkr<dd t	| jj
jD | _d S d S )NcatrMultiSlotDataFeedc                 S   s   i | ]\}}|j |qS  )name).0iZslotr   r   KD:\Projects\ConvertPro\env\Lib\site-packages\paddle/fluid/data_feed_desc.py
<dictcomp>Z   s    z)DataFeedDesc.__init__.<locals>.<dictcomp>)r   r   
proto_descZpipe_commandopenr   Parsereadr	   	enumeratemulti_slot_descslots_DataFeedDesc__name_to_index)selfZ
proto_filefr   r   r   __init__T   s   
zDataFeedDesc.__init__c                 C   s   || j _dS )a  
        Set :attr:`batch_size` in :ref:`api_fluid_DataFeedDesc` . :attr:`batch_size` can be changed during training.

        Example:
            .. code-block:: python

              import paddle.fluid as fluid
              f = open("data.proto", "w")
              print >> f, 'name: "MultiSlotDataFeed"'
              print >> f, 'batch_size: 2'
              print >> f, 'multi_slot_desc {'
              print >> f, '    slots {'
              print >> f, '         name: "words"'
              print >> f, '         type: "uint64"'
              print >> f, '         is_dense: false'
              print >> f, '         is_used: true'
              print >> f, '     }'
              print >> f, '     slots {'
              print >> f, '         name: "label"'
              print >> f, '         type: "uint64"'
              print >> f, '         is_dense: false'
              print >> f, '         is_used: true'
              print >> f, '    }'
              print >> f, '}'
              f.close()
              data_feed = fluid.DataFeedDesc('data.proto')
              data_feed.set_batch_size(128)

        Args:
            batch_size (int): The number of batch size.

        Returns:
            None.

        N)r   
batch_size)r   r   r   r   r   set_batch_size_   s   $zDataFeedDesc.set_batch_sizec                 C   8   | j jdkr
td|D ]}d| j jj| j|  _qdS )ay  
        Set slots in :attr:`dense_slots_name` as dense slots. **Note: In default, all slots are sparse slots.**
 
        Features for a dense slot will be fed into a Tensor, while those for a
        sparse slot will be fed into a LoDTensor.

        Example:
            .. code-block:: python

              import paddle.fluid as fluid
              f = open("data.proto", "w")
              print >> f, 'name: "MultiSlotDataFeed"'
              print >> f, 'batch_size: 2'
              print >> f, 'multi_slot_desc {'
              print >> f, '    slots {'
              print >> f, '         name: "words"'
              print >> f, '         type: "uint64"'
              print >> f, '         is_dense: false'
              print >> f, '         is_used: true'
              print >> f, '     }'
              print >> f, '     slots {'
              print >> f, '         name: "label"'
              print >> f, '         type: "uint64"'
              print >> f, '         is_dense: false'
              print >> f, '         is_used: true'
              print >> f, '    }'
              print >> f, '}'
              f.close()
              data_feed = fluid.DataFeedDesc('data.proto')
              data_feed.set_dense_slots(['words'])

        Args:
            dense_slots_name (list(str)): a list of slot names which will be set dense.

        Returns:
            None.

        r   zNOnly MultiSlotDataFeed needs set_dense_slots, please check your datafeed.protoTN)r   r	   
ValueErrorr   r   r   Zis_dense)r   Zdense_slots_namer	   r   r   r   set_dense_slots   s   'zDataFeedDesc.set_dense_slotsc                 C   r   )a\  
        Set if a specific slot will be used for training. A dataset shall
        contain a lot of features, through this function one can select which
        ones will be used for a specific model.

        Example:
            .. code-block:: python

              import paddle.fluid as fluid
              f = open("data.proto", "w")
              print >> f, 'name: "MultiSlotDataFeed"'
              print >> f, 'batch_size: 2'
              print >> f, 'multi_slot_desc {'
              print >> f, '    slots {'
              print >> f, '         name: "words"'
              print >> f, '         type: "uint64"'
              print >> f, '         is_dense: false'
              print >> f, '         is_used: true'
              print >> f, '     }'
              print >> f, '     slots {'
              print >> f, '         name: "label"'
              print >> f, '         type: "uint64"'
              print >> f, '         is_dense: false'
              print >> f, '         is_used: true'
              print >> f, '    }'
              print >> f, '}'
              f.close()
              data_feed = fluid.DataFeedDesc('data.proto')
              data_feed.set_use_slots(['words'])

        Args:
            use_slots_name: a list of slot names which will be used in training

        Note:
            Default is not used for all slots
        r   zLOnly MultiSlotDataFeed needs set_use_slots, please check your datafeed.protoTN)r   r	   r   r   r   r   Zis_used)r   Zuse_slots_namer	   r   r   r   set_use_slots   s   %zDataFeedDesc.set_use_slotsc                 C   s   t | jS )aU  
        Returns a protobuf message for this DataFeedDesc

        Example:
            .. code-block:: python

              import paddle.fluid as fluid
              f = open("data.proto", "w")
              print >> f, 'name: "MultiSlotDataFeed"'
              print >> f, 'batch_size: 2'
              print >> f, 'multi_slot_desc {'
              print >> f, '    slots {'
              print >> f, '         name: "words"'
              print >> f, '         type: "uint64"'
              print >> f, '         is_dense: false'
              print >> f, '         is_used: true'
              print >> f, '     }'
              print >> f, '     slots {'
              print >> f, '         name: "label"'
              print >> f, '         type: "uint64"'
              print >> f, '         is_dense: false'
              print >> f, '         is_used: true'
              print >> f, '    }'
              print >> f, '}'
              f.close()
              data_feed = fluid.DataFeedDesc('data.proto')
              print(data_feed.desc())

        Returns:
            A string message
        )r   ZMessageToStringr   )r   r   r   r   desc   s    zDataFeedDesc.descN)	__name__
__module____qualname____doc__r   r   r   r   r   r   r   r   r   r      s    >&/-N)Zpaddle.fluid.protor   Zgoogle.protobufr   __all__objectr   r   r   r   r   <module>   s   