o
    QeA                     @   s   d dl Z d dlZd dlmZ d dlZd dlmZ d dlmZ g Z	dd Z
dddZG d	d
 d
eZdZdd Zdd Zdd ZG dd deZdS )    N)Image)Dataset)
try_importc                 C   s6   t |ttfsJ dtdd |D }|  |S )zChecks if a file is a vilid extension.

    Args:
        filename (str): path to a file
        extensions (list[str]|tuple[str]): extensions to consider

    Returns:
        bool: True if the filename ends with one of given extensions
    z#`extensions` must be list or tuple.c                 S   s   g | ]}|  qS  )lower).0xr   r   MD:\Projects\ConvertPro\env\Lib\site-packages\paddle/vision/datasets/folder.py
<listcomp>&       z'has_valid_extension.<locals>.<listcomp>)
isinstancelisttupler   endswith)filename
extensionsr   r   r	   has_valid_extension   s   
r   c                    s   g }t j| }  d ur fdd}t| D ]<}t j| |}t j|s(qtt j|ddD ]"\}}}	t|	D ]}
t j||
}||rR||| f}|| q:q1q|S )Nc                    
   t |  S Nr   r   r   r   r	   is_valid_file0      
z#make_dataset.<locals>.is_valid_fileTfollowlinks)	ospath
expandusersortedkeysjoinisdirwalkappend)dirclass_to_idxr   r   Zimagestargetdroot_fnamesfnamer   itemr   r   r	   make_dataset*   s$   
r.   c                   @   s:   e Zd ZdZ				dddZdd Zdd Zd	d
 ZdS )DatasetFoldera)  A generic data loader where the samples are arranged in this way:

    .. code-block:: text

        root/class_a/1.ext
        root/class_a/2.ext
        root/class_a/3.ext

        root/class_b/123.ext
        root/class_b/456.ext
        root/class_b/789.ext

    Args:
        root (str): Root directory path.
        loader (Callable, optional): A function to load a sample given its path. Default: None.
        extensions (list[str]|tuple[str], optional): A list of allowed extensions.
            Both :attr:`extensions` and :attr:`is_valid_file` should not be passed.
            If this value is not set, the default is to use ('.jpg', '.jpeg', '.png',
            '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'). Default: None.
        transform (Callable, optional): A function/transform that takes in
            a sample and returns a transformed version. Default: None.
        is_valid_file (Callable, optional): A function that takes path of a file
            and check if the file is a valid file. Both :attr:`extensions` and
            :attr:`is_valid_file` should not be passed. Default: None.

    Returns:
        :ref:`api_paddle_io_Dataset`. An instance of DatasetFolder.

    Attributes:
        classes (list[str]): List of the class names.
        class_to_idx (dict[str, int]): Dict with items (class_name, class_index).
        samples (list[tuple[str, int]]): List of (sample_path, class_index) tuples.
        targets (list[int]): The class_index value for each image in the dataset.

    Example:

        .. code-block:: python

            import shutil
            import tempfile
            import cv2
            import numpy as np
            import paddle.vision.transforms as T
            from pathlib import Path
            from paddle.vision.datasets import DatasetFolder


            def make_fake_file(img_path: str):
                if img_path.endswith((".jpg", ".png", ".jpeg")):
                    fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
                    cv2.imwrite(img_path, fake_img)
                elif img_path.endswith(".txt"):
                    with open(img_path, "w") as f:
                        f.write("This is a fake file.")

            def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
                root = Path(root)
                root.mkdir(parents=True, exist_ok=True)
                for subpath in directory_hierarchy:
                    if isinstance(subpath, str):
                        filepath = root / subpath
                        file_maker(str(filepath))
                    else:
                        dirname = list(subpath.keys())[0]
                        make_directory(root / dirname, subpath[dirname])

            directory_hirerarchy = [
                {"class_0": [
                    "abc.jpg",
                    "def.png"]},
                {"class_1": [
                    "ghi.jpeg",
                    "jkl.png",
                    {"mno": [
                        "pqr.jpeg",
                        "stu.jpg"]}]},
                "this_will_be_ignored.txt",
            ]

            # You can replace this with any directory to explore the structure
            # of generated data. e.g. fake_data_dir = "./temp_dir"
            fake_data_dir = tempfile.mkdtemp()
            make_directory(fake_data_dir, directory_hirerarchy)
            data_folder_1 = DatasetFolder(fake_data_dir)
            print(data_folder_1.classes)
            # ['class_0', 'class_1']
            print(data_folder_1.class_to_idx)
            # {'class_0': 0, 'class_1': 1}
            print(data_folder_1.samples)
            # [('./temp_dir/class_0/abc.jpg', 0), ('./temp_dir/class_0/def.png', 0),
            #  ('./temp_dir/class_1/ghi.jpeg', 1), ('./temp_dir/class_1/jkl.png', 1),
            #  ('./temp_dir/class_1/mno/pqr.jpeg', 1), ('./temp_dir/class_1/mno/stu.jpg', 1)]
            print(data_folder_1.targets)
            # [0, 0, 1, 1, 1, 1]
            print(len(data_folder_1))
            # 6

            for i in range(len(data_folder_1)):
                img, label = data_folder_1[i]
                # do something with img and label
                print(type(img), img.size, label)
                # <class 'PIL.Image.Image'> (32, 32) 0


            transform = T.Compose(
                [
                    T.Resize(64),
                    T.ToTensor(),
                    T.Normalize(
                        mean=[0.5, 0.5, 0.5],
                        std=[0.5, 0.5, 0.5],
                        to_rgb=True,
                    ),
                ]
            )

            data_folder_2 = DatasetFolder(
                fake_data_dir,
                loader=lambda x: cv2.imread(x),  # load image with OpenCV
                extensions=(".jpg",),  # only load *.jpg files
                transform=transform,  # apply transform to every image
            )

            print([img_path for img_path, label in data_folder_2.samples])
            # ['./temp_dir/class_0/abc.jpg', './temp_dir/class_1/mno/stu.jpg']
            print(len(data_folder_2))
            # 2

            for img, label in iter(data_folder_2):
                # do something with img and label
                print(type(img), img.shape, label)
                # <class 'paddle.Tensor'> [3, 64, 64] 0

            shutil.rmtree(fake_data_dir)
    Nc           	      C   s   || _ || _|d u rt}| | j \}}t| j |||}t|dkr0td| j  d d| |d u r6tn|| _	|| _
|| _|| _|| _dd |D | _t | _d S )Nr   z&Found 0 directories in subfolders of: 
Supported extensions are: ,c                 S   s   g | ]}|d  qS )   r   )r   sr   r   r	   r
      r   z*DatasetFolder.__init__.<locals>.<listcomp>)r)   	transformIMG_EXTENSIONS_find_classesr.   lenRuntimeErrorr!   default_loaderloaderr   classesr&   samplestargetspaddleZget_default_dtypeZdtype)	selfr)   r:   r   r4   r   r;   r&   r<   r   r   r	   __init__   s.   
zDatasetFolder.__init__c                    sb   t jdkrdd tD  nfddtD      fddtt D } |fS )a  
        Finds the class folders in a dataset.

        Args:
            dir (string): Root directory path.

        Returns:
            tuple: (classes, class_to_idx) where classes are relative to (dir), 
                    and class_to_idx is a dictionary.

        )      c                 S   s   g | ]	}|  r|jqS r   )is_dirnamer   r(   r   r   r	   r
      s    z/DatasetFolder._find_classes.<locals>.<listcomp>c                    s&   g | ]}t jt j |r|qS r   )r   r   r"   r!   rE   )r%   r   r	   r
      s    c                    s   i | ]} | |qS r   r   )r   i)r;   r   r	   
<dictcomp>   s    z/DatasetFolder._find_classes.<locals>.<dictcomp>)sysversion_infor   scandirlistdirsortranger7   )r?   r%   r&   r   )r;   r%   r	   r6      s   

zDatasetFolder._find_classesc                 C   s4   | j | \}}| |}| jdur| |}||fS )z
        Args:
            index (int): Index

        Returns:
            tuple: (sample, target) where target is class_index of the target class.
        Nr<   r:   r4   )r?   indexr   r'   sampler   r   r	   __getitem__   s
   


zDatasetFolder.__getitem__c                 C   
   t | jS r   r7   r<   r?   r   r   r	   __len__  r   zDatasetFolder.__len__NNNN)__name__
__module____qualname____doc__r@   r6   rQ   rU   r   r   r   r	   r/   A   s     
r/   )	z.jpgz.jpegz.pngz.ppmz.bmpz.pgmz.tifz.tiffz.webpc                 C   sB   t | d}t |}|dW  d    S 1 sw   Y  d S )NrbRGB)openr   convert)r   fZimgr   r   r	   
pil_loader  s   
$r`   c                 C   s   t d}||| |jS )Ncv2)r   ZcvtColorZimreadZCOLOR_BGR2RGB)r   ra   r   r   r	   
cv2_loader  s   rb   c                 C   s&   ddl m} | dkrt| S t| S )Nr   )get_image_backendra   )Zpaddle.visionrc   rb   r`   )r   rc   r   r   r	   r9   !  s   
r9   c                   @   s2   e Zd ZdZ				d	ddZdd Zdd ZdS )
ImageFoldera  A generic data loader where the samples are arranged in this way:

    .. code-block:: text

        root/1.ext
        root/2.ext
        root/sub_dir/3.ext

    Args:
        root (str): Root directory path.
        loader (Callable, optional): A function to load a sample given its path. Default: None.
        extensions (list[str]|tuple[str], optional): A list of allowed extensions.
            Both :attr:`extensions` and :attr:`is_valid_file` should not be passed.
            If this value is not set, the default is to use ('.jpg', '.jpeg', '.png',
            '.ppm', '.bmp', '.pgm', '.tif', '.tiff', '.webp'). Default: None.
        transform (Callable, optional): A function/transform that takes in
            a sample and returns a transformed version. Default: None.
        is_valid_file (Callable, optional): A function that takes path of a file
            and check if the file is a valid file. Both :attr:`extensions` and
            :attr:`is_valid_file` should not be passed. Default: None.

    Returns:
        :ref:`api_paddle_io_Dataset`. An instance of ImageFolder.

    Attributes:
        samples (list[str]): List of sample path.

    Example:

        .. code-block:: python

            import shutil
            import tempfile
            import cv2
            import numpy as np
            import paddle.vision.transforms as T
            from pathlib import Path
            from paddle.vision.datasets import ImageFolder


            def make_fake_file(img_path: str):
                if img_path.endswith((".jpg", ".png", ".jpeg")):
                    fake_img = np.random.randint(0, 256, (32, 32, 3), dtype=np.uint8)
                    cv2.imwrite(img_path, fake_img)
                elif img_path.endswith(".txt"):
                    with open(img_path, "w") as f:
                        f.write("This is a fake file.")

            def make_directory(root, directory_hierarchy, file_maker=make_fake_file):
                root = Path(root)
                root.mkdir(parents=True, exist_ok=True)
                for subpath in directory_hierarchy:
                    if isinstance(subpath, str):
                        filepath = root / subpath
                        file_maker(str(filepath))
                    else:
                        dirname = list(subpath.keys())[0]
                        make_directory(root / dirname, subpath[dirname])

            directory_hirerarchy = [
                "abc.jpg",
                "def.png",
                {"ghi": [
                    "jkl.jpeg",
                    {"mno": [
                        "pqr.jpg"]}]},
                "this_will_be_ignored.txt",
            ]

            # You can replace this with any directory to explore the structure
            # of generated data. e.g. fake_data_dir = "./temp_dir"
            fake_data_dir = tempfile.mkdtemp()
            make_directory(fake_data_dir, directory_hirerarchy)
            image_folder_1 = ImageFolder(fake_data_dir)
            print(image_folder_1.samples)
            # ['./temp_dir/abc.jpg', './temp_dir/def.png',
            #  './temp_dir/ghi/jkl.jpeg', './temp_dir/ghi/mno/pqr.jpg']
            print(len(image_folder_1))
            # 4

            for i in range(len(image_folder_1)):
                (img,) = image_folder_1[i]
                # do something with img
                print(type(img), img.size)
                # <class 'PIL.Image.Image'> (32, 32)


            transform = T.Compose(
                [
                    T.Resize(64),
                    T.ToTensor(),
                    T.Normalize(
                        mean=[0.5, 0.5, 0.5],
                        std=[0.5, 0.5, 0.5],
                        to_rgb=True,
                    ),
                ]
            )

            image_folder_2 = ImageFolder(
                fake_data_dir,
                loader=lambda x: cv2.imread(x),  # load image with OpenCV
                extensions=(".jpg",),  # only load *.jpg files
                transform=transform,  # apply transform to every image
            )

            print(image_folder_2.samples)
            # ['./temp_dir/abc.jpg', './temp_dir/ghi/mno/pqr.jpg']
            print(len(image_folder_2))
            # 2

            for (img,) in iter(image_folder_2):
                # do something with img
                print(type(img), img.shape)
                # <class 'paddle.Tensor'> [3, 64, 64]

            shutil.rmtree(fake_data_dir)
     Nc                    s   || _  d u r	t g }tj|} d ur fdd}ttj|ddD ]\}}}	t|	D ]}
tj||
}||r?|| q-q$t	|dkrUt
d| j  d d  |d u r[tn|| _ | _|| _|| _d S )	Nc                    r   r   r   r   r   r   r	   r     r   z+ImageFolder.__init__.<locals>.is_valid_fileTr   r   z Found 0 files in subfolders of: r0   r1   )r)   r5   r   r   r   r   r#   r!   r$   r7   r8   r9   r:   r   r<   r4   )r?   r)   r:   r   r4   r   r<   r   r*   r+   r,   r_   r   r   r	   r@     s0   


zImageFolder.__init__c                 C   s.   | j | }| |}| jdur| |}|gS )zn
        Args:
            index (int): Index

        Returns:
            sample of specific index.
        NrN   )r?   rO   r   rP   r   r   r	   rQ     s
   



zImageFolder.__getitem__c                 C   rR   r   rS   rT   r   r   r	   rU     r   zImageFolder.__len__rV   )rW   rX   rY   rZ   r@   rQ   rU   r   r   r   r	   rd   )  s    y
#rd   r   )r   rH   ZPILr   r>   Z	paddle.ior   Zpaddle.utilsr   __all__r   r.   r/   r5   r`   rb   r9   rd   r   r   r   r	   <module>   s    
 R