# Source code for cvtk._base

import os
import re
import random
import copy
import pathlib
import glob
import io
import typing
import base64
import json
import PIL
import PIL.Image
import PIL.ImageOps
import PIL.ImageDraw
import PIL.ImageFont
import numpy as np
# Type alias used in the signatures of the image helper functions of this module
# to annotate the kinds of image input they accept.
ImageSourceTypes = typing.Union[str, pathlib.Path, bytes, PIL.Image.Image, np.ndarray]



[docs]
class JsonComplexEncoder(json.JSONEncoder):
    """Convert objects to JSON serializable format

    JSON encoder that additionally understands NumPy scalars and arrays.
    It is meant to be passed as ``cls`` to :func:`json.dumps` / :func:`json.dump`.

    Examples:
        >>> obj = {'a': np.array([0, 1, 2]), 'b': [0, 1, 2], 'c': 'hello world'}
        >>> json.dumps(obj, cls=JsonComplexEncoder)
        '{"a": [0, 1, 2], "b": [0, 1, 2], "c": "hello world"}'
    """

    def default(self, obj):
        """Return a JSON serializable replacement for `obj`.

        Args:
            obj: An object the standard encoder could not serialize.

        Returns:
            ``bool`` for NumPy booleans, ``int`` for NumPy integers,
            ``float`` for NumPy floating scalars (``None`` when the scalar is NaN),
            and a nested list for NumPy arrays.

        Raises:
            TypeError: Raised by the base class for any other unsupported type.
        """
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            # `np.nan` is a float value, not a type, so it cannot be used with
            # isinstance(); NaN scalars are detected here instead.
            # NOTE: this hook is only called for objects the standard encoder
            # rejects, so values that are already plain Python floats
            # are not affected by this conversion.
            return None if np.isnan(obj) else float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)





[docs]
class Annotation():
    """A class to store image annotations including bounding boxes and masks

    The class store image annotations of the coordinates of bounding boxes,
    the binary masks, and the labels and scores corresponding to the bounding boxes or masks.
    The areas of bounding boxes or masks are automatically calculated from bouding boxes or masks and stored in this class.

    Args:
        labels: The labels for the bounding boxes or masks.
        bboxes: The list of bounding boxes in the format of (x1, y1, x2, y2).
        masks: NumPy array or 2-d list of maks.
        scores: The list of scores.
    
    Attributes:
        labels: The labels for the bounding boxes or masks.
        bboxes: The list of bounding boxes in the format of (x1, y1, x2, y2).
        masks: NumPy array or 2-d list of maks.
        scores: The list of scores.
        areas: The list of areas of bounding boxes or masks.

    Examples:
        >>> labels = ['leaf', 'flower', 'root']
        >>> bboxes = [[0, 0, 10, 10],
        ...           [10, 10, 20, 20],
        ...           [20, 20, 30, 30]]
        >>> masks = [np.random.randint(2, (240, 321)).tolist(),
        ...          np.random.randint(2, (240, 321)).tolist(),
        ...          np.random.randint(2, (240, 321)).tolist()]
        >>> scores = [0.9, 0.8, 0.7]
        >>>
        >>> ann = Annotation(labels, bboxes, masks, scores)
        >>>
        >>> ann[0]
        {'label': 'leaf', 'bbox': (0, 0, 10, 10), 'mask': array([[0, 1, 0, ..., 0, 1, 0],
        ...                                                        [1, 0, 0, ..., 1, 0, 0],
        ...                                                        ...,
        ...                                                        [0, 1, 0, ..., 0, 1, 0],
        ...                                                        [1, 0, 0, ..., 1, 0, 0]]),
        ... 'score': 0.9, 'area': 100}
        >>>
        >>> ann.labels
        ['leaf', 'flower', 'root']
        >>>
        >>> ann.dump()
        '[{"label": "leaf", "bbox": [0, 0, 10, 10], "mask": [[0, 1, 0, ..., 0, 1, 0], [1, 0, 0, ..., 1, 0, 0], ..., [0, 1, 0, ..., 0, 1, 0], [1, 0, 0, ..., 1, 0, 0]], "score": 0.9, "area": 100}, {"label": "flower", "bbox": [10, 10, 20, 20], "mask": [[0, 1, 0, ..., 0, 1, 0], [1, 0, 0, ..., 1, 0, 0], ..., [0, 1, 0, ..., 0, 1, 0], [1, 0, 0, ..., 1, 0, 0]], "score": 0.8, "area": 100}, {"label": "root", "bbox": [20, 20, 30, 30], "mask": [[0, 1, 0, ..., 0, 1, 0], [1, 0, 0, ..., 1, 0, 0], ..., [0, 1, 0, ..., 0, 1, 0], [1, 0, 0, ..., 1, 0, 0]], "score": 0.7, "area": 100}]'
    """
            
    def __init__(self,
                 labels:list[str],
                 bboxes: list[list]|list[tuple]|None=None,
                 masks: list[list[int]]|np.ndarray|None=None,
                 scores: list[float]|None=None):

        if isinstance(labels, str):
            labels = [labels]
        
        if bboxes is not None:
            if len(bboxes) != len(labels):
                raise ValueError('The number of labels and bounding boxes should be the same.')
        else:
            bboxes = [None] * len(labels)
        if masks is not None:
            if len(masks) != len(labels):
                raise ValueError('The number of masks and polygons should be the same.')
        else:
            masks = [None] * len(labels)
        if scores is not None:
            if len(scores) != len(labels):
                raise ValueError('The number of labels and scores should be the same.')
        else:
            scores = [None] * len(labels)

        self.__i = 0
        self.__labels = labels
        self.__bboxes = self.__as_tuple(bboxes)
        self.__masks = self.__as_array(masks)
        self.__scores = scores
        self.__areas = self.__calc_areas()


    def __len__(self):
        return len(self.__labels)


    def __getitem__(self, i):
        return {'label': self.__labels[i],
                'bbox': self.__bboxes[i],
                'mask': self.__masks[i],
                'score': self.__scores[i],
                'area': self.__areas[i]}


    def __iter__(self):
        return self


    def __next__(self):
        if self.__i < len(self):
            i = self.__i
            self.__i += 1
            return self[i]
        else:
            self.__i = 0
            raise StopIteration()
        

    def __as_tuple(self, bboxes):
        if bboxes is not None:
            bboxes = [tuple(bbox) if bbox is not None else None for bbox in bboxes]
        return bboxes


    def __as_array(self, masks):
        if masks is not None:
            masks = [np.array(mask) if mask is not None else None for mask in masks]
        return masks


    def __calc_areas(self):
        areas = []
        for bbox, mask in zip(self.__bboxes, self.__masks):
            if mask is not None:
                areas.append(int(np.sum(mask)))
            elif bbox is not None:
                areas.append((bbox[3] - bbox[1]) * (bbox[2] - bbox[0]))
            else:
                areas.append(None)
        return areas
    
    
    @property
    def labels(self) -> list[str]:
        return self.__labels


    @property
    def bboxes(self) -> list[tuple]:
        return self.__bboxes


    @property
    def masks(self) -> list[np.ndarray]:
        return self.__masks
    

    @property
    def scores(self) -> list[float]:
        return self.__scores
    
    

[docs]
    def dump(self, indent: int|None=None, ensure_ascii: bool=True) -> str:
        """Dump the annotation data to string in JSON format.

        Args:
            indent (int): The indentation of the JSON string. Default is `None`.
            ensure_ascii (bool): Ensure the string is ASCII. Default is `True`.

        Returns:
            str: JSON string of the annotation data.

        Examples:
            >>> ann = ImageAnnotation(['leaf', 'flower', 'root'], [[0, 0, 10, 10], [10, 10, 20, 20], [20, 20, 30, 30]])
            >>> ann.dump()
            '[{"label": "leaf", "bbox": [0, 0, 10, 10], "mask": null, "score": null, "area": 100}, {"label": "flower", "bbox": [10, 10, 20, 20], "mask": null, "score": null, "area": 100}, {"label": "root", "bbox": [20, 20, 30, 30], "mask": null, "score": null, "area": 100}]'
        """
        ann_dict = [self[i] for i in range(len(self))]
        return json.dumps(ann_dict, cls=JsonComplexEncoder, indent=indent, ensure_ascii=ensure_ascii)






[docs]
class Image():
    """A class to store image data and annotations

    The class stores the source of an image, the image size,
    and optionally the annotations (bounding boxes and masks) of the image.
    The image is opened once at construction time to read its size.

    Args:
        source: The image source. Anything accepted by `imread`.
        annotations: The annotations for the image.

    Attributes:
        source: The image source given to the constructor.
        annotations: The annotations for the image.
        size: The size of the image (width, height).
        width: The width of the image.
        height: The height of the image.

    Examples:
        >>> im = Image('image.jpg')
        >>> im.size
        (321, 240)
        >>> im.width
        321
        >>> im.height
        240
        >>>
        >>> labels = ['leaf', 'flower', 'root']
        >>> bboxes = [[0, 0, 10, 10],
        ...           [10, 10, 20, 20],
        ...           [20, 20, 30, 30]]
        >>> ann = Annotation(labels, bboxes)
        >>> im = Image('image.jpg', ann)
        >>> im_drawn = im.draw('bbox')
    """
    def __init__(self, source, annotations: Annotation|None=None):
        im = imread(source)
        self.__source = source
        self.__size = im.size
        self.__width = im.width
        self.__height = im.height
        self.__annotations = annotations


    @property
    def source(self):
        return self.__source


    @property
    def size(self):
        return self.__size


    @property
    def width(self):
        return self.__width


    @property
    def height(self):
        return self.__height


    @property
    def annotations(self):
        return self.__annotations


    def draw(self,
             format: str='bbox',
             output: str|None=None,
             cutoff: float=0.5,
             label: bool=True,
             score: bool=True,
             font: PIL.ImageFont.ImageFont|None=None,
             col: dict|None=None) -> PIL.Image.Image:
        """Plot an image with annotations

        Plot an image with annotations including bounding boxes and masks.
        The format is matched by substring, so several formats can be combined
        in one string (e.g. 'bbox+segm').
        When 'mask' or 'rgbmask' is requested, the returned image is the mask image
        (the masks of all plotted annotations merged into one canvas)
        instead of the photograph; boxes, contours, and labels are not drawn on it.

        Args:
            format: The format of the annotations to plot. Default is 'bbox'.
                Options are 'bbox', 'segm', 'mask', and 'rgbmask'.
            output: The path to save the plotted image. Default is None.
            cutoff: The cutoff score to plot the annotations. Default is 0.5.
                Annotations without a score are always plotted.
            label: Whether to plot the labels. Default is True.
            score: Whether to plot the scores. Default is True.
            font: The font used for the labels. Default is None (the default font of PIL).
            col: The color dictionary for the annotations (label -> RGB tuple). Default is None.
                Labels missing from the dictionary get a random color,
                which is stored in the given dictionary.

        Returns:
            PIL.Image.Image: The plotted image.
        """
        # scikit-image is only needed to trace mask contours.
        if 'segm' in format:
            import skimage.measure

        # Draw on a copy: `imread` returns a PIL image source as-is,
        # and drawing on it would modify the caller's image.
        im = imread(self.__source).copy()
        imdraw = PIL.ImageDraw.Draw(im)

        if font is None:
            font = PIL.ImageFont.load_default(max([10, int(im.height / 50), int(im.width / 50)]))
        if col is None:
            col = {'___UNDEF___': (random.randint(0, 255),
                                   random.randint(0, 255),
                                   random.randint(0, 255))}
        outline_width = max([5, int(im.height / 200), int(im.width / 200)])

        annotations = self.__annotations if self.__annotations is not None else []
        mask_canvas = None

        for ann in annotations:
            ann_score = ann.get('score')
            ann_bbox = ann.get('bbox')
            ann_mask = ann.get('mask')

            # A missing score cannot be compared with the cutoff; keep the annotation.
            if (ann_score is not None) and (ann_score < cutoff):
                continue

            cl = ann['label'] if 'label' in ann else '___UNDEF___'
            if cl not in col:
                col[cl] = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))

            if ('bbox' in format) and (ann_bbox is not None):
                x1, y1, x2, y2 = ann_bbox
                imdraw.rectangle([(x1, y1), (x2, y2)], outline = col[cl], width=outline_width)

            if ('segm' in format) and (ann_mask is not None):
                for contour in skimage.measure.find_contours(ann_mask, 0.5):
                    # contours are (row, col) pairs; PIL expects (x, y)
                    imdraw.line([tuple([c[1], c[0]]) for c in contour.tolist()], fill = col[cl], width=outline_width)

            if ('mask' in format) and (ann_mask is not None):
                foreground = np.asarray(ann_mask) > 0
                if 'rgbmask' in format:
                    if mask_canvas is None:
                        mask_canvas = np.zeros(foreground.shape + (3,), dtype=np.uint8)
                    mask_canvas[foreground] = col[cl]
                else:
                    if mask_canvas is None:
                        mask_canvas = np.zeros(foreground.shape, dtype=np.uint8)
                    mask_canvas[foreground] = 255

            if label and ('label' in ann):
                # Anchor the text at the top-left corner of the bbox,
                # or of the mask region when no bbox is available.
                anchor = None
                if ann_bbox is not None:
                    anchor = (ann_bbox[0], ann_bbox[1])
                elif ann_mask is not None:
                    ys, xs = np.nonzero(ann_mask)
                    if len(xs) > 0:
                        anchor = (int(xs.min()), int(ys.min()))
                if anchor is not None:
                    cl_ = str(ann['label'])
                    if score and (ann_score is not None):
                        cl_ = f"{ann['label']} ({ann_score:.2f})"
                    imdraw.text((anchor[0] + int(outline_width * 1.5), anchor[1]), cl_, font=font, fill=col[cl])

        if mask_canvas is not None:
            im = PIL.Image.fromarray(mask_canvas)

        if output is not None:
            im.save(output)
        return im






[docs]
class ImageDeck():
    """A class to store a deck of images

    The class stores a list of `Image` objects (each with its annotations)
    and exports them as a dictionary or a JSON file in cvtk or COCO format.

    Args:
        images: An `Image`, a list or tuple of `Image` objects, or another `ImageDeck`
            (whose images are deep-copied).

    Attributes:
        images: The list of images in the deck.

    Raises:
        ValueError: If `images` is not one of the accepted types
            or contains an object that is not an `Image`.
    """

    def __init__(self, images: Image|list[Image]):
        if isinstance(images, Image):
            self.images = [images]
        elif isinstance(images, ImageDeck):
            self.images = copy.deepcopy(images.images)
        elif isinstance(images, (list, tuple)):
            for i, im in enumerate(images):
                if not isinstance(im, Image):
                    raise ValueError(f'Invalid image at index {i}.')
            # A tuple is converted to a list so that append() and extend() work.
            self.images = images if isinstance(images, list) else list(images)
        else:
            raise ValueError(f'ImageDeck only receives Image or list of Image object.')

        # Cursor used only by the legacy `next(deck)` protocol (see __next__).
        self.__i = 0


    def __iter__(self):
        # Every loop gets its own independent iterator, so that leaving a loop
        # early or nesting loops over the same deck cannot disturb other loops.
        return iter(self.images)


    def __next__(self):
        # Kept so that calling `next()` directly on the deck keeps working.
        if self.__i < len(self):
            i = self.__i
            self.__i += 1
            return self[i]
        else:
            self.__i = 0
            raise StopIteration()


    def __len__(self):
        return len(self.images)


    def __getitem__(self, i):
        return self.images[i]


    def append(self, image: Image):
        """Append an image to the deck

        Args:
            image: The image to append.

        Raises:
            ValueError: If `image` is not an `Image` object.
        """
        if not isinstance(image, Image):
            raise ValueError(f'Invalid image.')
        self.images.append(image)


    def extend(self, images: Image|list[Image]):
        """Extend images to the deck

        Args:
            images: An `Image`, a list or tuple of `Image` objects, or an `ImageDeck`.

        Raises:
            ValueError: If `images` is not one of the accepted types
                or contains an object that is not an `Image`.
        """
        if isinstance(images, Image):
            self.images.append(images)
        elif isinstance(images, ImageDeck):
            self.images.extend(images.images)
        elif isinstance(images, (list, tuple)):
            for i, im in enumerate(images):
                if not isinstance(im, Image):
                    raise ValueError(f'Invalid image at index {i}.')
            self.images.extend(images)
        else:
            raise ValueError(f'Invalid image deck.')


    def format(self, format: str='cvtk', datalabel: str|None=None) -> dict:
        """Format the images in the deck

        Args:
            format: The output format, 'cvtk' or 'coco' (case-insensitive). Default is 'cvtk'.
            datalabel: The labels used to define the COCO categories, in order.
                It is iterated label by label; when None, the categories are the sorted
                labels found in the annotations. Only used for the 'coco' format.
                NOTE(review): the annotation says `str`, but a plain string would be
                iterated character by character -- confirm the intended type.

        Returns:
            dict: The images and annotations in the requested format.

        Raises:
            ValueError: If the format is not supported.
        """
        format_name = format.lower()
        if format_name == 'cvtk':
            return self.__format_cvtk()
        if format_name == 'coco':
            return self.__format_coco(datalabel)
        raise ValueError(f'Invalid format "{format}".')


    def dump(self, output: str, format: str='cvtk', datalabel: str|None=None, indent: int|None=None, ensure_ascii: bool=True):
        """Dump the images in the deck to a file

        Args:
            output: The path to the output JSON file.
            format: The output format, see `format()`. Default is 'cvtk'.
            datalabel: The labels used to define the COCO categories, see `format()`.
            indent: The indentation of the JSON string. Default is None.
            ensure_ascii: Ensure the string is ASCII. Default is True.
        """
        deck_dict = self.format(format, datalabel)
        # JSON text is written as UTF-8 so that non-ASCII labels
        # (ensure_ascii=False) do not depend on the platform default encoding.
        with open(output, 'w', encoding='utf-8') as fh:
            json.dump(deck_dict, fh, ensure_ascii=ensure_ascii, indent=indent, cls=JsonComplexEncoder)


    def __format_cvtk(self) -> dict:
        """Build the cvtk dictionary: one entry per image with its annotation list."""
        images = []
        for im in self.images:
            images.append({
                'file_path': im.source,
                # images without annotations are exported with an empty list
                'annotations': list(im.annotations) if im.annotations is not None else []
            })
        return {'data': images}


    def __format_coco(self, datalabel) -> dict:
        """Build the COCO dictionary with 'images', 'annotations', and 'categories'."""
        images = []
        annotations = []
        categories = []
        cate2id = {}

        if datalabel is not None:
            category_names = list(datalabel)
        else:
            found = set()
            for im in self.images:
                for ann in (im.annotations if im.annotations is not None else []):
                    if 'label' in ann:
                        found.add(ann['label'])
            category_names = sorted(found)

        # category ids start from 1
        for cate_id, name in enumerate(category_names, start=1):
            cate2id[name] = cate_id
            categories.append({
                'id': cate_id,
                'name': name
            })

        ann_id = 0
        for img_id, im in enumerate(self.images, start=1):
            images.append({
                'file_name': os.path.basename(im.source),
                'height': im.height,
                'width': im.width,
                'id': img_id
            })
            for ann in (im.annotations if im.annotations is not None else []):
                ann_id += 1
                bbox = ann.get('bbox')
                mask = ann.get('mask')
                record = {
                    'id': ann_id,
                    'image_id': img_id,
                    'category_id': cate2id[ann['label']],
                    'bbox': self.__xyxy2xywh(bbox) if bbox is not None else None,
                    'score': ann.get('score'),
                    'area': ann.get('area'),
                    'iscrowd': 0
                }
                if mask is not None:
                    # pycocotools is imported only when a mask has to be encoded,
                    # so that bbox-only decks can be exported without it.
                    import pycocotools.mask
                    rle_mask = pycocotools.mask.encode(np.asfortranarray(mask).astype(np.uint8))
                    record['segmentation'] = {
                        'size': rle_mask['size'],
                        'counts': rle_mask['counts'].decode()
                    }
                    if record['bbox'] is None:
                        # no bbox given: use the box enclosing the mask, already in xywh
                        record['bbox'] = pycocotools.mask.toBbox(rle_mask).tolist()
                annotations.append(record)

        return {'images': images, 'annotations': annotations, 'categories': categories}


    def __xyxy2xywh(self, bbox):
        """Convert a bbox from (x1, y1, x2, y2) to (x, y, width, height)."""
        return [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]



#    def __format_voc(self) -> dict:
#        pass



[docs]
def imread(source,
           exif_transpose: bool=True,
           req_timeout: int=60) -> PIL.Image.Image:
    """Open image from various sources

    This function opens image from various sources,
    including file, url, bytes, base64 data URI, PIL image, and numpy array
    and convert it to the PIL.Image.Image class instance.
    The kind of the source is determined from its type (and, for strings, its prefix).
    Image will be transposed based on the EXIF orientation tag if `exif_transpose` is set to True;
    this applies to images decoded from a file, url, bytes, or base64 data URI.
    A PIL image is returned as-is (not copied).
    For a numpy array with three or more channels, the order of the first three channels
    is reversed (i.e., BGR, as used by OpenCV, becomes RGB) and any further channel is dropped.
    A 2-d array (or an array with a single channel) is opened as it is.

    Args:
        source (str, pathlib.Path, bytes, PIL.Image.Image, np.ndarray): Image source,
            can be a file path, url, bytes, base64 data URI, PIL image, or numpy array.
        exif_transpose (bool): Whether to transpose the image based on the EXIF orientation tag.
        req_timeout (int): The timeout for the request to get image from url. Default is 60 seconds.

    Returns:
        PIL.Image.Image: Image data.

    Raises:
        ImportError: If the source is a url and the requests package is not installed.
        ValueError: If the url request fails or the type of the source is not supported.

    Examples:
        >>> im = imread('image.jpg')
    """
    if isinstance(source, PIL.Image.Image):
        return source

    if isinstance(source, str):
        # Strings are dispatched by prefix; the options are forwarded so that
        # `exif_transpose=False` is honored for every kind of source.
        if re.match(r'https?://', source):
            try:
                import requests
            except ImportError as e:
                raise ImportError('Unable to open image from url. '
                                  'Install requests package to enable this feature.') from e
            try:
                req = requests.get(source, timeout=req_timeout)
                req.raise_for_status()
            except requests.RequestException as e:
                raise ValueError('Image Not Found.', source) from e
            return imread(req.content, exif_transpose=exif_transpose, req_timeout=req_timeout)

        if source.startswith('data:image'):
            # data URI: everything after the first comma is the base64 payload
            return imread(base64.b64decode(source.split(',', 1)[1]),
                          exif_transpose=exif_transpose, req_timeout=req_timeout)

        return imread(pathlib.Path(source), exif_transpose=exif_transpose, req_timeout=req_timeout)

    im = None

    if isinstance(source, pathlib.Path):
        im = PIL.Image.open(source)
        if exif_transpose:
            im = PIL.ImageOps.exif_transpose(im)

    elif isinstance(source, (bytes, bytearray)):
        im = PIL.Image.open(io.BytesIO(bytes(source)))
        if exif_transpose:
            im = PIL.ImageOps.exif_transpose(im)

    elif isinstance(source, np.ndarray):
        if source.ndim == 2:
            # Single-channel image: there are no color channels to reorder.
            im = PIL.Image.fromarray(source.copy())
        elif source.ndim == 3 and source.shape[2] == 1:
            im = PIL.Image.fromarray(source[..., 0].copy())
        else:
            # Reverse the first three channels and drop the remaining ones.
            im = PIL.Image.fromarray(np.ascontiguousarray(source[..., 2::-1]))

    else:
        raise ValueError(f'Unable open image file due to unknown type of "{source}".')

    if im is None:
        raise ValueError(f'Unable open image file {source}. Check if the file exists or the url is correct.')

    return im

    




[docs]
def imconvert(im: ImageSourceTypes,
              format: str='PIL') -> ImageSourceTypes:
    """Convert image format

    Convert image format from any format to the specific format.
    The input is first opened with `imread` and then converted.

    Args:
        im (ImageSourceTypes): Image source, can be a file path, url, bytes, base64, PIL image, or numpy array.
        format (str): The format of the returned image (case-insensitive). Default is 'PIL'.
            Options are 'cv2' (or 'cv', 'array'), 'bytes', 'base64', 'gray' (or 'grey'), and 'PIL'.

            - 'cv2': numpy array; for images with color channels the first three channels
              are returned in reversed order (RGB becomes BGR, compatible with OpenCV);
              images without a channel axis (e.g., grayscale) are returned as a 2-d array.
            - 'bytes': JPEG-encoded bytes.
            - 'base64': data URI string of the JPEG-encoded image.
            - 'gray': PIL image converted to the mode 'L'.
            - 'PIL': PIL image.

    Returns:
        ImageSourceTypes: Image data in the specified format.

    Raises:
        ValueError: If the format is not supported.

    Examples:
        >>> im = imread('image.jpg')
        >>> imconvert(im, 'cv2')
    """
    def _pil2bytes(pil_im) -> bytes:
        # JPEG cannot store modes such as RGBA, LA, or P; convert them to RGB first.
        if pil_im.mode not in ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr'):
            pil_im = pil_im.convert('RGB')
        im_buff = io.BytesIO()
        pil_im.save(im_buff, format='JPEG')
        return im_buff.getvalue()

    im = imread(im)
    format_name = format.lower()

    if format_name in ['array', 'cv2', 'cv']:
        im_array = np.array(im)
        if im_array.ndim < 3:
            # No channel axis: reversing the last axis would mangle the columns.
            return im_array
        return im_array[..., 2::-1]
    elif format_name == 'pil':
        return im
    elif format_name == 'bytes':
        return _pil2bytes(im)
    elif format_name == 'base64':
        return 'data:image/jpeg;base64, ' + \
            base64.b64encode(_pil2bytes(im)).decode('utf-8')
    elif format_name in ['gray', 'grey']:
        return im.convert('L')
    else:
        raise ValueError(f'Unsupported image format "{format}".')






[docs]
def imresize(im: ImageSourceTypes,
             shape: list[int, int]|tuple[int, int]|None=None,
             scale: float|None=None,
             shortest: int|None=None,
             longest: int|None=None,
             resample: object=PIL.Image.BILINEAR) -> PIL.Image.Image:
    """Resize the image

    Resize the image to the given shape, scale, shortest, or longest side.
    Only the first given option is used, in the order of `shape`, `scale`, `shortest`, and `longest`.
    Except for `shape`, the aspect ratio is kept and each side is at least one pixel.

    Args:
        im: ImageSourceTypes: Image source, can be a file path, url, bytes, base64, PIL image, or numpy array.
        shape: tuple: The size of the resized image, (width, height).
            It is passed to `PIL.Image.Image.resize` as it is.
        scale: float: The scale factor to resize the image.
        shortest: int: The length of the shortest side of the resized image.
        longest: int: The length of the longest side of the resized image.
        resample: int: The resampling filter. Default is PIL.Image.BILINEAR.

    Returns:
        PIL.Image.Image: The resized image.

    Raises:
        ValueError: If none of `shape`, `scale`, `shortest`, and `longest` is given,
            or if the given `scale`, `shortest`, or `longest` is not positive.

    Examples:
        >>> imresize('image.jpg', shape=(256, 256))
        >>> imresize('image.jpg', scale=0.5)
        >>> imresize('image.jpg', shortest=256)
        >>> imresize('image.jpg', longest=256)
    """
    im = imread(im)

    if shape is not None:
        return im.resize(tuple(shape), resample=resample)

    if scale is not None:
        if scale <= 0:
            raise ValueError('The scale factor should be greater than 0.')
        new_width = int(im.width * scale)
        new_height = int(im.height * scale)
    elif shortest is not None or longest is not None:
        if shortest is not None:
            target, reference = shortest, min(im.size)
        else:
            target, reference = longest, max(im.size)
        if target <= 0:
            raise ValueError('The shortest or longest side should be greater than 0.')
        # Multiply before dividing: the reference side then comes out exactly as
        # `target`, whereas a pre-computed float ratio can be truncated one pixel short.
        new_width = int(im.width * target / reference)
        new_height = int(im.height * target / reference)
    else:
        raise ValueError('Specify the shape, scale, shortest, or longest side to resize the image.')

    # PIL rejects empty images; keep at least one pixel per side.
    return im.resize((max(1, new_width), max(1, new_height)), resample=resample)

    



[docs]
def imwrite(im: ImageSourceTypes, filename: str, quality: int=95) -> None:
    """Save image to file

    The image is opened with `imread` and written with the `save` method of the PIL image.

    Args:
        im: ImageSourceTypes: Image source, can be a file path, url, bytes, base64, PIL image, or numpy array.
        filename: str: The path of the file to write.
        quality: int: The value passed as `quality` to the `save` method. Default is 95.

    Examples:
        >>> imwrite(imread('image.jpg'), 'image.jpg')
        >>> imwrite(imread('image.jpg'), 'image.jpg', 100)
    """
    imread(im).save(filename, quality=quality)





[docs]
def imshow(im: ImageSourceTypes|list[ImageSourceTypes], ncol: int|None=None, nrow: int|None=None) -> object:
    """Display image using matplotlib.pyplot

    Each image is opened with `imread` and shown in its own subplot panel.
    Images given as strings get the base name of the string as the panel title.

    Args:
        im: ImageSourceTypes: Image or list of images to display.
        ncol: int: Number of columns to display the images. Default is None (automatically set).
        nrow: int: Number of rows to display the images. Default is None (automatically set).

    Returns:
        The `matplotlib.pyplot` module, after `show()` has been called.

    Raises:
        ImportError: If matplotlib is not installed.
    """
    try:
        import math
        import matplotlib.pyplot as plt
    except ImportError as e:
        raise ImportError('Unable to display image. '
                          'Install matplotlib package to enable image visualization feature.') from e

    panels = im if isinstance(im, (list, tuple)) else [im]
    n_panels = len(panels)

    # work out the grid of subplot panels from whatever the caller fixed
    if ncol is None:
        if nrow is not None:
            ncol = math.ceil(n_panels / nrow)
        elif n_panels > 1:
            # nothing fixed: use a grid as square as possible
            ncol = math.ceil(math.sqrt(n_panels))
            nrow = math.ceil(n_panels / ncol)
        else:
            ncol = nrow = 1
    elif nrow is None:
        nrow = math.ceil(n_panels / ncol)

    plt.figure()

    for position, panel in enumerate(panels, start=1):
        plt.subplot(nrow, ncol, position)
        plt.imshow(imread(panel))
        if isinstance(panel, str):
            plt.title(os.path.basename(panel))

    plt.show()
    return plt





[docs]
def imlist(source: str|list[str],
           ext: str|list[str]=['.jpg', '.jpeg', '.png', '.tiff'],
           ignore_case: bool=True) -> list[str]:
    """List all image files from the given sources

    The function recevies image sources as a file path, directory path, or a list of file and directory paths.
    If the source is a directory, the function will recursively search for image files with the given extensions.

    Args:
        source: str | list[str]: The directory path.
        ext: list[str]: The list of file extensions to search for. Default is ['.jpg', 'jpeg', '.png', '.tiff'].
        ignore_case: bool: Whether to ignore the case of the file extension. Default is True.

    Returns:
        list: List of image files in the directory.
    """
    im_list = []
    if isinstance(source, str):
        sources = [source]
    if ignore_case:
        ext = [e.lower() for e in ext]

    for source in sources:
        if os.path.isdir(source):
            for f in glob.glob(os.path.join(source, '**', '*'), recursive=True):
                f_ext = os.path.splitext(f)[1]
                if ignore_case:
                    f_ext = f_ext.lower()
                if f_ext in ext:
                    im_list.append(f)
        elif os.path.isfile(source):
            im_list.append(source)
        else:
            raise ValueError(f'The input "{source}" is not found or is neither a file nor a directory.')
    
    return im_list