diff --git a/cvpods/data/transforms/__init__.py b/cvpods/data/transforms/__init__.py index a9f0ac8..2facdf6 100644 --- a/cvpods/data/transforms/__init__.py +++ b/cvpods/data/transforms/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .transform import * -from .transform_gen import * +from .augmentations import * +from .auto_aug import AutoAugment __all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/cvpods/data/transforms/transform.py b/cvpods/data/transforms/augmentations.py similarity index 55% rename from cvpods/data/transforms/transform.py rename to cvpods/data/transforms/augmentations.py index 887367e..2935ac3 100644 --- a/cvpods/data/transforms/transform.py +++ b/cvpods/data/transforms/augmentations.py @@ -1,11 +1,16 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright (c) BaseDetection, Inc. and its affiliates. All Rights Reserved +# Copyright (c) Facebook, Inc. and its affiliates. +# Modified by BaseDetection, Inc. and its affiliates. + +# pylint: disable=W0613 import inspect +import pprint import random +import sys from abc import ABCMeta, abstractmethod -from typing import Callable, TypeVar +from typing import Callable import cv2 import numpy as np @@ -13,43 +18,42 @@ import pycocotools.mask as mask_util import torch -import torchvision.transforms as transforms import cvpods -from cvpods.structures import BoxMode +from cvpods.structures import Boxes, BoxMode, pairwise_iou -from .transform_util import to_float_tensor, to_numpy +from ..registry import TRANSFORMS __all__ = [ - "ExpandTransform", - "AffineTransform", - "BlendTransform", - "IoUCropTransform", - "CropTransform", - "CropPadTransform", - "GridSampleTransform", - "RotationTransform", - "HFlipTransform", - "VFlipTransform", - "NoOpTransform", - "ScaleTransform", - "DistortTransform", - "Transform", - "TransformList", - "ExtentTransform", - "ResizeTransform", - # Transform used in ssl - "GaussianBlurTransform", - "GaussianBlurConvTransform", - "SolarizationTransform", - "ComposeTransform", - "LabSpaceTransform", - "PadTransform", + "Pad", + "RandomScale", + "Expand", + "MinIoURandomCrop", + "RandomSwapChannels", + "CenterAffine", + "RandomBrightness", + "RandomContrast", + "RandomCrop", + "RandomCropWithInstance", + "RandomCropWithMaxAreaLimit", + "RandomCropPad", + "RandomExtent", + "RandomFlip", + "RandomSaturation", + "RandomDistortion", + "Resize", + "ResizeShortestEdge", + "ResizeLongestEdge", + "ShuffleList", + "RandomList", + "RepeatList", + "TorchTransformGen", + # transforms used in ssl + "RandomGaussianBlur", + "RandomSolarization", + "RandomLightning", ] -# NOTE: to document methods in subclasses, it's sufficient to only document those whose -# implemenation needs special attention. - class Transform(metaclass=ABCMeta): """ @@ -58,11 +62,12 @@ class Transform(metaclass=ABCMeta): all methods of this class are deterministic w.r.t their input arguments. In training, there should be a higher-level policy that generates (likely with random variations) these transform ops. Each transform op may handle several - data types, e.g.: image, coordinates, segmentation, bounding boxes. Some of - them have a default implementation, but can be overwritten if the default - isn't appropriate. The implementation of each method may choose to modify - its input data in-place for efficient transformation. + data types, e.g.: image, point cloud, coordinates, segmentation, bounding boxes. 
+ Some of them have a default implementation, but can be overwritten if the + default isn't appropriate. The implementation of each method may choose to + modify its input data in-place for efficient transformation. """ + def _set_attributes(self, params: list = None): """ Set attributes from the input list of parameters. @@ -71,11 +76,186 @@ def _set_attributes(self, params: list = None): params (list): list of parameters. """ - if params: + if params is not None: for k, v in params.items(): if k != "self" and not k.startswith("_"): setattr(self, k, v) + def __call__(self, data: list, annotations: list = None, **kwargs): + """ + Apply transform to the data and corresponding annotations (if exist). + """ + raise NotImplementedError + + @classmethod + def register_type(cls, data_type: str, func: Callable): + """ + Register the given function as a handler that this transform will use + for a specific data type. + + Args: + data_type (str): the name of the data type (e.g., box) + func (callable): takes a transform and a data, returns the + transformed data. + + Examples: + + .. code-block:: python + + def func(flip_transform, voxel_data): + return transformed_voxel_data + HFlipTransform.register_type("voxel", func) + + # ... + transform = HFlipTransform(...) + transform.apply_voxel(voxel_data) # func will be called + """ + assert callable( + func + ), "You can only register a callable to a Transform. Got {} instead.".format( + func) + argspec = inspect.getfullargspec(func) + assert len(argspec.args) == 2, ( + "You can only register a function that takes two positional " + "arguments to a Transform! Got a function with spec {}".format( + str(argspec))) + setattr(cls, "apply_" + data_type, func) + + def _rand_range(self, low=1.0, high=None, size=None): + """ + Uniform float random number between low and high. + """ + if high is None: + low, high = 0, low + if size is None: + size = [] + return np.random.uniform(low, high, size) + + def __repr__(self): + """ + Produce something like: + "MyTransformGen(field1={self.field1}, field2={self.field2})" + """ + try: + sig = inspect.signature(self.__init__) + classname = type(self).__name__ + argstr = [] + for name, param in sig.parameters.items(): + assert ( + param.kind != param.VAR_POSITIONAL + and param.kind != param.VAR_KEYWORD + ), "The default __repr__ doesn't support *args or **kwargs" + assert hasattr(self, name), ( + "Attribute {} not found! " + "Default __repr__ only works if attributes match the constructor.".format( + name + ) + ) + attr = getattr(self, name) + default = param.default + if default is attr: + continue + argstr.append("{}={}".format(name, pprint.pformat(attr))) + return "{}({})".format(classname, ", ".join(argstr)) + except AssertionError: + return super().__repr__() + + +class ComposeTransform(Transform): + """ + Composes several transforms together. + """ + + def __init__(self, transforms: list): + """ + Args: + transforms (list[Transform]): list of transforms to compose. + """ + super().__init__() + self._set_attributes(locals()) + + def __eq__(self, other): + if not isinstance(other, ComposeTransform): + return False + return self.transforms == other.transforms + + def __call__(self, img, annotations=None, **kwargs): + for tfm in self.transforms: + img, annotations = tfm(img, annotations, **kwargs) + return img, annotations + + def __repr__(self): + return "".join([tfm for tfm in self.transforms]) + + +@TRANSFORMS.register() +class RandomList(ComposeTransform): + """ + Random select subset of provided augmentations. 
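+
+    Example (a minimal sketch; ``MyAug1``/``MyAug2`` are hypothetical names
+    standing in for any registered transforms):
+
+    .. code-block:: python
+
+        aug = RandomList(transforms=[MyAug1(), MyAug2(), RandomFlip()],
+                         num_layers=2)
+        img, annotations = aug(img, annotations)  # applies 2 sampled transforms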
+ """ + def __init__(self, transforms, num_layers=2, choice_weights=None): + """ + Args: + transforms (List[TorchTransformGen]): list of transforms need to be performed. + num_layers (int): parameters of np.random.choice. + choice_weights (optional, float): parameters of np.random.choice. + """ + self.all_transforms = transforms + self.num_layers = num_layers + self.choice_weights = choice_weights + + def __call__(self, img, annotations=None, **kwargs): + self.transforms = np.random.choice( + self.all_transforms, + self.num_layers, + replace=self.choice_weights is None, + p=self.choice_weights) + + return super().__call__(img, annotations) + + +@TRANSFORMS.register() +class ShuffleList(ComposeTransform): + """ + Randomly shuffle the `transforms` order. + """ + + def __call__(self, img, annotations=None, **kwargs): + np.random.shuffle(self.transforms) + return super().__call__(img, annotations) + + +@TRANSFORMS.register() +class RepeatList(ComposeTransform): + """ + Forward several times of provided transforms for a given image. + """ + def __init__(self, transforms, repeat_times=2): + """ + Args: + transforms (list[TransformGen]): List of transform to be repeated. + repeat_times (int): number of duplicates desired. + """ + super().__init__(transforms) + self.times = repeat_times + + def __call__(self, img, annotations=None, **kwargs): + repeat_imgs = [] + repeat_annotations = [] + for t in range(self.times): + tmp_img, tmp_anno = super().__call__(img, annotations, **kwargs) + repeat_imgs.append(tmp_img) + repeat_annotations.append(tmp_anno) + repeat_imgs = np.stack(repeat_imgs, axis=0) + + return repeat_imgs, repeat_annotations + + +class DefaultTransorm(Transform): + """ + Default transform for 2D detection, segmentation, keypoints, etc. + """ + @abstractmethod def apply_image(self, img: np.ndarray): """ @@ -222,7 +402,8 @@ def __call__(self, image, annotations=None, **kwargs): keypoints[:, :2] = self.apply_coords(keypoints[:, :2]) # This assumes that HorizFlipTransform is the only one that does flip - do_hflip = isinstance(self, cvpods.data.transforms.transform.HFlipTransform) + do_hflip = isinstance(self, cvpods.data.transforms.augmentations.RandomFlip) \ + and self.horizontal # Alternative way: check if probe points was horizontally flipped. # probe = np.asarray([[0.0, 0.0], [image_width, 0.0]]) @@ -257,168 +438,19 @@ def __call__(self, image, annotations=None, **kwargs): "Supported type is ndarray.".format(type(sem_seg))) return image, annotations - @classmethod - def register_type(cls, data_type: str, func: Callable): - """ - Register the given function as a handler that this transform will use - for a specific data type. - - Args: - data_type (str): the name of the data type (e.g., box) - func (callable): takes a transform and a data, returns the - transformed data. - - Examples: - - .. code-block:: python - - def func(flip_transform, voxel_data): - return transformed_voxel_data - HFlipTransform.register_type("voxel", func) - - # ... - transform = HFlipTransform(...) - transform.apply_voxel(voxel_data) # func will be called - """ - assert callable( - func - ), "You can only register a callable to a Transform. Got {} instead.".format( - func) - argspec = inspect.getfullargspec(func) - assert len(argspec.args) == 2, ( - "You can only register a function that takes two positional " - "arguments to a Transform! 
Got a function with spec {}".format( - str(argspec))) - setattr(cls, "apply_" + data_type, func) - -_T = TypeVar("_T") - - -class ComposeTransform(object): - """ - Composes several transforms together. - """ - - def __init__(self, tfms): - """ - Args: - transforms (list[Transform]): list of transforms to compose. - """ - super().__init__() - self.transforms = tfms - - def __eq__(self, other): - if not isinstance(other, ComposeTransform): - return False - return self.transforms == other.transforms - - def __call__(self, img, annotations=None, **kwargs): - for tfm in self.transforms: - img, annotations = tfm(img, annotations, **kwargs) - return img, annotations - - def __repr__(self): - return "".join([tfm for tfm in self.transforms]) - - -# TODO: Deprecated -# pyre-ignore-all-errors -class TransformList: - """ - Maintain a list of transform operations which will be applied in sequence. - Attributes: - transforms (list[Transform]) - """ - def __init__(self, transforms: list): - """ - Args: - transforms (list[Transform]): list of transforms to perform. - """ - super().__init__() - for t in transforms: - assert isinstance(t, Transform), t - self.transforms = transforms - - def _apply(self, x: _T, meth: str) -> _T: - """ - Apply the transforms on the input. - Args: - x: input to apply the transform operations. - meth (str): meth. - Returns: - x: after apply the transformation. - """ - for t in self.transforms: - x = getattr(t, meth)(x) - return x - - def __getattr__(self, name: str): - """ - Args: - name (str): name of the attribute. - """ - if name.startswith("apply_"): - return lambda x: self._apply(x, name) - raise AttributeError( - "TransformList object has no attribute {}".format(name)) - - def __add__(self, other: "TransformList") -> "TransformList": - """ - Args: - other (TransformList): transformation to add. - Returns: - TransformList: list of transforms. - """ - others = (other.transforms - if isinstance(other, TransformList) else [other]) - return TransformList(self.transforms + others) - - def __iadd__(self, other: "TransformList") -> "TransformList": - """ - Args: - other (TransformList): transformation to add. - Returns: - TransformList: list of transforms. - """ - others = (other.transforms - if isinstance(other, TransformList) else [other]) - self.transforms.extend(others) - return self - - def __radd__(self, other: "TransformList") -> "TransformList": - """ - Args: - other (TransformList): transformation to add. - Returns: - TransformList: list of transforms. - """ - others = (other.transforms - if isinstance(other, TransformList) else [other]) - return TransformList(others + self.transforms) - - def insert(self, idx: int, other: "TransformList") -> "TransformList": - """ - Args: - idx (int): insert position. - other (TransformList): transformation to insert. - Returns: - None - """ - assert idx in range(len(self.transforms)) - others = (other.transforms - if isinstance(other, TransformList) else [other]) - self.transforms = self.transforms[:idx] + others + self.transforms[idx:] - - -class DistortTransform(Transform): +# Simplify this to inherent SimpleTransform +@TRANSFORMS.register() +class RandomDistortion(Transform): """ Distort image w.r.t hue, saturation and exposure. 
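+
+    Example (a minimal sketch): ``hue`` is a random shift sampled from
+    ``[-hue, hue]``; ``saturation`` and ``exposure`` are upper bounds for a
+    random scale factor (or its inverse).
+
+    .. code-block:: python
+
+        aug = RandomDistortion(hue=0.1, saturation=1.5, exposure=1.5,
+                               image_format="BGR", prob=0.5)
+        img, annotations = aug(img, annotations)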
""" - def __init__(self, hue, saturation, exposure, image_format): + def __init__(self, hue, saturation, exposure, image_format="BGR", prob=0.5): + assert image_format in ["RGB", "BGR"] super().__init__() self._set_attributes(locals()) + self.cvt_code = { "RGB": (cv2.COLOR_RGB2HSV, cv2.COLOR_HSV2RGB), "BGR": (cv2.COLOR_BGR2HSV, cv2.COLOR_HSV2BGR), @@ -426,7 +458,7 @@ def __init__(self, hue, saturation, exposure, image_format): if saturation > 1.0: saturation /= 255. # in range [0, 1] - def apply_image(self, img: np.ndarray) -> np.ndarray: + def __call__(self, img, annotations=None, **kwargs): """ Args: img (ndarray): of shape HxW, HxWxC, or NxHxWxC. The array can be @@ -436,31 +468,30 @@ def apply_image(self, img: np.ndarray) -> np.ndarray: Returns: ndarray: the distorted image(s). """ - dhue = np.random.uniform(low=-self.hue, high=self.hue) - dsat = self._rand_scale(self.saturation) - dexp = self._rand_scale(self.exposure) - - dtype = img.dtype - img = cv2.cvtColor(img, self.cvt_code[0]) - img = np.asarray(img, dtype=np.float32) / 255. - img[:, :, 1] *= dsat - img[:, :, 2] *= dexp - H = img[:, :, 0] + dhue + do = self._rand_range() < self.prob + if do: + dhue = np.random.uniform(low=-self.hue, high=self.hue) + dsat = self._rand_scale(self.saturation) + dexp = self._rand_scale(self.exposure) + + dtype = img.dtype + img = cv2.cvtColor(img, self.cvt_code[0]) + img = np.asarray(img, dtype=np.float32) / 255. + img[:, :, 1] *= dsat + img[:, :, 2] *= dexp + H = img[:, :, 0] + dhue + + if dhue > 0: + H[H > 1.0] -= 1.0 + else: + H[H < 0.0] += 1.0 + + img[:, :, 0] = H + img = (img * 255).clip(0, 255).astype(np.uint8) + img = cv2.cvtColor(img, self.cvt_code[1]) + img = np.asarray(img, dtype=dtype) - if dhue > 0: - H[H > 1.0] -= 1.0 - else: - H[H < 0.0] += 1.0 - - img[:, :, 0] = H - img = (img * 255).clip(0, 255).astype(np.uint8) - img = cv2.cvtColor(img, self.cvt_code[1]) - img = np.asarray(img, dtype=dtype) - - return img - - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - return coords + return img, annotations def _rand_scale(self, upper_bound): """ @@ -477,22 +508,27 @@ def _rand_scale(self, upper_bound): return scale return 1 / scale - def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray: - return segmentation - -class AffineTransform(Transform): +@TRANSFORMS.register() +class CenterAffine(DefaultTransorm): """ Augmentation from CenterNet """ - def __init__(self, src, dst, output_size, pad_value=[0, 0, 0]): + def __init__(self, boarder, output_size, pad_value=[0, 0, 0], random_aug=True): + """ output_size:(w, h) """ super().__init__() - affine = cv2.getAffineTransform(np.float32(src), np.float32(dst)) self._set_attributes(locals()) + def __call__(self, image, annotations, **kwargs): + self.img_shape = image.shape[:2] + self.center, self.scale = self.generate_center_and_scale(self.img_shape) + self.src, self.dst = self.generate_src_and_dst(self.center, self.scale, self.output_size) + self.affine = cv2.getAffineTransform(np.float32(self.src), np.float32(self.dst)) + return super().__call__(image, annotations) + def apply_image(self, img: np.ndarray) -> np.ndarray: """ Apply AffineTransform for the image(s). @@ -534,102 +570,42 @@ def apply_coords(self, coords: np.ndarray) -> np.ndarray: return coords -class RotationTransform(Transform): +@TRANSFORMS.register() +class RandomFlip(DefaultTransorm): """ - This method returns a copy of this image, rotated the given - number of degrees counter clockwise around its center. + Perform horizontal flip. 
""" - def __init__(self, h, w, angle, expand=True, center=None, interp=None): + def __init__(self, prob=0.5, *, horizontal=True, vertical=False): """ Args: - h, w (int): original image size - angle (float): degrees for rotation - expand (bool): choose if the image should be resized to fit the whole - rotated image (default), or simply cropped - center (tuple (width, height)): coordinates of the rotation center - if left to None, the center will be fit to the center of each image - center has no effect if expand=True because it only affects shifting - interp: cv2 interpolation method, default cv2.INTER_LINEAR + prob (float): probability of flip. + horizontal (boolean): whether to apply horizontal flipping + vertical (boolean): whether to apply vertical flipping """ super().__init__() - image_center = np.array((w / 2, h / 2)) - if center is None: - center = image_center - if interp is None: - interp = cv2.INTER_LINEAR - abs_cos, abs_sin = (abs(np.cos(np.deg2rad(angle))), abs(np.sin(np.deg2rad(angle)))) - if expand: - # find the new width and height bounds - bound_w, bound_h = np.rint( - [h * abs_sin + w * abs_cos, h * abs_cos + w * abs_sin] - ).astype(int) - else: - bound_w, bound_h = w, h + if horizontal and vertical: + raise ValueError( + "Cannot do both horiz and vert. Please use two Flip instead." + ) + if not horizontal and not vertical: + raise ValueError("At least one of horiz or vert has to be True!") self._set_attributes(locals()) - self.rm_coords = self.create_rotation_matrix() - # Needed because of this problem https://github.com/opencv/opencv/issues/11784 - self.rm_image = self.create_rotation_matrix(offset=-0.5) - def apply_image(self, img, interp=None): - """ - img should be a numpy array, formatted as Height * Width * Nchannels - """ - if len(img) == 0 or self.angle % 360 == 0: - return img - assert img.shape[:2] == (self.h, self.w) - interp = interp if interp is not None else self.interp - return cv2.warpAffine(img, self.rm_image, (self.bound_w, self.bound_h), flags=interp) + def __call__(self, image, annotations, **kwargs): + h, w = image.shape[:2] + do = self._rand_range() < self.prob - def apply_coords(self, coords): - """ - coords should be a N * 2 array-like, containing N couples of (x, y) points - """ - coords = np.asarray(coords, dtype=float) - if len(coords) == 0 or self.angle % 360 == 0: - return coords - return cv2.transform(coords[:, np.newaxis, :], self.rm_coords)[:, 0, :] - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=cv2.INTER_NEAREST) - return segmentation - - def create_rotation_matrix(self, offset=0): - center = (self.center[0] + offset, self.center[1] + offset) - rm = cv2.getRotationMatrix2D(tuple(center), self.angle, 1) - if self.expand: - # Find the coordinates of the center of rotation in the new image - # The only point for which we know the future coordinates is the center of the image - rot_im_center = cv2.transform(self.image_center[None, None, :] + offset, rm)[0, 0, :] - new_center = np.array([self.bound_w / 2, self.bound_h / 2]) + offset - rot_im_center - # shift the rotation center to the new coordinates - rm[:, 2] += new_center - return rm - - def inverse(self): - """ - The inverse is to rotate it back with expand, and crop to get the original shape. 
- """ - if not self.expand: # Not possible to inverse if a part of the image is lost - raise NotImplementedError() - rotation = RotationTransform( - self.bound_h, self.bound_w, -self.angle, True, None, self.interp - ) - crop = CropTransform( - (rotation.bound_w - self.w) // 2, (rotation.bound_h - self.h) // 2, self.w, self.h - ) - return TransformList([rotation, crop]) - - -class HFlipTransform(Transform): - """ - Perform horizontal flip. - """ + if self.horizontal: + self.width = w + else: + self.height = h - def __init__(self, width: int): - super().__init__() - self._set_attributes(locals()) + if do: + return super().__call__(image, annotations, **kwargs) + else: + return image, annotations def apply_image(self, img: np.ndarray) -> np.ndarray: """ @@ -643,13 +619,23 @@ def apply_image(self, img: np.ndarray) -> np.ndarray: Returns: ndarray: the flipped image(s). """ - tensor = torch.from_numpy(np.ascontiguousarray(img).copy()) - if len(tensor.shape) == 2: - # For dimension of HxW. - tensor = tensor.flip((-1)) - elif len(tensor.shape) > 2: - # For dimension of HxWxC, NxHxWxC. - tensor = tensor.flip((-2)) + if self.horizontal: + tensor = torch.from_numpy(np.ascontiguousarray(img).copy()) + if len(tensor.shape) == 2: + # For dimension of HxW. + tensor = tensor.flip((-1)) + elif len(tensor.shape) > 2: + # For dimension of HxWxC, NxHxWxC. + tensor = tensor.flip((-2)) + else: + tensor = torch.from_numpy(np.ascontiguousarray(img).copy()) + if len(tensor.shape) == 2: + # For dimension of HxW. + tensor = tensor.flip((-2)) + elif len(tensor.shape) > 2: + # For dimension of HxWxC, NxHxWxC. + tensor = tensor.flip((-3)) + return tensor.numpy() def apply_coords(self, coords: np.ndarray) -> np.ndarray: @@ -667,56 +653,11 @@ def apply_coords(self, coords: np.ndarray) -> np.ndarray: Therefore they are flipped by `(W - x, H - y)`, not `(W - 1 - x, H 1 - y)`. """ - coords[:, 0] = self.width - coords[:, 0] - return coords - - -class VFlipTransform(Transform): - """ - Perform vertical flip. - """ - - def __init__(self, height: int): - super().__init__() - self._set_attributes(locals()) - - def apply_image(self, img: np.ndarray) -> np.ndarray: - """ - Flip the image(s). - - Args: - img (ndarray): of shape HxW, HxWxC, or NxHxWxC. The array can be - of type uint8 in range [0, 255], or floating point in range - [0, 1] or [0, 255]. - - Returns: - ndarray: the flipped image(s). - """ - tensor = torch.from_numpy(np.ascontiguousarray(img).copy()) - if len(tensor.shape) == 2: - # For dimension of HxW. - tensor = tensor.flip((-2)) - elif len(tensor.shape) > 2: - # For dimension of HxWxC, NxHxWxC. - tensor = tensor.flip((-3)) - return tensor.numpy() - - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - """ - Flip the coordinates. - - Args: - coords (ndarray): floating point array of shape Nx2. Each row is (x, y). - - Returns: - ndarray: the flipped coordinates. + if self.horizontal: + coords[:, 0] = self.width - coords[:, 0] + else: + coords[:, 1] = self.height - coords[:, 1] - Note: - The inputs are floating point coordinates, not pixel indices. - Therefore they are flipped by `(W - x, H - y)`, not - `(W - 1 - x, H - 1 - y)`. 
- """ - coords[:, 1] = self.height - coords[:, 1] return coords @@ -727,14 +668,12 @@ class NoOpTransform(Transform): def __init__(self): super().__init__() - def apply_image(self, img: np.ndarray) -> np.ndarray: - return img + def __call__(self, data, annotations): + return data, annotations - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - return coords - -class GaussianBlurTransform(Transform): +@TRANSFORMS.register() +class RandomGaussianBlur(Transform): """ GaussianBlur using PIL.ImageFilter.GaussianBlur """ @@ -747,100 +686,29 @@ def __init__(self, sigma, p=1.0): super().__init__() self._set_attributes(locals()) - def apply_image(self, img: np.ndarray) -> np.ndarray: - if np.random.random() < self.p: - sigma = random.uniform(self.sigma[0], self.sigma[1]) - img = Image.fromarray(img).filter(ImageFilter.GaussianBlur(radius=sigma)) - return np.array(img) - - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - return coords - - -class SolarizationTransform(Transform): - def __init__(self, thresh=128, p=0.5): - super().__init__() - self.thresh = thresh - self.p = p - - def apply_image(self, img: np.ndarray) -> np.ndarray: - if np.random.random() < self.p: - return np.array(ImageOps.solarize(Image.fromarray(img), self.thresh)) - else: - return img - - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - return coords - - -class GaussianBlurConvTransform(Transform): - def __init__(self, kernel_size, p=1.0): - super().__init__() - self._set_attributes(locals()) - radias = kernel_size // 2 - kernel_size = radias * 2 + 1 - self.blur_h = torch.nn.Conv2d(3, 3, kernel_size=(kernel_size, 1), - stride=1, padding=0, bias=False, groups=3) - self.blur_v = torch.nn.Conv2d(3, 3, kernel_size=(1, kernel_size), - stride=1, padding=0, bias=False, groups=3) - self.k = kernel_size - self.r = radias - - self.blur = torch.nn.Sequential( - torch.nn.ReflectionPad2d(radias), - self.blur_h, - self.blur_v - ) - - self.pil_to_tensor = transforms.ToTensor() - self.tensor_to_pil = transforms.ToPILImage() - - def apply_image(self, img: np.ndarray) -> np.ndarray: + def __call__(self, image, annotations=None, **kwargs): if np.random.random() < self.p: - img = self.pil_to_tensor(Image.fromarray(img)).unsqueeze(0) - - sigma = np.random.uniform(0.1, 2.0) - x = np.arange(-self.r, self.r + 1) - x = np.exp(-np.power(x, 2) / (2 * sigma * sigma)) - x = x / x.sum() - x = torch.from_numpy(x).view(1, -1).repeat(3, 1) - - self.blur_h.weight.data.copy_(x.view(3, 1, self.k, 1)) - self.blur_v.weight.data.copy_(x.view(3, 1, 1, self.k)) - - with torch.no_grad(): - img = self.blur(img) - img = img.squeeze() - - img = np.array(self.tensor_to_pil(img)) - return img + sigma = random.uniform(self.sigma[0], self.sigma[1]) + img = Image.fromarray(image).filter(ImageFilter.GaussianBlur(radius=sigma)) - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - return coords + return np.array(img), annotations -class LabSpaceTransform(Transform): - """ - Convert image from RGB into Lab color space - """ - def __init__(self): +@TRANSFORMS.register() +class RandomSolarization(Transform): + def __init__(self, thresh=128, p=0.5): super().__init__() - self._set_attributes(locals()) + self._set_attributes(locals) - def apply_image(self, img: np.ndarray) -> np.ndarray: - assert len(img.shape) == 3, 'Image should have dim H x W x 3' - assert img.shape[2] == 3, 'Image should have dim H x W x 3' - img_lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB) - img_lab = img_lab.astype(np.float32) - img_lab[:, :, 0] = (img_lab[:, :, 0] * (100.0 
/ 255.0)) - 50.0 - img_lab[:, :, 1:] = img_lab[:, :, 1:] - 128.0 - return img_lab + def __call__(self, img, annotations=None, **kwargs) -> np.ndarray: + if np.random.random() < self.p: + img = np.array(ImageOps.solarize(Image.fromarray(img), self.thresh)) - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - return coords + return img, annotations -class PadTransform(Transform): +@TRANSFORMS.register() +class Pad(DefaultTransorm): """ Pad image with `pad_value` to the specified `target_h` and `target_w`. @@ -911,17 +779,14 @@ def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray: return segmentation -class ScaleTransform(Transform): +@TRANSFORMS.register() +class RandomScale(DefaultTransorm): """ Resize the image to a target size. """ - def __init__(self, - h: int, - w: int, - new_h: int, - new_w: int, - interp: str = "BILINEAR"): + def __init__(self, output_size, ratio_range=(0.1, 2), interp="BILINEAR"): + """ Args: h, w (int): original image size. @@ -936,6 +801,10 @@ def __init__(self, """ super().__init__() self._set_attributes(locals()) + self.min_ratio, self.max_ratio = ratio_range + if isinstance(self.output_size, int): + self.output_size = [self.output_size] * 2 + _str_to_pil_interpolation = { "NEAREST": Image.NEAREST, "BILINEAR": Image.BILINEAR, @@ -949,6 +818,29 @@ def __init__(self, interp) self.interp = _str_to_pil_interpolation[interp] + def __call__(self, img, annotations=None, **kwargs): + h, w = img.shape[:2] + output_h, output_w = self.output_size + + # 1. Select a random scale factor. + random_scale_factor = np.random.uniform(self.min_ratio, self.max_ratio) + + scaled_size_h = int(random_scale_factor * output_h) + scaled_size_w = int(random_scale_factor * output_w) + + # 2. Recompute the accurate scale_factor using rounded scaled image size. + image_scale_h = scaled_size_h * 1.0 / h + image_scale_w = scaled_size_w * 1.0 / w + image_scale = min(image_scale_h, image_scale_w) + + # 3. Select non-zero random offset (x, y) if scaled image is larger than output_size. + scaled_h = int(h * 1.0 * image_scale) + scaled_w = int(w * 1.0 * image_scale) + + self.h, self.w, self.new_h, self.new_w = h, w, scaled_h, scaled_w + + return super().__call__(img, annotations) + def apply_image(self, img: np.ndarray, interp: str = None) -> np.ndarray: """ Resize the image(s). @@ -1000,64 +892,7 @@ def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray: return segmentation -class GridSampleTransform(Transform): - def __init__(self, grid: np.ndarray, interp: str): - """ - Args: - grid (ndarray): grid has x and y input pixel locations which are - used to compute output. Grid has values in the range of [-1, 1], - which is normalized by the input height and width. The dimension - is `N x H x W x 2`. - interp (str): interpolation methods. Options include `nearest` and - `bilinear`. - """ - super().__init__() - self._set_attributes(locals()) - - def apply_image(self, img: np.ndarray, interp: str = None) -> np.ndarray: - """ - Apply grid sampling on the image(s). - - Args: - img (ndarray): of shape NxHxWxC, or HxWxC or HxW. The array can be - of type uint8 in range [0, 255], or floating point in range - [0, 1] or [0, 255]. - interp (str): interpolation methods. Options include `nearest` and - `bilinear`. - Returns: - ndarray: grid sampled image(s). - """ - interp_method = interp if interp is not None else self.interp - float_tensor = torch.nn.functional.grid_sample( - to_float_tensor(img), # NxHxWxC -> NxCxHxW. 
- torch.from_numpy(self.grid), - mode=interp_method, - padding_mode="border", - align_corners=False, - ) - return to_numpy(float_tensor, img.shape, img.dtype) - - def apply_coords(self, coords: np.ndarray): - """ - Not supported. - """ - raise NotImplementedError() - - def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray: - """ - Apply grid sampling on the full-image segmentation. - - Args: - segmentation (ndarray): of shape HxW. The array should have integer - or bool dtype. - Returns: - ndarray: grid sampled segmentation. - """ - segmentation = self.apply_image(segmentation, interp=Image.NEAREST) - return segmentation - - -class IoUCropTransform(Transform): +class IoUCropTransform(DefaultTransorm): """ Perform crop operations on images. @@ -1188,7 +1023,85 @@ def apply_polygons(self, polygons: list) -> list: return [self.apply_coords(p) for p in cropped_polygons] -class CropTransform(Transform): +@TRANSFORMS.register() +class MinIoURandomCrop(IoUCropTransform): + """ + Random crop the image & bboxes, the cropped patches have minimum IoU + requirement with original image & bboxes, the IoU threshold is randomly + selected from min_ious. + """ + + def __init__(self, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3): + """ + Args: + min_ious (tuple): minimum IoU threshold for all intersections with bounding boxes + min_crop_size (float): minimum crop's size + (i.e. h,w := a*h, a*w, where a >= min_crop_size). + """ + self._set_attributes(locals()) + + def __call__(self, img, annotations=None, **kwargs): + """ + Args: + img (ndarray): of shape HxWxC(RGB). The array can be of type uint8 + in range [0, 255], or floating point in range [0, 255]. + annotations (list[dict[str->str]]): + Each item in the list is a bbox label of an object. The object is + represented by a dict, + which contains: + - bbox (list): bbox coordinates, top left and bottom right. + - bbox_mode (str): bbox label mode, for example: `XYXY_ABS`, + `XYWH_ABS` and so on... + """ + sample_mode = (1, *self.min_ious, 0) + h, w = img.shape[:2] + + boxes = list() + for obj in annotations: + boxes.append(BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)) + boxes = torch.tensor(boxes) + + while True: + mode = np.random.choice(sample_mode) + if mode == 1: + return NoOpTransform() + + min_iou = mode + for i in range(50): + new_w = np.random.uniform(self.min_crop_size * w, w) + new_h = np.random.uniform(self.min_crop_size * h, h) + + # h / w in [0.5, 2] + if new_h / new_w < 0.5 or new_h / new_w > 2: + continue + + left = np.random.uniform(w - new_w) + top = np.random.uniform(h - new_h) + + patch = np.array( + (int(left), int(top), int(left + new_w), int(top + new_h))) + + overlaps = pairwise_iou( + Boxes(patch.reshape(-1, 4)), + Boxes(boxes.reshape(-1, 4)) + ) + + if overlaps.min() < min_iou: + continue + + # center of boxes should inside the crop img + center = (boxes[:, :2] + boxes[:, 2:]) / 2 + mask = ((center[:, 0] > patch[0]) * (center[:, 1] > patch[1]) + * (center[:, 0] < patch[2]) * (center[:, 1] < patch[3])) + if not mask.any(): + continue + + self.x0, self.y0, self.w, self.h = int(left), int(top), int(new_w), int(new_h) + + return super().__call__(img, annotations) + + +class CropTransform(DefaultTransorm): """ Perform crop operations on images. 
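+
+    Example for ``MinIoURandomCrop`` above (a minimal sketch; each entry of
+    ``annotations`` must provide ``bbox`` and ``bbox_mode``):
+
+    .. code-block:: python
+
+        aug = MinIoURandomCrop(min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+                               min_crop_size=0.3)
+        img, annotations = aug(img, annotations)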
""" @@ -1278,23 +1191,91 @@ def apply_polygons(self, polygons: list) -> list: return [self.apply_coords(p) for p in cropped_polygons] -class CropPadTransform(Transform): +@TRANSFORMS.register() +class RandomCrop(CropTransform): + """ + Randomly crop a subimage out of an image. + """ + + def __init__(self, crop_type: str, crop_size, strict_mode=True): + """ + Args: + crop_type (str): one of "relative_range", "relative", "absolute". + See `config/defaults.py` for explanation. + crop_size (tuple[float]): the relative ratio or absolute pixels of + height and width + strict_mode (bool): if `True`, the target `crop_size` must be smaller than + the original image size. + """ + assert crop_type in ["relative_range", "relative", "absolute"] + self._set_attributes(locals()) + + def __call__(self, img, annotations=None, **kwargs): + h, w = img.shape[:2] + croph, cropw = self.get_crop_size((h, w)) + if self.strict_mode: + assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format( + self + ) + offset_range_h = max(h - croph, 0) + offset_range_w = max(w - cropw, 0) + self.y0 = np.random.randint(offset_range_h + 1) + self.x0 = np.random.randint(offset_range_w + 1) + + self.w = cropw + self.h = croph + + return super().__call__(img, annotations) + + def get_crop_size(self, image_size): + """ + Args: + image_size (tuple): height, width + + Returns: + crop_size (tuple): height, width in absolute pixels + """ + h, w = image_size + if self.crop_type == "relative": + ch, cw = self.crop_size + return int(h * ch + 0.5), int(w * cw + 0.5) + elif self.crop_type == "relative_range": + crop_size = np.asarray(self.crop_size, dtype=np.float32) + ch, cw = crop_size + np.random.rand(2) * (1 - crop_size) + return int(h * ch + 0.5), int(w * cw + 0.5) + elif self.crop_type == "absolute": + return self.crop_size + else: + NotImplementedError("Unknown crop type {}".format(self.crop_type)) + + +@TRANSFORMS.register() +class RandomCropPad(RandomCrop): def __init__(self, - x0: int, - y0: int, - w: int, - h: int, - new_w: int, - new_h: int, + crop_type: str, + crop_size, img_value=None, seg_value=None): - super().__init__() + super().__init__(crop_type, crop_size, strict_mode=False) self._set_attributes(locals()) - self.crop_trans = CropTransform(x0, y0, w, h) - pad_top_offset = self.get_pad_offset(h, new_h) - pad_left_offset = self.get_pad_offset(w, new_w) - self.pad_trans = PadTransform( - pad_top_offset, pad_left_offset, new_h, new_w, img_value, seg_value) + + def __call__(self, img, annotations=None, **kwargs): + h, w = img.shape[:2] + croph, cropw = self.get_crop_size((h, w)) + h0 = np.random.randint(h - croph + 1) if h >= croph else 0 + w0 = np.random.randint(w - cropw + 1) if w >= cropw else 0 + dh = min(h, croph) + dw = min(w, cropw) + # print(w0, h0, dw, dh) + + self.x0, self.y0, self.w, self.h, self.new_w, self.new_h = w0, h0, dw, dh, cropw, croph + self.crop_trans = CropTransform(self.x0, self.y0, self.w, self.h) + pad_top_offset = self.get_pad_offset(self.h, self.new_h) + pad_left_offset = self.get_pad_offset(self.w, self.new_w) + self.pad_trans = Pad( + pad_top_offset, pad_left_offset, self.new_h, self.new_w, self.img_value, self.seg_value) + + return super().__call__(img, annotations) def get_pad_offset(self, ori: int, tar: int): pad_length = max(tar - ori, 0) @@ -1363,6 +1344,77 @@ def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray: return segmentation +@TRANSFORMS.register() +class RandomCropWithInstance(RandomCrop): + """ + Make sure the cropping region contains the center of 
a random instance from annotations. + """ + + def __call__(self, img, annotations=None, **kwargs): + h, w = img.shape[:2] + croph, cropw = self.get_crop_size((h, w)) + if self.strict_mode: + assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format( + self + ) + offset_range_h = max(h - croph, 0) + offset_range_w = max(w - cropw, 0) + # Make sure there is always at least one instance in the image + assert annotations is not None, "Can not get annotations infos." + instance = np.random.choice(annotations) + bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) + bbox = torch.tensor(bbox) + center_xy = (bbox[:2] + bbox[2:]) / 2.0 + + offset_range_h_min = max(center_xy[1] - croph, 0) + offset_range_w_min = max(center_xy[0] - cropw, 0) + offset_range_h_max = min(offset_range_h, center_xy[1] - 1) + offset_range_w_max = min(offset_range_w, center_xy[0] - 1) + + self.y0 = np.random.randint(offset_range_h_min, offset_range_h_max + 1) + self.x0 = np.random.randint(offset_range_w_min, offset_range_w_max + 1) + + self.w = cropw + self.h = croph + + return super().__call__(img, annotations) + + +@TRANSFORMS.register() +class RandomCropWithMaxAreaLimit(RandomCrop): + """ + Find a cropping window such that no single category occupies more than + `single_category_max_area` in `sem_seg`. + + The function retries random cropping 10 times max. + """ + + def __init__(self, crop_type: str, crop_size, strict_mode=True, + single_category_max_area=1.0, ignore_value=255): + super().__init__(crop_type, crop_size, strict_mode) + self._set_attributes(locals()) + + def __call__(self, img, annotations=None, **kwargs): + if self.single_category_max_area >= 1.0: + return super().__call__(img, annotations) + else: + h, w = img.shape[:2] + assert "sem_seg" in annotations[0] + sem_seg = annotations[0]["sem_seg"] + croph, cropw = self.get_crop_size((h, w)) + for _ in range(10): + y0 = np.random.randint(h - croph + 1) + x0 = np.random.randint(w - cropw + 1) + sem_seg_temp = sem_seg[y0: y0 + croph, x0: x0 + cropw] + labels, cnt = np.unique(sem_seg_temp, return_counts=True) + cnt = cnt[labels != self.ignore_value] + if len(cnt) > 1 and np.max(cnt) / np.sum(cnt) < self.single_category_max_area: + break + self.x0, self.y0, self.w, self.h = x0, y0, cropw, croph + + return super().__call__(img, annotations) + + class BlendTransform(Transform): """ Transforms pixel colors with PIL enhance functions. @@ -1382,7 +1434,8 @@ def __init__(self, src_image: np.ndarray, src_weight: float, super().__init__() self._set_attributes(locals()) - def apply_image(self, img: np.ndarray, interp: str = None) -> np.ndarray: + # def __call__(self, img: np.ndarray, interp: str = None) -> np.ndarray: + def __call__(self, img, annotations=None, **kwargs): """ Apply blend transform on the image(s). @@ -1399,54 +1452,174 @@ def apply_image(self, img: np.ndarray, interp: str = None) -> np.ndarray: if img.dtype == np.uint8: img = img.astype(np.float32) img = self.src_weight * self.src_image + self.dst_weight * img - return np.clip(img, 0, 255).astype(np.uint8) + return np.clip(img, 0, 255).astype(np.uint8), annotations else: - return self.src_weight * self.src_image + self.dst_weight * img + return self.src_weight * self.src_image + self.dst_weight * img, annotations - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - """ - Apply no transform on the coordinates. 
- """ - return coords - def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray: +@TRANSFORMS.register() +class RandomContrast(BlendTransform): + """ + Randomly transforms image contrast. + + Contrast intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce contrast + - intensity = 1 will preserve the input image + - intensity > 1 will increase contrast + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max, prob=1.0): """ - Apply no transform on the full-image segmentation. + Args: + intensity_min (float): Minimum augmentation. + intensity_max (float): Maximum augmentation. + prob (float): probability of transforms image contrast. """ - return segmentation + super().__init__() + self._set_attributes(locals()) + + def __call__(self, img, annotations=None, **kwargs): + if self._rand_range() < self.prob: + w = np.random.uniform(self.intensity_min, self.intensity_max) + self.src_image, self.src_weight, self.dst_weight = img.mean(), 1 - w, w + return super().__call__(img, annotations) + else: + return img, annotations -class RandomSwapChannelsTransform(Transform): +@TRANSFORMS.register() +class RandomBrightness(BlendTransform): """ - Randomly swap image channels. + Randomly transforms image brightness. + + Brightness intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce brightness + - intensity = 1 will preserve the input image + - intensity > 1 will increase brightness + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html """ - def __init__(self): + def __init__(self, intensity_min, intensity_max, prob=1.): + """ + Args: + intensity_min (float): Minimum augmentation. + intensity_max (float): Maximum augmentation. + prob (float): probability of transforms image brightness. + """ super().__init__() + self._set_attributes(locals()) - def apply_image(self, img): - assert len(img.shape) > 2 - return img[..., np.random.permutation(3)] + def __call__(self, img, annotations=None, **kwargs): + do = self._rand_range() < self.prob + if do: + w = np.random.uniform(self.intensity_min, self.intensity_max) + self.src_image, self.src_weight, self.dst_weight = 0, 1 - w, w + return super().__call__(img, annotations) + else: + return img, annotations - def apply_coords(self, coords: np.ndarray) -> np.ndarray: + +@TRANSFORMS.register() +class RandomSaturation(BlendTransform): + """ + Randomly transforms image saturation. + + Saturation intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce saturation (make the image more grayscale) + - intensity = 1 will preserve the input image + - intensity > 1 will increase saturation + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max, prob=1.0): """ - Apply no transform on the coordinates. + Args: + intensity_min (float): Minimum augmentation (1 preserves input). + intensity_max (float): Maximum augmentation (1 preserves input). + prob (float): probability of transforms image saturation. 
""" - return coords + super().__init__() + self._set_attributes(locals()) - def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray: + def __call__(self, img, annotations=None, **kwargs): + do = self._rand_range() < self.prob + if do: + assert img.shape[-1] == 3, "Saturation only works on RGB images" + w = np.random.uniform(self.intensity_min, self.intensity_max) + grayscale = img.dot([0.299, 0.587, 0.114])[:, :, np.newaxis] + self.src_image, self.src_weight, self.dst_weight = grayscale, 1 - w, w + return super().__call__(img, annotations) + else: + return img, annotations + + +@TRANSFORMS.register() +class RandomLightning(BlendTransform): + """ + Randomly transforms image color using fixed PCA over ImageNet. + + The degree of color jittering is randomly sampled via a normal distribution, + with standard deviation given by the scale parameter. + """ + + def __init__(self, scale, prob=0.5): """ - Apply no transform on the full-image segmentation. + Args: + scale (float): Standard deviation of principal component weighting. """ - return segmentation + super().__init__() + self._set_attributes(locals()) + self.eigen_vecs = np.array( + [ + [-0.5675, 0.7192, 0.4009], + [-0.5808, -0.0045, -0.8140], + [-0.5836, -0.6948, 0.4203], + ] + ) + self.eigen_vals = np.array([0.2175, 0.0188, 0.0045]) + + def __call__(self, img, annotations=None, **kwargs): + assert img.shape[-1] == 3, "Saturation only works on RGB images" + do = self._rand_range() < self.prob + if do: + weights = np.random.normal(scale=self.scale, size=3) + self.src_image, self.src_weight, self.dst_weight = \ + self.eigen_vecs.dot(weights * self.eigen_vals), 1, 1 + return super().__call__(img, annotations) + else: + return img, annotations + + +@TRANSFORMS.register() +class RandomSwapChannels(Transform): + """ + Randomly swap image channels. + """ + + def __init__(self, prob=0.5): + super().__init__() + self._set_attributes() + + def __call__(self, img, annotations=None, **kwargs): + assert len(img.shape) > 2 + if self._rand_range() < self.prob: + return img[..., np.random.permutation(3)], annotations + else: + return img, annotations -class ExpandTransform(Transform): +@TRANSFORMS.register() +class Expand(DefaultTransorm): """ Expand the image and boxes according the specified expand ratio. """ - def __init__(self, left, top, ratio, mean=(0, 0, 0)): + def __init__(self, ratio_range=(1, 4), mean=(0, 0, 0), prob=0.5): + """ Args: left, top (int): crop the image by img[top: top+h, left:left+w]. @@ -1455,6 +1628,18 @@ def __init__(self, left, top, ratio, mean=(0, 0, 0)): """ super().__init__() self._set_attributes(locals()) + self.min_ratio, self.max_ratio = ratio_range + + def __call__(self, img, annotations=None, **kwargs): + if self._rand_range() < self.prob: + return img, annotations + else: + h, w, c = img.shape + ratio = np.random.uniform(self.min_ratio, self.max_ratio) + left = int(np.random.uniform(0, w * ratio - w)) + top = int(np.random.uniform(0, h * ratio - h)) + self.left, self.top, self.ratio = left, top, ratio + return super().__call__(img, annotations) def apply_image(self, img): """ @@ -1483,7 +1668,8 @@ def apply_coords(self, coords: np.ndarray) -> np.ndarray: return coords -class ExtentTransform(Transform): +@TRANSFORMS.register() +class RandomExtent(DefaultTransorm): """ Extracts a subregion from the source image and scales it to the output size. 
@@ -1492,17 +1678,47 @@ class ExtentTransform(Transform): See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform """ - def __init__(self, src_rect, output_size, interp=Image.LINEAR, fill=0): + def __init__(self, scale_range, shift_range, interp=Image.LINEAR, fill=0, prob=0.5): """ Args: - src_rect (x0, y0, x1, y1): src coordinates - output_size (h, w): dst image size - interp: PIL interpolation methods - fill: Fill color used when src_rect extends outside image + scale_range (l, h): Range of input-to-output size scaling factor. + shift_range (x, y): Range of shifts of the cropped subrect. The rect + is shifted by [w / 2 * Uniform(-x, x), h / 2 * Uniform(-y, y)], + where (w, h) is the (width, height) of the input image. Set each + component to zero to crop at the image's center. """ super().__init__() self._set_attributes(locals()) + def __call__(self, img, annotations=None, **kwargs): + + if self._rand_range() < self.prob: + return img, annotations + else: + img_h, img_w = img.shape[:2] + + # Initialize src_rect to fit the input image. + src_rect = np.array([-0.5 * img_w, -0.5 * img_h, 0.5 * img_w, 0.5 * img_h]) + + # Apply a random scaling to the src_rect. + src_rect *= np.random.uniform(self.scale_range[0], self.scale_range[1]) + + # Apply a random shift to the coordinates origin. + src_rect[0::2] += self.shift_range[0] * img_w * (np.random.rand() - 0.5) + src_rect[1::2] += self.shift_range[1] * img_h * (np.random.rand() - 0.5) + + # Map src_rect coordinates into image coordinates (center at corner). + src_rect[0::2] += 0.5 * img_w + src_rect[1::2] += 0.5 * img_h + + self.src_rect = (src_rect[0], src_rect[1], src_rect[2], src_rect[3]) + self.output_size = ( + int(src_rect[3] - src_rect[1]), + int(src_rect[2] - src_rect[0]), + ) + + return super().__call__(img, annotations) + def apply_image(self, img, interp=None): h, w = self.output_size ret = Image.fromarray(img).transform( @@ -1533,7 +1749,7 @@ def apply_segmentation(self, segmentation): return segmentation -class ResizeTransform(Transform): +class ResizeTransform(DefaultTransorm): """ Resize the image to a target size. """ @@ -1566,6 +1782,140 @@ def apply_segmentation(self, segmentation): return segmentation +@TRANSFORMS.register() +class Resize(ResizeTransform): + """ + Resize image to a target size + """ + + def __init__(self, shape, interp=Image.BILINEAR): + """ + Args: + shape: (h, w) tuple or a int. + interp: PIL interpolation method. + """ + if isinstance(shape, int): + shape = (shape, shape) + shape = tuple(shape) + self._set_attributes(locals()) + + def __call__(self, img, annotations=None, **kwargs): + self.h, self.w, self.new_h, self.new_w = \ + img.shape[0], img.shape[1], self.shape[0], self.shape[1] + return super().__call__(img, annotations) + + +@TRANSFORMS.register() +class ResizeLongestEdge(ResizeTransform): + """ + Scale the longer edge to the given size. + """ + + def __init__(self, long_edge_length, sample_style="range", interp=Image.BILINEAR, + jitter=(0.0, 32)): + """ + Args: + long_edge_length (list[int]): If ``sample_style=="range"``, + a [min, max] interval from which to sample the shortest edge length. + If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. + sample_style (str): either "range" or "choice". + interp: PIL interpolation method. 
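+            jitter (tuple): ``(ratio, divisible)``; when ``ratio > 0`` the
+                target length is jittered around the image's longer edge and
+                rounded down to a multiple of ``divisible``.
+
+        Example (a minimal sketch):
+
+        .. code-block:: python
+
+            # scale the longer edge to a length sampled from [640, 800]
+            aug = ResizeLongestEdge(long_edge_length=(640, 800),
+                                    sample_style="range")
+            img, annotations = aug(img, annotations)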
+ """ + assert sample_style in ["range", "choice"], sample_style + + self.is_range = sample_style == "range" + if isinstance(long_edge_length, int): + long_edge_length = (long_edge_length, long_edge_length) + self._set_attributes(locals()) + + def __call__(self, img, annotations=None, **kwargs): + h, w = img.shape[:2] + if self.is_range: + size = np.random.randint( + self.long_edge_length[0], self.long_edge_length[1] + 1 + ) + else: + size = np.random.choice(self.long_edge_length) + if size == 0: + return NoOpTransform() + + if self.jitter[0] > 0: + dw = self.jitter[0] * w + dh = self.jitter[0] * h + size = max(h, w) + np.random.uniform(low=-max(dw, dh), high=max(dw, dh)) + size -= size % self.jitter[1] + + scale = size * 1.0 / max(h, w) + if h < w: + newh, neww = scale * h, size + else: + newh, neww = size, scale * w + + neww = int(neww + 0.5) + newh = int(newh + 0.5) + + self.h, self.w, self.new_h, self.new_w = h, w, newh, neww + return super().__call__(img, annotations) + + +@TRANSFORMS.register() +class ResizeShortestEdge(ResizeTransform): + """ + Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge. + If `max_size` is reached, then downscale so that the longer edge does not exceed max_size. + """ + + def __init__( + self, + short_edge_length, + max_size=sys.maxsize, + sample_style="range", + interp=Image.BILINEAR, + ): + """ + Args: + short_edge_length (list[int]): If ``sample_style=="range"``, + a [min, max] interval from which to sample the shortest edge length. + If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. + max_size (int): maximum allowed longest edge length. + sample_style (str): either "range" or "choice". + interp: PIL interpolation method. + """ + assert sample_style in ["range", "choice"], sample_style + + self.is_range = sample_style == "range" + if isinstance(short_edge_length, int): + short_edge_length = (short_edge_length, short_edge_length) + self._set_attributes(locals()) + + def __call__(self, img, annotations=None, **kwargs): + h, w = img.shape[:2] + + if self.is_range: + size = np.random.randint( + self.short_edge_length[0], self.short_edge_length[1] + 1 + ) + else: + size = np.random.choice(self.short_edge_length) + if size == 0: + return NoOpTransform() + + scale = size * 1.0 / min(h, w) + if h < w: + newh, neww = size, scale * w + else: + newh, neww = scale * h, size + if max(newh, neww) > self.max_size: + scale = self.max_size * 1.0 / max(newh, neww) + newh = newh * scale + neww = neww * scale + neww = int(neww + 0.5) + newh = int(newh + 0.5) + + self.h, self.w, self.new_h, self.new_w = h, w, newh, neww + return super().__call__(img, annotations) + + def HFlip_rotated_box(transform, rotated_boxes): """ Apply the horizontal flip transform on rotated boxes. @@ -1609,6 +1959,21 @@ def Resize_rotated_box(transform, rotated_boxes): return rotated_boxes -HFlipTransform.register_type("rotated_box", HFlip_rotated_box) +# RandomFlip is horizontal by default. +RandomFlip.register_type("rotated_box", HFlip_rotated_box) NoOpTransform.register_type("rotated_box", lambda t, x: x) ResizeTransform.register_type("rotated_box", Resize_rotated_box) + + +@TRANSFORMS.register() +class TorchTransformGen(Transform): + """ + Wrapper transfrom of transforms in torchvision. + It convert img (np.ndarray) to PIL image, and convert back to np.ndarray after transform. 
+ """ + def __init__(self, tfm): + self.tfm = tfm + + def __call__(self, img: np.ndarray, annotations: None, **kwargs): + pil_image = Image.fromarray(img) + return np.array(self.tfm(pil_image)), annotations diff --git a/cvpods/data/transforms/auto_aug.py b/cvpods/data/transforms/auto_aug.py index 450544a..d959488 100644 --- a/cvpods/data/transforms/auto_aug.py +++ b/cvpods/data/transforms/auto_aug.py @@ -9,7 +9,8 @@ import PIL from PIL import Image, ImageEnhance, ImageOps -from cvpods.data.transforms import Transform +from ..registry import TRANSFORMS +from .augmentations import Transform _PIL_VER = tuple([int(x) for x in PIL.__version__.split('.')[:2]]) @@ -277,7 +278,8 @@ def _solarize_add_level_to_arg(level, _hparams): } -class AutoAugmentTransform(Transform): +@TRANSFORMS.register() +class AutoAugment(Transform): """ AutoAugment from Google. Implementation adapted from: @@ -310,19 +312,16 @@ def __init__(self, name, prob=0.5, magnitude=10, hparams=None): # NOTE This is my own hack, being tested, not in papers or reference impls. self.magnitude_std = self.hparams.get('magnitude_std', 0) - def apply_image(self, img: np.ndarray) -> np.ndarray: - if random.random() > self.prob: - return img - magnitude = self.magnitude - if self.magnitude_std and self.magnitude_std > 0: - magnitude = random.gauss(magnitude, self.magnitude_std) - magnitude = min(_MAX_LEVEL, max(0, magnitude)) # clip to valid range - level_args = self.level_fn( - magnitude, self.hparams) if self.level_fn is not None else tuple() - return np.array(self.aug_fn(Image.fromarray(img), *level_args, **self.kwargs)) - - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - return coords + def __call__(self, img: np.ndarray, annotations: list = None, **kwargs): + if random.random() < self.prob: + magnitude = self.magnitude + if self.magnitude_std and self.magnitude_std > 0: + magnitude = random.gauss(magnitude, self.magnitude_std) + magnitude = min(_MAX_LEVEL, max(0, magnitude)) # clip to valid range + level_args = self.level_fn( + magnitude, self.hparams) if self.level_fn is not None else tuple() + img = np.array(self.aug_fn(Image.fromarray(img), *level_args, **self.kwargs)) + return img, annotations _RAND_TRANSFORMS = [ diff --git a/cvpods/data/transforms/transform_gen.py b/cvpods/data/transforms/transform_gen.py deleted file mode 100644 index 0865b9b..0000000 --- a/cvpods/data/transforms/transform_gen.py +++ /dev/null @@ -1,1107 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) BaseDetection, Inc. and its affiliates. 
All Rights Reserved - -import inspect -import pprint -import sys -from abc import ABCMeta, abstractmethod - -import numpy as np -from PIL import Image - -import torch - -from cvpods.structures import Boxes, BoxMode, pairwise_iou - -from ..registry import TRANSFORMS -from .auto_aug import AutoAugmentTransform - -from .transform import ( # isort:skip - ScaleTransform, - AffineTransform, - BlendTransform, - IoUCropTransform, - CropTransform, - CropPadTransform, - HFlipTransform, - NoOpTransform, - VFlipTransform, - DistortTransform, - RandomSwapChannelsTransform, - ExpandTransform, - ExtentTransform, - ResizeTransform, - # Transforms used in ssl - GaussianBlurTransform, - GaussianBlurConvTransform, - SolarizationTransform, - ComposeTransform, - # LabSpaceTransform, - PadTransform, -) - -__all__ = [ - "Pad", - "RandomScale", - "Expand", - "MinIoURandomCrop", - "RandomSwapChannels", - "CenterAffine", - "RandomBrightness", - "RandomContrast", - "RandomCrop", - "RandomCropWithInstance", - "RandomCropWithMaxAreaLimit", - "RandomCropPad", - "RandomExtent", - "RandomFlip", - "RandomSaturation", - "RandomLighting", - "RandomDistortion", - "Resize", - "ResizeShortestEdge", - "ResizeLongestEdge", - "ShuffleList", - "RandomList", - "RepeatList", - "TransformGen", - "TorchTransformGen", - # transforms used in ssl - "GaussianBlur", - "GaussianBlurConv", - "Solarization", - "AutoAugment", -] - - -def check_dtype(img): - """ - Check the image data type and dimensions to ensure that transforms can be applied on it. - - Args: - img (np.array): image to be checked. - """ - assert isinstance( - img, np.ndarray - ), "[TransformGen] Needs an numpy array, but got a {}!".format(type(img)) - assert not isinstance(img.dtype, np.integer) or ( - img.dtype == np.uint8 - ), "[TransformGen] Got image of type {}, use uint8 or floating points instead!".format( - img.dtype - ) - assert img.ndim in [2, 3], img.ndim - - -@TRANSFORMS.register() -class TransformGen(metaclass=ABCMeta): - """ - TransformGen takes an image of type uint8 in range [0, 255], or - floating point in range [0, 1] or [0, 255] as input. - - It creates a :class:`Transform` based on the given image, sometimes with randomness. - The transform can then be used to transform images - or other data (boxes, points, annotations, etc.) associated with it. - - The assumption made in this class - is that the image itself is sufficient to instantiate a transform. - When this assumption is not true, you need to create the transforms by your own. - - A list of `TransformGen` can be applied with :func:`apply_transform_gens`. - """ - - def _init(self, params=None): - if params: - for k, v in params.items(): - if k != "self" and not k.startswith("_"): - setattr(self, k, v) - - @abstractmethod - def get_transform(self, img, annotations=None): - raise NotImplementedError - - def __call__(self, img, annotations=None, **kwargs): - return self.get_transform(img, annotations)(img, annotations, **kwargs) - - def _rand_range(self, low=1.0, high=None, size=None): - """ - Uniform float random number between low and high. 
- """ - if high is None: - low, high = 0, low - if size is None: - size = [] - return np.random.uniform(low, high, size) - - def __repr__(self): - """ - Produce something like: - "MyTransformGen(field1={self.field1}, field2={self.field2})" - """ - try: - sig = inspect.signature(self.__init__) - classname = type(self).__name__ - argstr = [] - for name, param in sig.parameters.items(): - assert ( - param.kind != param.VAR_POSITIONAL - and param.kind != param.VAR_KEYWORD - ), "The default __repr__ doesn't support *args or **kwargs" - assert hasattr(self, name), ( - "Attribute {} not found! " - "Default __repr__ only works if attributes match the constructor.".format( - name - ) - ) - attr = getattr(self, name) - default = param.default - if default is attr: - continue - argstr.append("{}={}".format(name, pprint.pformat(attr))) - return "{}({})".format(classname, ", ".join(argstr)) - except AssertionError: - return super().__repr__() - - __str__ = __repr__ - - -@TRANSFORMS.register() -class RandomFlip(TransformGen): - """ - Flip the image horizontally or vertically with the given probability. - """ - - def __init__(self, prob=0.5, *, horizontal=True, vertical=False): - """ - Args: - prob (float): probability of flip. - horizontal (boolean): whether to apply horizontal flipping - vertical (boolean): whether to apply vertical flipping - """ - super().__init__() - - if horizontal and vertical: - raise ValueError( - "Cannot do both horiz and vert. Please use two Flip instead." - ) - if not horizontal and not vertical: - raise ValueError("At least one of horiz or vert has to be True!") - self._init(locals()) - - def get_transform(self, img, annotations=None): - h, w = img.shape[:2] - do = self._rand_range() < self.prob - if do: - if self.horizontal: - return HFlipTransform(w) - elif self.vertical: - return VFlipTransform(h) - else: - return NoOpTransform() - - -@TRANSFORMS.register() -class TorchTransformGen: - """ - Wrapper transfrom of transforms in torchvision. - It convert img (np.ndarray) to PIL image, and convert back to np.ndarray after transform. - """ - def __init__(self, tfm): - self.tfm = tfm - - def __call__(self, img: np.ndarray, annotations: None, **kwargs): - pil_image = Image.fromarray(img) - return np.array(self.tfm(pil_image)), annotations - - -@TRANSFORMS.register() -class RandomDistortion(TransformGen): - """ - Random distort image's hue, saturation and exposure. - """ - - def __init__(self, hue, saturation, exposure, image_format="BGR"): - """ - RandomDistortion Initialization. 
- Args: - hue (float): value of hue - saturation (float): value of saturation - exposure (float): value of exposure - """ - assert image_format in ["RGB", "BGR"] - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - return DistortTransform(self.hue, self.saturation, self.exposure, self.image_format) - - -@TRANSFORMS.register() -class CenterAffine(TransformGen): - """ - Affine Transform for CenterNet - """ - - def __init__(self, boarder, output_size, pad_value=[0, 0, 0], random_aug=True): - """ - output_size (w, h) shape - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - img_shape = img.shape[:2] - center, scale = self.generate_center_and_scale(img_shape) - src, dst = self.generate_src_and_dst(center, scale, self.output_size) - return AffineTransform(src, dst, self.output_size, self.pad_value) - - @staticmethod - def _get_boarder(boarder, size): - """ - This func may be rewirite someday - """ - i = 1 - size //= 2 - while size <= boarder // i: - i *= 2 - return boarder // i - - def generate_center_and_scale(self, img_shape): - """ - generate center - shpae : (h, w) - """ - height, width = img_shape - center = np.array([width / 2, height / 2], dtype=np.float32) - scale = float(max(img_shape)) - if self.random_aug: - scale = scale * np.random.choice(np.arange(0.6, 1.4, 0.1)) - h_boarder = self._get_boarder(self.boarder, height) - w_boarder = self._get_boarder(self.boarder, width) - center[0] = np.random.randint(low=w_boarder, high=width - w_boarder) - center[1] = np.random.randint(low=h_boarder, high=height - h_boarder) - else: - pass - - return center, scale - - @staticmethod - def generate_src_and_dst(center, scale, output_size): - if not isinstance(scale, np.ndarray) and not isinstance(scale, list): - scale = np.array([scale, scale], dtype=np.float32) - src = np.zeros((3, 2), dtype=np.float32) - src_w = scale[0] - src_dir = [0, src_w * -0.5] - src[0, :] = center - src[1, :] = src[0, :] + src_dir - src[2, :] = src[1, :] + (src_dir[1], -src_dir[0]) - - dst = np.zeros((3, 2), dtype=np.float32) - dst_w, dst_h = output_size - dst_dir = [0, dst_w * -0.5] - dst[0, :] = [dst_w * 0.5, dst_h * 0.5] - dst[1, :] = dst[0, :] + dst_dir - dst[2, :] = dst[1, :] + (dst_dir[1], -dst_dir[0]) - - return src, dst - - -@TRANSFORMS.register() -class GaussianBlur(TransformGen): - """ - Gaussian blur transform. - """ - def __init__(self, sigma, p=1.0): - """ - Args: - sigma (List(float)): sigma of gaussian - p (float): probability of perform this augmentation - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - return GaussianBlurTransform(self.sigma, self.p) - - -@TRANSFORMS.register() -class Solarization(TransformGen): - def __init__(self, threshold=128, p=0.5): - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - return SolarizationTransform(self.threshold, self.p) - - -@TRANSFORMS.register() -class GaussianBlurConv(TransformGen): - def __init__(self, kernel_size, p): - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - return GaussianBlurConvTransform(self.kernel_size, self.p) - - -@TRANSFORMS.register() -class Resize(TransformGen): - """ - Resize image to a target size - """ - - def __init__(self, shape, interp=Image.BILINEAR): - """ - Args: - shape: (h, w) tuple or a int. - interp: PIL interpolation method. 
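`CenterAffine.generate_src_and_dst` above pins the warp with three point pairs: the crop center, a point half a `scale` above it, and their common perpendicular. The sketch below copies that logic into a standalone function so the resulting matrix can be inspected; feeding the pairs to `cv2.getAffineTransform` is an assumption about what `AffineTransform` does internally, which this diff does not show:

```python
import cv2
import numpy as np

def generate_src_and_dst(center, scale, output_size):
    # Same logic as CenterAffine above: three non-collinear point pairs
    # determine the 2x3 affine matrix uniquely.
    if not isinstance(scale, (np.ndarray, list)):
        scale = np.array([scale, scale], dtype=np.float32)
    src = np.zeros((3, 2), dtype=np.float32)
    src_dir = [0, scale[0] * -0.5]
    src[0, :] = center
    src[1, :] = src[0, :] + src_dir
    src[2, :] = src[1, :] + (src_dir[1], -src_dir[0])

    dst = np.zeros((3, 2), dtype=np.float32)
    dst_w, dst_h = output_size
    dst_dir = [0, dst_w * -0.5]
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = dst[0, :] + dst_dir
    dst[2, :] = dst[1, :] + (dst_dir[1], -dst_dir[0])
    return src, dst

src, dst = generate_src_and_dst(np.array([320.0, 240.0]), 640.0, (512, 512))
M = cv2.getAffineTransform(src, dst)  # 2x3 matrix consumed by cv2.warpAffine
print(M)
```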
- """ - if isinstance(shape, int): - shape = (shape, shape) - shape = tuple(shape) - self._init(locals()) - - def get_transform(self, img, annotations=None): - return ResizeTransform( - img.shape[0], img.shape[1], self.shape[0], self.shape[1], self.interp - ) - - -@TRANSFORMS.register() -class ResizeLongestEdge(TransformGen): - """ - Scale the longer edge to the given size. - """ - - def __init__(self, long_edge_length, sample_style="range", interp=Image.BILINEAR, - jitter=(0.0, 32)): - """ - Args: - long_edge_length (list[int]): If ``sample_style=="range"``, - a [min, max] interval from which to sample the shortest edge length. - If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. - sample_style (str): either "range" or "choice". - interp: PIL interpolation method. - """ - super().__init__() - assert sample_style in ["range", "choice"], sample_style - - self.is_range = sample_style == "range" - if isinstance(long_edge_length, int): - long_edge_length = (long_edge_length, long_edge_length) - self._init(locals()) - - def get_transform(self, img, annotations=None): - h, w = img.shape[:2] - if self.is_range: - size = np.random.randint( - self.long_edge_length[0], self.long_edge_length[1] + 1 - ) - else: - size = np.random.choice(self.long_edge_length) - if size == 0: - return NoOpTransform() - - if self.jitter[0] > 0: - dw = self.jitter[0] * w - dh = self.jitter[0] * h - size = max(h, w) + np.random.uniform(low=-max(dw, dh), high=max(dw, dh)) - size -= size % self.jitter[1] - - scale = size * 1.0 / max(h, w) - if h < w: - newh, neww = scale * h, size - else: - newh, neww = size, scale * w - - neww = int(neww + 0.5) - newh = int(newh + 0.5) - - return ResizeTransform(h, w, newh, neww, self.interp) - - -@TRANSFORMS.register() -class ResizeShortestEdge(TransformGen): - """ - Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge. - If `max_size` is reached, then downscale so that the longer edge does not exceed max_size. - """ - - def __init__( - self, - short_edge_length, - max_size=sys.maxsize, - sample_style="range", - interp=Image.BILINEAR, - ): - """ - Args: - short_edge_length (list[int]): If ``sample_style=="range"``, - a [min, max] interval from which to sample the shortest edge length. - If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. - max_size (int): maximum allowed longest edge length. - sample_style (str): either "range" or "choice". - interp: PIL interpolation method. - """ - super().__init__() - assert sample_style in ["range", "choice"], sample_style - - self.is_range = sample_style == "range" - if isinstance(short_edge_length, int): - short_edge_length = (short_edge_length, short_edge_length) - self._init(locals()) - - def get_transform(self, img, annotations=None): - h, w = img.shape[:2] - - if self.is_range: - size = np.random.randint( - self.short_edge_length[0], self.short_edge_length[1] + 1 - ) - else: - size = np.random.choice(self.short_edge_length) - if size == 0: - return NoOpTransform() - - scale = size * 1.0 / min(h, w) - if h < w: - newh, neww = size, scale * w - else: - newh, neww = scale * h, size - if max(newh, neww) > self.max_size: - scale = self.max_size * 1.0 / max(newh, neww) - newh = newh * scale - neww = neww * scale - neww = int(neww + 0.5) - newh = int(newh + 0.5) - return ResizeTransform(h, w, newh, neww, self.interp) - - -@TRANSFORMS.register() -class RandomCrop(TransformGen): - """ - Randomly crop a subimage out of an image. 
- """ - - def __init__(self, crop_type: str, crop_size, strict_mode=True): - """ - Args: - crop_type (str): one of "relative_range", "relative", "absolute". - See `config/defaults.py` for explanation. - crop_size (tuple[float]): the relative ratio or absolute pixels of - height and width - strict_mode (bool): if `True`, the target `crop_size` must be smaller than - the original image size. - """ - super().__init__() - assert crop_type in ["relative_range", "relative", "absolute"] - self._init(locals()) - - def get_transform(self, img, annotations=None): - h, w = img.shape[:2] - croph, cropw = self.get_crop_size((h, w)) - if self.strict_mode: - assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format( - self - ) - offset_range_h = max(h - croph, 0) - offset_range_w = max(w - cropw, 0) - h0 = np.random.randint(offset_range_h + 1) - w0 = np.random.randint(offset_range_w + 1) - return CropTransform(w0, h0, cropw, croph) - - def get_crop_size(self, image_size): - """ - Args: - image_size (tuple): height, width - - Returns: - crop_size (tuple): height, width in absolute pixels - """ - h, w = image_size - if self.crop_type == "relative": - ch, cw = self.crop_size - return int(h * ch + 0.5), int(w * cw + 0.5) - elif self.crop_type == "relative_range": - crop_size = np.asarray(self.crop_size, dtype=np.float32) - ch, cw = crop_size + np.random.rand(2) * (1 - crop_size) - return int(h * ch + 0.5), int(w * cw + 0.5) - elif self.crop_type == "absolute": - return self.crop_size - else: - NotImplementedError("Unknown crop type {}".format(self.crop_type)) - - -@TRANSFORMS.register() -class RandomCropWithInstance(RandomCrop): - """ - Make sure the cropping region contains the center of a random instance from annotations. - """ - - def get_transform(self, img, annotations=None): - h, w = img.shape[:2] - croph, cropw = self.get_crop_size((h, w)) - if self.strict_mode: - assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format( - self - ) - offset_range_h = max(h - croph, 0) - offset_range_w = max(w - cropw, 0) - # Make sure there is always at least one instance in the image - assert annotations is not None, "Can not get annotations infos." - instance = np.random.choice(annotations) - bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) - bbox = torch.tensor(bbox) - center_xy = (bbox[:2] + bbox[2:]) / 2.0 - - offset_range_h_min = max(center_xy[1] - croph, 0) - offset_range_w_min = max(center_xy[0] - cropw, 0) - offset_range_h_max = min(offset_range_h, center_xy[1] - 1) - offset_range_w_max = min(offset_range_w, center_xy[0] - 1) - - h0 = np.random.randint(offset_range_h_min, offset_range_h_max + 1) - w0 = np.random.randint(offset_range_w_min, offset_range_w_max + 1) - return CropTransform(w0, h0, cropw, croph) - - -@TRANSFORMS.register() -class RandomCropWithMaxAreaLimit(RandomCrop): - """ - Find a cropping window such that no single category occupies more than - `single_category_max_area` in `sem_seg`. - - The function retries random cropping 10 times max. 
- """ - - def __init__(self, crop_type: str, crop_size, strict_mode=True, - single_category_max_area=1.0, ignore_value=255): - super().__init__(crop_type, crop_size, strict_mode) - self._init(locals()) - - def get_transform(self, img, annotations=None): - if self.single_category_max_area >= 1.0: - crop_tfm = super().get_transform(img, annotations) - else: - h, w = img.shape[:2] - assert "sem_seg" in annotations[0] - sem_seg = annotations[0]["sem_seg"] - croph, cropw = self.get_crop_size((h, w)) - for _ in range(10): - y0 = np.random.randint(h - croph + 1) - x0 = np.random.randint(w - cropw + 1) - sem_seg_temp = sem_seg[y0: y0 + croph, x0: x0 + cropw] - labels, cnt = np.unique(sem_seg_temp, return_counts=True) - cnt = cnt[labels != self.ignore_value] - if len(cnt) > 1 and np.max(cnt) / np.sum(cnt) < self.single_category_max_area: - break - crop_tfm = CropTransform(x0, y0, cropw, croph) - return crop_tfm - - -@TRANSFORMS.register() -class RandomCropPad(RandomCrop): - """ - Randomly crop and pad a subimage out of an image. - """ - def __init__(self, - crop_type: str, - crop_size, - img_value=None, - seg_value=None): - super().__init__(crop_type, crop_size, strict_mode=False) - self._init(locals()) - - def get_transform(self, img, annotations=None): - h, w = img.shape[:2] - croph, cropw = self.get_crop_size((h, w)) - h0 = np.random.randint(h - croph + 1) if h >= croph else 0 - w0 = np.random.randint(w - cropw + 1) if w >= cropw else 0 - dh = min(h, croph) - dw = min(w, cropw) - # print(w0, h0, dw, dh) - return CropPadTransform(w0, h0, dw, dh, cropw, croph, self.img_value, - self.seg_value) - - -@TRANSFORMS.register() -class RandomExtent(TransformGen): - """ - Outputs an image by cropping a random "subrect" of the source image. - - The subrect can be parameterized to include pixels outside the source image, - in which case they will be set to zeros (i.e. black). The size of the output - image will vary with the size of the random subrect. - """ - - def __init__(self, scale_range, shift_range): - """ - Args: - scale_range (l, h): Range of input-to-output size scaling factor. - shift_range (x, y): Range of shifts of the cropped subrect. The rect - is shifted by [w / 2 * Uniform(-x, x), h / 2 * Uniform(-y, y)], - where (w, h) is the (width, height) of the input image. Set each - component to zero to crop at the image's center. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - img_h, img_w = img.shape[:2] - - # Initialize src_rect to fit the input image. - src_rect = np.array([-0.5 * img_w, -0.5 * img_h, 0.5 * img_w, 0.5 * img_h]) - - # Apply a random scaling to the src_rect. - src_rect *= np.random.uniform(self.scale_range[0], self.scale_range[1]) - - # Apply a random shift to the coordinates origin. - src_rect[0::2] += self.shift_range[0] * img_w * (np.random.rand() - 0.5) - src_rect[1::2] += self.shift_range[1] * img_h * (np.random.rand() - 0.5) - - # Map src_rect coordinates into image coordinates (center at corner). - src_rect[0::2] += 0.5 * img_w - src_rect[1::2] += 0.5 * img_h - - return ExtentTransform( - src_rect=(src_rect[0], src_rect[1], src_rect[2], src_rect[3]), - output_size=( - int(src_rect[3] - src_rect[1]), - int(src_rect[2] - src_rect[0]), - ), - ) - - -@TRANSFORMS.register() -class RandomContrast(TransformGen): - """ - Randomly transforms image contrast. - - Contrast intensity is uniformly sampled in (intensity_min, intensity_max). 
- - intensity < 1 will reduce contrast - - intensity = 1 will preserve the input image - - intensity > 1 will increase contrast - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max, prob=1.0): - """ - Args: - intensity_min (float): Minimum augmentation. - intensity_max (float): Maximum augmentation. - prob (float): probability of transforms image contrast. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - do = self._rand_range() < self.prob - if do: - w = np.random.uniform(self.intensity_min, self.intensity_max) - return BlendTransform(src_image=img.mean(), src_weight=1 - w, dst_weight=w) - else: - return NoOpTransform() - - -@TRANSFORMS.register() -class RandomBrightness(TransformGen): - """ - Randomly transforms image brightness. - - Brightness intensity is uniformly sampled in (intensity_min, intensity_max). - - intensity < 1 will reduce brightness - - intensity = 1 will preserve the input image - - intensity > 1 will increase brightness - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max, prob=1.): - """ - Args: - intensity_min (float): Minimum augmentation. - intensity_max (float): Maximum augmentation. - prob (float): probability of transforms image brightness. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - do = self._rand_range() < self.prob - if do: - w = np.random.uniform(self.intensity_min, self.intensity_max) - return BlendTransform(src_image=0, src_weight=1 - w, dst_weight=w) - else: - return NoOpTransform() - - -@TRANSFORMS.register() -class RandomSaturation(TransformGen): - """ - Randomly transforms image saturation. - - Saturation intensity is uniformly sampled in (intensity_min, intensity_max). - - intensity < 1 will reduce saturation (make the image more grayscale) - - intensity = 1 will preserve the input image - - intensity > 1 will increase saturation - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max, prob=1.0): - """ - Args: - intensity_min (float): Minimum augmentation (1 preserves input). - intensity_max (float): Maximum augmentation (1 preserves input). - prob (float): probability of transforms image saturation. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - do = self._rand_range() < self.prob - if do: - assert img.shape[-1] == 3, "Saturation only works on RGB images" - w = np.random.uniform(self.intensity_min, self.intensity_max) - grayscale = img.dot([0.299, 0.587, 0.114])[:, :, np.newaxis] - return BlendTransform(src_image=grayscale, src_weight=1 - w, dst_weight=w) - else: - return NoOpTransform() - - -@TRANSFORMS.register() -class RandomLighting(TransformGen): - """ - Randomly transforms image color using fixed PCA over ImageNet. - - The degree of color jittering is randomly sampled via a normal distribution, - with standard deviation given by the scale parameter. - """ - - def __init__(self, scale): - """ - Args: - scale (float): Standard deviation of principal component weighting. 
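`RandomContrast`, `RandomBrightness`, and `RandomSaturation` above (and `RandomLighting`, continued below) all reduce to one `BlendTransform`. Assuming `BlendTransform` computes `src_weight * src_image + dst_weight * img` — consistent with `RandomLighting` adding PCA noise with both weights at 1.0 — the three photometric ops are, in a minimal sketch:

```python
import numpy as np

def blend(src_image, src_weight, dst_weight, img):
    # Assumed BlendTransform rule: out = src_weight * src + dst_weight * img.
    return src_weight * src_image + dst_weight * img

img = np.random.rand(4, 4, 3).astype(np.float32)
w = 1.3  # sampled from (intensity_min, intensity_max); w > 1 strengthens the effect

contrast = blend(img.mean(), 1 - w, w, img)             # RandomContrast
brightness = blend(0.0, 1 - w, w, img)                  # RandomBrightness
gray = img.dot([0.299, 0.587, 0.114])[:, :, np.newaxis]
saturation = blend(gray, 1 - w, w, img)                 # RandomSaturation
print(contrast.shape, brightness.shape, saturation.shape)
```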
- """ - super().__init__() - self._init(locals()) - self.eigen_vecs = np.array( - [ - [-0.5675, 0.7192, 0.4009], - [-0.5808, -0.0045, -0.8140], - [-0.5836, -0.6948, 0.4203], - ] - ) - self.eigen_vals = np.array([0.2175, 0.0188, 0.0045]) - - def get_transform(self, img, annotations=None): - assert img.shape[-1] == 3, "Saturation only works on RGB images" - weights = np.random.normal(scale=self.scale, size=3) - return BlendTransform( - src_image=self.eigen_vecs.dot(weights * self.eigen_vals), - src_weight=1.0, - dst_weight=1.0, - ) - - -@TRANSFORMS.register() -class RandomSwapChannels(TransformGen): - """ - Randomly swap image channels. - """ - - def __init__(self, prob=0.5): - """ - Args: - prob (float): probability of swap channels. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - _, w = img.shape[:2] - do = self._rand_range() < self.prob - if do: - return RandomSwapChannelsTransform() - else: - return NoOpTransform() - - -@TRANSFORMS.register() -class MinIoURandomCrop(TransformGen): - """ - Random crop the image & bboxes, the cropped patches have minimum IoU - requirement with original image & bboxes, the IoU threshold is randomly - selected from min_ious. - """ - - def __init__(self, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3): - """ - Args: - min_ious (tuple): minimum IoU threshold for all intersections with bounding boxes - min_crop_size (float): minimum crop's size - (i.e. h,w := a*h, a*w, where a >= min_crop_size). - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations): - """ - Args: - img (ndarray): of shape HxWxC(RGB). The array can be of type uint8 - in range [0, 255], or floating point in range [0, 255]. - annotations (list[dict[str->str]]): - Each item in the list is a bbox label of an object. The object is - represented by a dict, - which contains: - - bbox (list): bbox coordinates, top left and bottom right. - - bbox_mode (str): bbox label mode, for example: `XYXY_ABS`, - `XYWH_ABS` and so on... - """ - sample_mode = (1, *self.min_ious, 0) - h, w = img.shape[:2] - - boxes = list() - for obj in annotations: - boxes.append(BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)) - boxes = torch.tensor(boxes) - - while True: - mode = np.random.choice(sample_mode) - if mode == 1: - return NoOpTransform() - - min_iou = mode - for i in range(50): - new_w = np.random.uniform(self.min_crop_size * w, w) - new_h = np.random.uniform(self.min_crop_size * h, h) - - # h / w in [0.5, 2] - if new_h / new_w < 0.5 or new_h / new_w > 2: - continue - - left = np.random.uniform(w - new_w) - top = np.random.uniform(h - new_h) - - patch = np.array( - (int(left), int(top), int(left + new_w), int(top + new_h))) - - overlaps = pairwise_iou( - Boxes(patch.reshape(-1, 4)), - Boxes(boxes.reshape(-1, 4)) - ) - - if overlaps.min() < min_iou: - continue - - # center of boxes should inside the crop img - center = (boxes[:, :2] + boxes[:, 2:]) / 2 - mask = ((center[:, 0] > patch[0]) * (center[:, 1] > patch[1]) - * (center[:, 0] < patch[2]) * (center[:, 1] < patch[3])) - if not mask.any(): - continue - return IoUCropTransform(int(left), int(top), int(new_w), int(new_h)) - - -@TRANSFORMS.register() -class Expand(TransformGen): - """ - Random Expand the image & bboxes. - """ - - def __init__(self, ratio_range=(1, 4), mean=(0, 0, 0), prob=0.5): - """ - Args: - ratio_range (tuple): range of expand ratio. - mean (tuple): mean value of dataset. 
- prob (float): probability of applying this transformation. - """ - super().__init__() - self._init(locals()) - self.min_ratio, self.max_ratio = ratio_range - - def get_transform(self, img, annotations=None): - if np.random.uniform(0, 1) > self.prob: - return NoOpTransform() - h, w, c = img.shape - ratio = np.random.uniform(self.min_ratio, self.max_ratio) - left = int(np.random.uniform(0, w * ratio - w)) - top = int(np.random.uniform(0, h * ratio - h)) - return ExpandTransform(left, top, ratio, self.mean) - - -@TRANSFORMS.register() -class RandomScale(TransformGen): - """ - Randomly scale the image according to the specified output size and scale ratio range. - - This transform has the following three steps: - - 1. select a random scale factor according to the specified scale ratio range. - 2. recompute the accurate scale_factor using rounded scaled image size. - 3. select non-zero random offset (x, y) if scaled image is larger than output_size. - """ - - def __init__(self, output_size, ratio_range=(0.1, 2), interp="BILINEAR"): - """ - Args: - output_size (tuple): image output size. - ratio_range (tuple): range of scale ratio. - interp (str): the interpolation method. Options includes: - * "NEAREST" - * "BILINEAR" - * "BICUBIC" - * "LANCZOS" - * "HAMMING" - * "BOX" - """ - super().__init__() - self._init(locals()) - self.min_ratio, self.max_ratio = ratio_range - if isinstance(self.output_size, int): - self.output_size = [self.output_size] * 2 - - def get_transform(self, img, annotations=None): - h, w = img.shape[:2] - output_h, output_w = self.output_size - - # 1. Select a random scale factor. - random_scale_factor = np.random.uniform(self.min_ratio, self.max_ratio) - - scaled_size_h = int(random_scale_factor * output_h) - scaled_size_w = int(random_scale_factor * output_w) - - # 2. Recompute the accurate scale_factor using rounded scaled image size. - image_scale_h = scaled_size_h * 1.0 / h - image_scale_w = scaled_size_w * 1.0 / w - image_scale = min(image_scale_h, image_scale_w) - - # 3. Select non-zero random offset (x, y) if scaled image is larger than output_size. - scaled_h = int(h * 1.0 * image_scale) - scaled_w = int(w * 1.0 * image_scale) - - return ScaleTransform(h, w, scaled_h, scaled_w, self.interp) - - -@TRANSFORMS.register() -class AutoAugment(TransformGen): - """ - Convert any of AutoAugment into a cvpods-fashion Transform such that can be configured in - config.py - """ - def __init__(self, name, prob=0.5, magnitude=10, hparams=None): - """ - Args: - name (str): any type of transforms list in _RAND_TRANSFORMS. - prob (float): probability of perform current augmentation. - magnitude (int): intensity / magnitude of each augmentation. - hparams (dict): hyper-parameters required by each augmentation. - """ - - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - return AutoAugmentTransform(self.name, self.prob, self.magnitude, self.hparams) - - -@TRANSFORMS.register() -class Pad(TransformGen): - """ - Pad image with `pad_value` to the specified `target_h` and `target_w`. - - Adds `top` rows of `pad_value` on top, `left` columns of `pad_value` on the left, - and then pads the image on the bottom and right with `pad_value` until it has - dimensions `target_h`, `target_w`. - - This op does nothing if `top` and `left` is zero and the image already has size - `target_h` by `target_w`. - """ - - def __init__(self, top, left, target_h, target_w, pad_value=0): - """ - Args: - top (int): number of rows of `pad_value` to add on top. 
- left (int): number of columns of `pad_value` to add on the left. - target_h (int): height of output image. - target_w (int): width of output image. - pad_value (int): the value used to pad the image. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - return PadTransform(self.top, self.left, self.target_h, self.target_w, self.pad_value) - - -@TRANSFORMS.register() -class RandomList(TransformGen): - """ - Random select subset of provided augmentations. - """ - def __init__(self, transforms, num_layers=2, choice_weights=None): - """ - Args: - transforms (List[TorchTransformGen]): list of transforms need to be performed. - num_layers (int): parameters of np.random.choice. - choice_weights (optional, float): parameters of np.random.choice. - """ - self.transforms = transforms - self.num_layers = num_layers - self.choice_weights = choice_weights - - def get_transform(self, img, annotations=None): - tfms = np.random.choice( - self.transforms, - self.num_layers, - replace=self.choice_weights is None, - p=self.choice_weights) - return ComposeTransform(tfms) - - -@TRANSFORMS.register() -class ShuffleList(TransformGen): - """ - Randomly shuffle the `transforms` order. - """ - - def __init__(self, transforms): - """ - Args: - transforms (list[TransformGen]): List of transform to be shuffled. - """ - super().__init__() - self.transforms = transforms - - def get_transform(self, img, annotations=None): - np.random.shuffle(self.transforms) - return ComposeTransform(self.transforms) - - -@TRANSFORMS.register() -class RepeatList(TransformGen): - """ - Forward several times of provided transforms for a given image. - """ - def __init__(self, transforms, repeat_times): - """ - Args: - transforms (list[TransformGen]): List of transform to be repeated. - repeat_times (int): number of duplicates desired. 
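`RandomList` above delegates subset selection to `np.random.choice`, and the line `replace=self.choice_weights is None` means uniform sampling draws with replacement while weighted sampling draws without (`ShuffleList`, by contrast, keeps all transforms and only permutes them). A sketch of the sampling rule with strings standing in for transform objects:

```python
import numpy as np

pool = ["autocontrast", "equalize", "rotate", "solarize"]  # illustrative names

uniform = np.random.choice(pool, 2, replace=True)        # choice_weights is None
weighted = np.random.choice(pool, 2, replace=False,
                            p=[0.4, 0.3, 0.2, 0.1])      # weights supplied
print(uniform, weighted)
```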
- """ - super().__init__() - self.transforms = transforms - self.times = repeat_times - - def get_transform(self, img, annotations=None): - return ComposeTransform(self.transforms) - - def __call__(self, img, annotations=None, **kwargs): - repeat_imgs = [] - repeat_annotations = [] - for t in range(self.times): - tmp_img, tmp_anno = self.get_transform(img)(img, annotations, **kwargs) - repeat_imgs.append(tmp_img) - repeat_annotations.append(tmp_anno) - repeat_imgs = np.stack(repeat_imgs, axis=0) - return repeat_imgs, repeat_annotations diff --git a/cvpods/modeling/meta_arch/centernet.py b/cvpods/modeling/meta_arch/centernet.py index ee4699f..207ee9c 100644 --- a/cvpods/modeling/meta_arch/centernet.py +++ b/cvpods/modeling/meta_arch/centernet.py @@ -11,7 +11,7 @@ import torch.nn as nn import torch.nn.functional as F -from cvpods.data.transforms.transform_gen import CenterAffine +from cvpods.data.transforms.augmentations import CenterAffine from cvpods.layers import DeformConvWithOff, ModulatedDeformConvWithOff, ShapeSpec from cvpods.modeling.losses import reg_l1_loss from cvpods.modeling.nn_utils.feature_utils import gather_feature diff --git a/playground/detection/coco/retinanet/retinanet.res50.fpn.coco.multiscale.1x/README.md b/playground/detection/coco/retinanet/retinanet.res50.fpn.coco.multiscale.1x/README.md index 6917d50..9eef717 100644 --- a/playground/detection/coco/retinanet/retinanet.res50.fpn.coco.multiscale.1x/README.md +++ b/playground/detection/coco/retinanet/retinanet.res50.fpn.coco.multiscale.1x/README.md @@ -1,50 +1,55 @@ # retinanet.res50.fpn.coco.multiscale.1x + +seed: 54373550 + ## Evaluation results for bbox: + ``` - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.365 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.562 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.393 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.219 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.405 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.477 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.314 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.503 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.534 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.348 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.579 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.683 + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.370 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.561 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.395 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.227 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.407 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.485 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.318 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.508 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.541 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.355 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.583 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.692 ``` | AP | AP50 | AP75 | APs | APm | APl | 
|:------:|:------:|:------:|:------:|:------:|:------:| -| 36.454 | 56.242 | 39.328 | 21.898 | 40.478 | 47.747 | +| 36.972 | 56.094 | 39.523 | 22.692 | 40.713 | 48.540 | + ### Per-category bbox AP: | category | AP | category | AP | category | AP | |:--------------|:-------|:-------------|:-------|:---------------|:-------| -| person | 49.847 | bicycle | 27.800 | car | 39.543 | -| motorcycle | 39.535 | airplane | 60.788 | bus | 62.082 | -| train | 57.897 | truck | 32.374 | boat | 23.642 | -| traffic light | 23.785 | fire hydrant | 61.860 | stop sign | 62.912 | -| parking meter | 42.134 | bench | 20.323 | bird | 32.035 | -| cat | 62.643 | dog | 59.460 | horse | 51.436 | -| sheep | 45.442 | cow | 49.267 | elephant | 55.752 | -| bear | 69.257 | zebra | 62.308 | giraffe | 62.423 | -| backpack | 12.308 | umbrella | 32.660 | handbag | 11.692 | -| tie | 26.095 | suitcase | 29.544 | frisbee | 62.662 | -| skis | 18.217 | snowboard | 22.185 | sports ball | 43.453 | -| kite | 37.063 | baseball bat | 21.984 | baseball glove | 31.171 | -| skateboard | 48.046 | surfboard | 30.663 | tennis racket | 44.667 | -| bottle | 32.999 | wine glass | 30.731 | cup | 37.611 | -| fork | 22.843 | knife | 9.941 | spoon | 10.597 | -| bowl | 37.554 | banana | 21.658 | apple | 16.915 | -| sandwich | 28.278 | orange | 27.994 | broccoli | 20.984 | -| carrot | 19.075 | hot dog | 27.483 | pizza | 46.153 | -| donut | 39.205 | cake | 30.042 | chair | 23.022 | -| couch | 36.458 | potted plant | 23.638 | bed | 39.539 | -| dining table | 24.671 | toilet | 54.413 | tv | 53.012 | -| laptop | 52.960 | mouse | 59.783 | remote | 24.399 | -| keyboard | 42.980 | cell phone | 32.588 | microwave | 53.939 | -| oven | 31.974 | toaster | 16.414 | sink | 31.876 | -| refrigerator | 46.588 | book | 11.818 | clock | 48.778 | -| vase | 34.466 | scissors | 25.628 | teddy bear | 45.000 | -| hair drier | 0.428 | toothbrush | 14.942 | | | +| person | 50.773 | bicycle | 27.125 | car | 39.880 | +| motorcycle | 40.405 | airplane | 63.344 | bus | 63.718 | +| train | 59.472 | truck | 33.432 | boat | 23.513 | +| traffic light | 25.051 | fire hydrant | 63.709 | stop sign | 62.338 | +| parking meter | 43.618 | bench | 20.839 | bird | 32.856 | +| cat | 64.382 | dog | 60.487 | horse | 51.448 | +| sheep | 46.863 | cow | 49.293 | elephant | 56.725 | +| bear | 67.596 | zebra | 64.431 | giraffe | 62.447 | +| backpack | 12.816 | umbrella | 33.575 | handbag | 11.732 | +| tie | 26.924 | suitcase | 32.361 | frisbee | 61.239 | +| skis | 16.860 | snowboard | 18.176 | sports ball | 43.461 | +| kite | 36.440 | baseball bat | 23.016 | baseball glove | 30.666 | +| skateboard | 46.296 | surfboard | 30.590 | tennis racket | 43.959 | +| bottle | 34.105 | wine glass | 32.470 | cup | 38.667 | +| fork | 21.774 | knife | 10.702 | spoon | 8.498 | +| bowl | 38.039 | banana | 21.952 | apple | 18.406 | +| sandwich | 29.000 | orange | 27.158 | broccoli | 21.329 | +| carrot | 19.073 | hot dog | 25.912 | pizza | 47.289 | +| donut | 39.409 | cake | 30.148 | chair | 23.320 | +| couch | 38.598 | potted plant | 23.030 | bed | 41.347 | +| dining table | 24.819 | toilet | 55.766 | tv | 53.597 | +| laptop | 54.526 | mouse | 59.451 | remote | 24.147 | +| keyboard | 43.568 | cell phone | 32.373 | microwave | 55.890 | +| oven | 31.385 | toaster | 26.014 | sink | 32.295 | +| refrigerator | 49.247 | book | 12.279 | clock | 48.616 | +| vase | 35.170 | scissors | 24.159 | teddy bear | 42.737 | +| hair drier | 5.050 | toothbrush | 14.578 | | | diff --git a/setup.py b/setup.py index 6280b8f..18d3af3 
100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] -assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" +assert torch_ver >= [1, 6], "Requires PyTorch >= 1.6" def get_version(): diff --git a/tests/data/test_rotation_transform.py b/tests/data/test_rotation_transform.py deleted file mode 100644 index 3685298..0000000 --- a/tests/data/test_rotation_transform.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -import unittest - -from cvpods.data.transforms.transform import RotationTransform - - -class TestRotationTransform(unittest.TestCase): - def assertEqualsArrays(self, a1, a2): - self.assertTrue(np.allclose(a1, a2)) - - def randomData(self, h=5, w=5): - image = np.random.rand(h, w) - coords = np.array([[i, j] for j in range(h + 1) for i in range(w + 1)], dtype=float) - return image, coords, h, w - - def test180(self): - image, coords, h, w = self.randomData(6, 6) - rot = RotationTransform(h, w, 180, expand=False, center=None) - self.assertEqualsArrays(rot.apply_image(image), image[::-1, ::-1]) - rotated_coords = [[w - c[0], h - c[1]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test45_coords(self): - _, coords, h, w = self.randomData(4, 6) - rot = RotationTransform(h, w, 45, expand=False, center=None) - rotated_coords = [ - [(x + y - (h + w) / 2) / np.sqrt(2) + w / 2, h / 2 + (y + (w - h) / 2 - x) / np.sqrt(2)] - for (x, y) in coords - ] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test90(self): - image, coords, h, w = self.randomData() - rot = RotationTransform(h, w, 90, expand=False, center=None) - self.assertEqualsArrays(rot.apply_image(image), image.T[::-1]) - rotated_coords = [[c[1], w - c[0]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test90_expand(self): # non-square image - image, coords, h, w = self.randomData(h=5, w=8) - rot = RotationTransform(h, w, 90, expand=True, center=None) - self.assertEqualsArrays(rot.apply_image(image), image.T[::-1]) - rotated_coords = [[c[1], w - c[0]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test_center_expand(self): - # center has no effect if expand=True because it only affects shifting - image, coords, h, w = self.randomData(h=5, w=8) - angle = np.random.randint(360) - rot1 = RotationTransform(h, w, angle, expand=True, center=None) - rot2 = RotationTransform(h, w, angle, expand=True, center=(0, 0)) - rot3 = RotationTransform(h, w, angle, expand=True, center=(h, w)) - rot4 = RotationTransform(h, w, angle, expand=True, center=(2, 5)) - for r1 in [rot1, rot2, rot3, rot4]: - for r2 in [rot1, rot2, rot3, rot4]: - self.assertEqualsArrays(r1.apply_image(image), r2.apply_image(image)) - self.assertEqualsArrays(r1.apply_coords(coords), r2.apply_coords(coords)) - - def test_inverse_transform(self): - image, coords, h, w = self.randomData(h=5, w=8) - rot = RotationTransform(h, w, 90, expand=True, center=None) - rot_image = rot.apply_image(image) - self.assertEqualsArrays(rot.inverse().apply_image(rot_image), image) - rot = RotationTransform(h, w, 65, expand=True, center=None) - rotated_coords = rot.apply_coords(coords) - self.assertEqualsArrays(rot.inverse().apply_coords(rotated_coords), coords) - - -if __name__ == "__main__": - unittest.main()
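Taken together, the migration means downstream code constructs a transform and applies it directly, receiving `(image, annotations)` back — for instance the registered `AutoAugment` from `auto_aug.py`. A usage sketch; the op name `"AutoContrast"` is a guess at an entry of `_RAND_TRANSFORMS`, whose contents this diff does not show:

```python
import numpy as np
from cvpods.data.transforms.auto_aug import AutoAugment

# "AutoContrast" is a hypothetical op name -- it must match _RAND_TRANSFORMS.
aug = AutoAugment("AutoContrast", prob=1.0, magnitude=9)
img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
img, annotations = aug(img, annotations=None)  # new-style single call
print(img.shape)
```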