diff --git a/cvpods/data/transforms/__init__.py b/cvpods/data/transforms/__init__.py index a9f0ac8..2facdf6 100644 --- a/cvpods/data/transforms/__init__.py +++ b/cvpods/data/transforms/__init__.py @@ -1,5 +1,5 @@ # Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -from .transform import * -from .transform_gen import * +from .augmentations import * +from .auto_aug import AutoAugment __all__ = [k for k in globals().keys() if not k.startswith("_")] diff --git a/cvpods/data/transforms/transform.py b/cvpods/data/transforms/augmentations.py similarity index 55% rename from cvpods/data/transforms/transform.py rename to cvpods/data/transforms/augmentations.py index 887367e..2935ac3 100644 --- a/cvpods/data/transforms/transform.py +++ b/cvpods/data/transforms/augmentations.py @@ -1,11 +1,16 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- -# Copyright (c) BaseDetection, Inc. and its affiliates. All Rights Reserved +# Copyright (c) Facebook, Inc. and its affiliates. +# Modified by BaseDetection, Inc. and its affiliates. + +# pylint: disable=W0613 import inspect +import pprint import random +import sys from abc import ABCMeta, abstractmethod -from typing import Callable, TypeVar +from typing import Callable import cv2 import numpy as np @@ -13,43 +18,42 @@ import pycocotools.mask as mask_util import torch -import torchvision.transforms as transforms import cvpods -from cvpods.structures import BoxMode +from cvpods.structures import Boxes, BoxMode, pairwise_iou -from .transform_util import to_float_tensor, to_numpy +from ..registry import TRANSFORMS __all__ = [ - "ExpandTransform", - "AffineTransform", - "BlendTransform", - "IoUCropTransform", - "CropTransform", - "CropPadTransform", - "GridSampleTransform", - "RotationTransform", - "HFlipTransform", - "VFlipTransform", - "NoOpTransform", - "ScaleTransform", - "DistortTransform", - "Transform", - "TransformList", - "ExtentTransform", - "ResizeTransform", - # Transform used in ssl - "GaussianBlurTransform", - "GaussianBlurConvTransform", - "SolarizationTransform", - "ComposeTransform", - "LabSpaceTransform", - "PadTransform", + "Pad", + "RandomScale", + "Expand", + "MinIoURandomCrop", + "RandomSwapChannels", + "CenterAffine", + "RandomBrightness", + "RandomContrast", + "RandomCrop", + "RandomCropWithInstance", + "RandomCropWithMaxAreaLimit", + "RandomCropPad", + "RandomExtent", + "RandomFlip", + "RandomSaturation", + "RandomDistortion", + "Resize", + "ResizeShortestEdge", + "ResizeLongestEdge", + "ShuffleList", + "RandomList", + "RepeatList", + "TorchTransformGen", + # transforms used in ssl + "RandomGaussianBlur", + "RandomSolarization", + "RandomLightning", ] -# NOTE: to document methods in subclasses, it's sufficient to only document those whose -# implemenation needs special attention. - class Transform(metaclass=ABCMeta): """ @@ -58,11 +62,12 @@ class Transform(metaclass=ABCMeta): all methods of this class are deterministic w.r.t their input arguments. In training, there should be a higher-level policy that generates (likely with random variations) these transform ops. Each transform op may handle several - data types, e.g.: image, coordinates, segmentation, bounding boxes. Some of - them have a default implementation, but can be overwritten if the default - isn't appropriate. The implementation of each method may choose to modify - its input data in-place for efficient transformation. + data types, e.g.: image, point cloud, coordinates, segmentation, bounding boxes. 
+ Some of them have a default implementation, but can be overwritten if the + default isn't appropriate. The implementation of each method may choose to + modify its input data in-place for efficient transformation. """ + def _set_attributes(self, params: list = None): """ Set attributes from the input list of parameters. @@ -71,11 +76,186 @@ def _set_attributes(self, params: list = None): params (list): list of parameters. """ - if params: + if params is not None: for k, v in params.items(): if k != "self" and not k.startswith("_"): setattr(self, k, v) + def __call__(self, data: list, annotations: list = None, **kwargs): + """ + Apply transform to the data and corresponding annotations (if exist). + """ + raise NotImplementedError + + @classmethod + def register_type(cls, data_type: str, func: Callable): + """ + Register the given function as a handler that this transform will use + for a specific data type. + + Args: + data_type (str): the name of the data type (e.g., box) + func (callable): takes a transform and a data, returns the + transformed data. + + Examples: + + .. code-block:: python + + def func(flip_transform, voxel_data): + return transformed_voxel_data + HFlipTransform.register_type("voxel", func) + + # ... + transform = HFlipTransform(...) + transform.apply_voxel(voxel_data) # func will be called + """ + assert callable( + func + ), "You can only register a callable to a Transform. Got {} instead.".format( + func) + argspec = inspect.getfullargspec(func) + assert len(argspec.args) == 2, ( + "You can only register a function that takes two positional " + "arguments to a Transform! Got a function with spec {}".format( + str(argspec))) + setattr(cls, "apply_" + data_type, func) + + def _rand_range(self, low=1.0, high=None, size=None): + """ + Uniform float random number between low and high. + """ + if high is None: + low, high = 0, low + if size is None: + size = [] + return np.random.uniform(low, high, size) + + def __repr__(self): + """ + Produce something like: + "MyTransformGen(field1={self.field1}, field2={self.field2})" + """ + try: + sig = inspect.signature(self.__init__) + classname = type(self).__name__ + argstr = [] + for name, param in sig.parameters.items(): + assert ( + param.kind != param.VAR_POSITIONAL + and param.kind != param.VAR_KEYWORD + ), "The default __repr__ doesn't support *args or **kwargs" + assert hasattr(self, name), ( + "Attribute {} not found! " + "Default __repr__ only works if attributes match the constructor.".format( + name + ) + ) + attr = getattr(self, name) + default = param.default + if default is attr: + continue + argstr.append("{}={}".format(name, pprint.pformat(attr))) + return "{}({})".format(classname, ", ".join(argstr)) + except AssertionError: + return super().__repr__() + + +class ComposeTransform(Transform): + """ + Composes several transforms together. + """ + + def __init__(self, transforms: list): + """ + Args: + transforms (list[Transform]): list of transforms to compose. + """ + super().__init__() + self._set_attributes(locals()) + + def __eq__(self, other): + if not isinstance(other, ComposeTransform): + return False + return self.transforms == other.transforms + + def __call__(self, img, annotations=None, **kwargs): + for tfm in self.transforms: + img, annotations = tfm(img, annotations, **kwargs) + return img, annotations + + def __repr__(self): + return "".join([tfm for tfm in self.transforms]) + + +@TRANSFORMS.register() +class RandomList(ComposeTransform): + """ + Random select subset of provided augmentations. 
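+
+    Example (a minimal sketch; ``MyAug1``/``MyAug2`` are hypothetical names
+    standing in for any registered transforms):
+
+    .. code-block:: python
+
+        aug = RandomList(transforms=[MyAug1(), MyAug2(), RandomFlip()],
+                         num_layers=2)
+        img, annotations = aug(img, annotations)  # applies 2 sampled transforms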
+ """ + def __init__(self, transforms, num_layers=2, choice_weights=None): + """ + Args: + transforms (List[TorchTransformGen]): list of transforms need to be performed. + num_layers (int): parameters of np.random.choice. + choice_weights (optional, float): parameters of np.random.choice. + """ + self.all_transforms = transforms + self.num_layers = num_layers + self.choice_weights = choice_weights + + def __call__(self, img, annotations=None, **kwargs): + self.transforms = np.random.choice( + self.all_transforms, + self.num_layers, + replace=self.choice_weights is None, + p=self.choice_weights) + + return super().__call__(img, annotations) + + +@TRANSFORMS.register() +class ShuffleList(ComposeTransform): + """ + Randomly shuffle the `transforms` order. + """ + + def __call__(self, img, annotations=None, **kwargs): + np.random.shuffle(self.transforms) + return super().__call__(img, annotations) + + +@TRANSFORMS.register() +class RepeatList(ComposeTransform): + """ + Forward several times of provided transforms for a given image. + """ + def __init__(self, transforms, repeat_times=2): + """ + Args: + transforms (list[TransformGen]): List of transform to be repeated. + repeat_times (int): number of duplicates desired. + """ + super().__init__(transforms) + self.times = repeat_times + + def __call__(self, img, annotations=None, **kwargs): + repeat_imgs = [] + repeat_annotations = [] + for t in range(self.times): + tmp_img, tmp_anno = super().__call__(img, annotations, **kwargs) + repeat_imgs.append(tmp_img) + repeat_annotations.append(tmp_anno) + repeat_imgs = np.stack(repeat_imgs, axis=0) + + return repeat_imgs, repeat_annotations + + +class DefaultTransorm(Transform): + """ + Default transform for 2D detection, segmentation, keypoints, etc. + """ + @abstractmethod def apply_image(self, img: np.ndarray): """ @@ -222,7 +402,8 @@ def __call__(self, image, annotations=None, **kwargs): keypoints[:, :2] = self.apply_coords(keypoints[:, :2]) # This assumes that HorizFlipTransform is the only one that does flip - do_hflip = isinstance(self, cvpods.data.transforms.transform.HFlipTransform) + do_hflip = isinstance(self, cvpods.data.transforms.augmentations.RandomFlip) \ + and self.horizontal # Alternative way: check if probe points was horizontally flipped. # probe = np.asarray([[0.0, 0.0], [image_width, 0.0]]) @@ -257,168 +438,19 @@ def __call__(self, image, annotations=None, **kwargs): "Supported type is ndarray.".format(type(sem_seg))) return image, annotations - @classmethod - def register_type(cls, data_type: str, func: Callable): - """ - Register the given function as a handler that this transform will use - for a specific data type. - - Args: - data_type (str): the name of the data type (e.g., box) - func (callable): takes a transform and a data, returns the - transformed data. - - Examples: - - .. code-block:: python - - def func(flip_transform, voxel_data): - return transformed_voxel_data - HFlipTransform.register_type("voxel", func) - - # ... - transform = HFlipTransform(...) - transform.apply_voxel(voxel_data) # func will be called - """ - assert callable( - func - ), "You can only register a callable to a Transform. Got {} instead.".format( - func) - argspec = inspect.getfullargspec(func) - assert len(argspec.args) == 2, ( - "You can only register a function that takes two positional " - "arguments to a Transform! 
Got a function with spec {}".format( - str(argspec))) - setattr(cls, "apply_" + data_type, func) - -_T = TypeVar("_T") - - -class ComposeTransform(object): - """ - Composes several transforms together. - """ - - def __init__(self, tfms): - """ - Args: - transforms (list[Transform]): list of transforms to compose. - """ - super().__init__() - self.transforms = tfms - - def __eq__(self, other): - if not isinstance(other, ComposeTransform): - return False - return self.transforms == other.transforms - - def __call__(self, img, annotations=None, **kwargs): - for tfm in self.transforms: - img, annotations = tfm(img, annotations, **kwargs) - return img, annotations - - def __repr__(self): - return "".join([tfm for tfm in self.transforms]) - - -# TODO: Deprecated -# pyre-ignore-all-errors -class TransformList: - """ - Maintain a list of transform operations which will be applied in sequence. - Attributes: - transforms (list[Transform]) - """ - def __init__(self, transforms: list): - """ - Args: - transforms (list[Transform]): list of transforms to perform. - """ - super().__init__() - for t in transforms: - assert isinstance(t, Transform), t - self.transforms = transforms - - def _apply(self, x: _T, meth: str) -> _T: - """ - Apply the transforms on the input. - Args: - x: input to apply the transform operations. - meth (str): meth. - Returns: - x: after apply the transformation. - """ - for t in self.transforms: - x = getattr(t, meth)(x) - return x - - def __getattr__(self, name: str): - """ - Args: - name (str): name of the attribute. - """ - if name.startswith("apply_"): - return lambda x: self._apply(x, name) - raise AttributeError( - "TransformList object has no attribute {}".format(name)) - - def __add__(self, other: "TransformList") -> "TransformList": - """ - Args: - other (TransformList): transformation to add. - Returns: - TransformList: list of transforms. - """ - others = (other.transforms - if isinstance(other, TransformList) else [other]) - return TransformList(self.transforms + others) - - def __iadd__(self, other: "TransformList") -> "TransformList": - """ - Args: - other (TransformList): transformation to add. - Returns: - TransformList: list of transforms. - """ - others = (other.transforms - if isinstance(other, TransformList) else [other]) - self.transforms.extend(others) - return self - - def __radd__(self, other: "TransformList") -> "TransformList": - """ - Args: - other (TransformList): transformation to add. - Returns: - TransformList: list of transforms. - """ - others = (other.transforms - if isinstance(other, TransformList) else [other]) - return TransformList(others + self.transforms) - - def insert(self, idx: int, other: "TransformList") -> "TransformList": - """ - Args: - idx (int): insert position. - other (TransformList): transformation to insert. - Returns: - None - """ - assert idx in range(len(self.transforms)) - others = (other.transforms - if isinstance(other, TransformList) else [other]) - self.transforms = self.transforms[:idx] + others + self.transforms[idx:] - - -class DistortTransform(Transform): +# Simplify this to inherent SimpleTransform +@TRANSFORMS.register() +class RandomDistortion(Transform): """ Distort image w.r.t hue, saturation and exposure. 
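+
+    Example (a minimal sketch): ``hue`` is a random shift sampled from
+    ``[-hue, hue]``; ``saturation`` and ``exposure`` are upper bounds for a
+    random scale factor (or its inverse).
+
+    .. code-block:: python
+
+        aug = RandomDistortion(hue=0.1, saturation=1.5, exposure=1.5,
+                               image_format="BGR", prob=0.5)
+        img, annotations = aug(img, annotations)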
""" - def __init__(self, hue, saturation, exposure, image_format): + def __init__(self, hue, saturation, exposure, image_format="BGR", prob=0.5): + assert image_format in ["RGB", "BGR"] super().__init__() self._set_attributes(locals()) + self.cvt_code = { "RGB": (cv2.COLOR_RGB2HSV, cv2.COLOR_HSV2RGB), "BGR": (cv2.COLOR_BGR2HSV, cv2.COLOR_HSV2BGR), @@ -426,7 +458,7 @@ def __init__(self, hue, saturation, exposure, image_format): if saturation > 1.0: saturation /= 255. # in range [0, 1] - def apply_image(self, img: np.ndarray) -> np.ndarray: + def __call__(self, img, annotations=None, **kwargs): """ Args: img (ndarray): of shape HxW, HxWxC, or NxHxWxC. The array can be @@ -436,31 +468,30 @@ def apply_image(self, img: np.ndarray) -> np.ndarray: Returns: ndarray: the distorted image(s). """ - dhue = np.random.uniform(low=-self.hue, high=self.hue) - dsat = self._rand_scale(self.saturation) - dexp = self._rand_scale(self.exposure) - - dtype = img.dtype - img = cv2.cvtColor(img, self.cvt_code[0]) - img = np.asarray(img, dtype=np.float32) / 255. - img[:, :, 1] *= dsat - img[:, :, 2] *= dexp - H = img[:, :, 0] + dhue + do = self._rand_range() < self.prob + if do: + dhue = np.random.uniform(low=-self.hue, high=self.hue) + dsat = self._rand_scale(self.saturation) + dexp = self._rand_scale(self.exposure) + + dtype = img.dtype + img = cv2.cvtColor(img, self.cvt_code[0]) + img = np.asarray(img, dtype=np.float32) / 255. + img[:, :, 1] *= dsat + img[:, :, 2] *= dexp + H = img[:, :, 0] + dhue + + if dhue > 0: + H[H > 1.0] -= 1.0 + else: + H[H < 0.0] += 1.0 + + img[:, :, 0] = H + img = (img * 255).clip(0, 255).astype(np.uint8) + img = cv2.cvtColor(img, self.cvt_code[1]) + img = np.asarray(img, dtype=dtype) - if dhue > 0: - H[H > 1.0] -= 1.0 - else: - H[H < 0.0] += 1.0 - - img[:, :, 0] = H - img = (img * 255).clip(0, 255).astype(np.uint8) - img = cv2.cvtColor(img, self.cvt_code[1]) - img = np.asarray(img, dtype=dtype) - - return img - - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - return coords + return img, annotations def _rand_scale(self, upper_bound): """ @@ -477,22 +508,27 @@ def _rand_scale(self, upper_bound): return scale return 1 / scale - def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray: - return segmentation - -class AffineTransform(Transform): +@TRANSFORMS.register() +class CenterAffine(DefaultTransorm): """ Augmentation from CenterNet """ - def __init__(self, src, dst, output_size, pad_value=[0, 0, 0]): + def __init__(self, boarder, output_size, pad_value=[0, 0, 0], random_aug=True): + """ output_size:(w, h) """ super().__init__() - affine = cv2.getAffineTransform(np.float32(src), np.float32(dst)) self._set_attributes(locals()) + def __call__(self, image, annotations, **kwargs): + self.img_shape = image.shape[:2] + self.center, self.scale = self.generate_center_and_scale(self.img_shape) + self.src, self.dst = self.generate_src_and_dst(self.center, self.scale, self.output_size) + self.affine = cv2.getAffineTransform(np.float32(self.src), np.float32(self.dst)) + return super().__call__(image, annotations) + def apply_image(self, img: np.ndarray) -> np.ndarray: """ Apply AffineTransform for the image(s). @@ -534,102 +570,42 @@ def apply_coords(self, coords: np.ndarray) -> np.ndarray: return coords -class RotationTransform(Transform): +@TRANSFORMS.register() +class RandomFlip(DefaultTransorm): """ - This method returns a copy of this image, rotated the given - number of degrees counter clockwise around its center. + Perform horizontal flip. 
""" - def __init__(self, h, w, angle, expand=True, center=None, interp=None): + def __init__(self, prob=0.5, *, horizontal=True, vertical=False): """ Args: - h, w (int): original image size - angle (float): degrees for rotation - expand (bool): choose if the image should be resized to fit the whole - rotated image (default), or simply cropped - center (tuple (width, height)): coordinates of the rotation center - if left to None, the center will be fit to the center of each image - center has no effect if expand=True because it only affects shifting - interp: cv2 interpolation method, default cv2.INTER_LINEAR + prob (float): probability of flip. + horizontal (boolean): whether to apply horizontal flipping + vertical (boolean): whether to apply vertical flipping """ super().__init__() - image_center = np.array((w / 2, h / 2)) - if center is None: - center = image_center - if interp is None: - interp = cv2.INTER_LINEAR - abs_cos, abs_sin = (abs(np.cos(np.deg2rad(angle))), abs(np.sin(np.deg2rad(angle)))) - if expand: - # find the new width and height bounds - bound_w, bound_h = np.rint( - [h * abs_sin + w * abs_cos, h * abs_cos + w * abs_sin] - ).astype(int) - else: - bound_w, bound_h = w, h + if horizontal and vertical: + raise ValueError( + "Cannot do both horiz and vert. Please use two Flip instead." + ) + if not horizontal and not vertical: + raise ValueError("At least one of horiz or vert has to be True!") self._set_attributes(locals()) - self.rm_coords = self.create_rotation_matrix() - # Needed because of this problem https://github.com/opencv/opencv/issues/11784 - self.rm_image = self.create_rotation_matrix(offset=-0.5) - def apply_image(self, img, interp=None): - """ - img should be a numpy array, formatted as Height * Width * Nchannels - """ - if len(img) == 0 or self.angle % 360 == 0: - return img - assert img.shape[:2] == (self.h, self.w) - interp = interp if interp is not None else self.interp - return cv2.warpAffine(img, self.rm_image, (self.bound_w, self.bound_h), flags=interp) + def __call__(self, image, annotations, **kwargs): + h, w = image.shape[:2] + do = self._rand_range() < self.prob - def apply_coords(self, coords): - """ - coords should be a N * 2 array-like, containing N couples of (x, y) points - """ - coords = np.asarray(coords, dtype=float) - if len(coords) == 0 or self.angle % 360 == 0: - return coords - return cv2.transform(coords[:, np.newaxis, :], self.rm_coords)[:, 0, :] - - def apply_segmentation(self, segmentation): - segmentation = self.apply_image(segmentation, interp=cv2.INTER_NEAREST) - return segmentation - - def create_rotation_matrix(self, offset=0): - center = (self.center[0] + offset, self.center[1] + offset) - rm = cv2.getRotationMatrix2D(tuple(center), self.angle, 1) - if self.expand: - # Find the coordinates of the center of rotation in the new image - # The only point for which we know the future coordinates is the center of the image - rot_im_center = cv2.transform(self.image_center[None, None, :] + offset, rm)[0, 0, :] - new_center = np.array([self.bound_w / 2, self.bound_h / 2]) + offset - rot_im_center - # shift the rotation center to the new coordinates - rm[:, 2] += new_center - return rm - - def inverse(self): - """ - The inverse is to rotate it back with expand, and crop to get the original shape. 
- """ - if not self.expand: # Not possible to inverse if a part of the image is lost - raise NotImplementedError() - rotation = RotationTransform( - self.bound_h, self.bound_w, -self.angle, True, None, self.interp - ) - crop = CropTransform( - (rotation.bound_w - self.w) // 2, (rotation.bound_h - self.h) // 2, self.w, self.h - ) - return TransformList([rotation, crop]) - - -class HFlipTransform(Transform): - """ - Perform horizontal flip. - """ + if self.horizontal: + self.width = w + else: + self.height = h - def __init__(self, width: int): - super().__init__() - self._set_attributes(locals()) + if do: + return super().__call__(image, annotations, **kwargs) + else: + return image, annotations def apply_image(self, img: np.ndarray) -> np.ndarray: """ @@ -643,13 +619,23 @@ def apply_image(self, img: np.ndarray) -> np.ndarray: Returns: ndarray: the flipped image(s). """ - tensor = torch.from_numpy(np.ascontiguousarray(img).copy()) - if len(tensor.shape) == 2: - # For dimension of HxW. - tensor = tensor.flip((-1)) - elif len(tensor.shape) > 2: - # For dimension of HxWxC, NxHxWxC. - tensor = tensor.flip((-2)) + if self.horizontal: + tensor = torch.from_numpy(np.ascontiguousarray(img).copy()) + if len(tensor.shape) == 2: + # For dimension of HxW. + tensor = tensor.flip((-1)) + elif len(tensor.shape) > 2: + # For dimension of HxWxC, NxHxWxC. + tensor = tensor.flip((-2)) + else: + tensor = torch.from_numpy(np.ascontiguousarray(img).copy()) + if len(tensor.shape) == 2: + # For dimension of HxW. + tensor = tensor.flip((-2)) + elif len(tensor.shape) > 2: + # For dimension of HxWxC, NxHxWxC. + tensor = tensor.flip((-3)) + return tensor.numpy() def apply_coords(self, coords: np.ndarray) -> np.ndarray: @@ -667,56 +653,11 @@ def apply_coords(self, coords: np.ndarray) -> np.ndarray: Therefore they are flipped by `(W - x, H - y)`, not `(W - 1 - x, H 1 - y)`. """ - coords[:, 0] = self.width - coords[:, 0] - return coords - - -class VFlipTransform(Transform): - """ - Perform vertical flip. - """ - - def __init__(self, height: int): - super().__init__() - self._set_attributes(locals()) - - def apply_image(self, img: np.ndarray) -> np.ndarray: - """ - Flip the image(s). - - Args: - img (ndarray): of shape HxW, HxWxC, or NxHxWxC. The array can be - of type uint8 in range [0, 255], or floating point in range - [0, 1] or [0, 255]. - - Returns: - ndarray: the flipped image(s). - """ - tensor = torch.from_numpy(np.ascontiguousarray(img).copy()) - if len(tensor.shape) == 2: - # For dimension of HxW. - tensor = tensor.flip((-2)) - elif len(tensor.shape) > 2: - # For dimension of HxWxC, NxHxWxC. - tensor = tensor.flip((-3)) - return tensor.numpy() - - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - """ - Flip the coordinates. - - Args: - coords (ndarray): floating point array of shape Nx2. Each row is (x, y). - - Returns: - ndarray: the flipped coordinates. + if self.horizontal: + coords[:, 0] = self.width - coords[:, 0] + else: + coords[:, 1] = self.height - coords[:, 1] - Note: - The inputs are floating point coordinates, not pixel indices. - Therefore they are flipped by `(W - x, H - y)`, not - `(W - 1 - x, H - 1 - y)`. 
- """ - coords[:, 1] = self.height - coords[:, 1] return coords @@ -727,14 +668,12 @@ class NoOpTransform(Transform): def __init__(self): super().__init__() - def apply_image(self, img: np.ndarray) -> np.ndarray: - return img + def __call__(self, data, annotations): + return data, annotations - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - return coords - -class GaussianBlurTransform(Transform): +@TRANSFORMS.register() +class RandomGaussianBlur(Transform): """ GaussianBlur using PIL.ImageFilter.GaussianBlur """ @@ -747,100 +686,29 @@ def __init__(self, sigma, p=1.0): super().__init__() self._set_attributes(locals()) - def apply_image(self, img: np.ndarray) -> np.ndarray: - if np.random.random() < self.p: - sigma = random.uniform(self.sigma[0], self.sigma[1]) - img = Image.fromarray(img).filter(ImageFilter.GaussianBlur(radius=sigma)) - return np.array(img) - - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - return coords - - -class SolarizationTransform(Transform): - def __init__(self, thresh=128, p=0.5): - super().__init__() - self.thresh = thresh - self.p = p - - def apply_image(self, img: np.ndarray) -> np.ndarray: - if np.random.random() < self.p: - return np.array(ImageOps.solarize(Image.fromarray(img), self.thresh)) - else: - return img - - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - return coords - - -class GaussianBlurConvTransform(Transform): - def __init__(self, kernel_size, p=1.0): - super().__init__() - self._set_attributes(locals()) - radias = kernel_size // 2 - kernel_size = radias * 2 + 1 - self.blur_h = torch.nn.Conv2d(3, 3, kernel_size=(kernel_size, 1), - stride=1, padding=0, bias=False, groups=3) - self.blur_v = torch.nn.Conv2d(3, 3, kernel_size=(1, kernel_size), - stride=1, padding=0, bias=False, groups=3) - self.k = kernel_size - self.r = radias - - self.blur = torch.nn.Sequential( - torch.nn.ReflectionPad2d(radias), - self.blur_h, - self.blur_v - ) - - self.pil_to_tensor = transforms.ToTensor() - self.tensor_to_pil = transforms.ToPILImage() - - def apply_image(self, img: np.ndarray) -> np.ndarray: + def __call__(self, image, annotations=None, **kwargs): if np.random.random() < self.p: - img = self.pil_to_tensor(Image.fromarray(img)).unsqueeze(0) - - sigma = np.random.uniform(0.1, 2.0) - x = np.arange(-self.r, self.r + 1) - x = np.exp(-np.power(x, 2) / (2 * sigma * sigma)) - x = x / x.sum() - x = torch.from_numpy(x).view(1, -1).repeat(3, 1) - - self.blur_h.weight.data.copy_(x.view(3, 1, self.k, 1)) - self.blur_v.weight.data.copy_(x.view(3, 1, 1, self.k)) - - with torch.no_grad(): - img = self.blur(img) - img = img.squeeze() - - img = np.array(self.tensor_to_pil(img)) - return img + sigma = random.uniform(self.sigma[0], self.sigma[1]) + img = Image.fromarray(image).filter(ImageFilter.GaussianBlur(radius=sigma)) - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - return coords + return np.array(img), annotations -class LabSpaceTransform(Transform): - """ - Convert image from RGB into Lab color space - """ - def __init__(self): +@TRANSFORMS.register() +class RandomSolarization(Transform): + def __init__(self, thresh=128, p=0.5): super().__init__() - self._set_attributes(locals()) + self._set_attributes(locals) - def apply_image(self, img: np.ndarray) -> np.ndarray: - assert len(img.shape) == 3, 'Image should have dim H x W x 3' - assert img.shape[2] == 3, 'Image should have dim H x W x 3' - img_lab = cv2.cvtColor(img, cv2.COLOR_BGR2LAB) - img_lab = img_lab.astype(np.float32) - img_lab[:, :, 0] = (img_lab[:, :, 0] * (100.0 
/ 255.0)) - 50.0 - img_lab[:, :, 1:] = img_lab[:, :, 1:] - 128.0 - return img_lab + def __call__(self, img, annotations=None, **kwargs) -> np.ndarray: + if np.random.random() < self.p: + img = np.array(ImageOps.solarize(Image.fromarray(img), self.thresh)) - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - return coords + return img, annotations -class PadTransform(Transform): +@TRANSFORMS.register() +class Pad(DefaultTransorm): """ Pad image with `pad_value` to the specified `target_h` and `target_w`. @@ -911,17 +779,14 @@ def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray: return segmentation -class ScaleTransform(Transform): +@TRANSFORMS.register() +class RandomScale(DefaultTransorm): """ Resize the image to a target size. """ - def __init__(self, - h: int, - w: int, - new_h: int, - new_w: int, - interp: str = "BILINEAR"): + def __init__(self, output_size, ratio_range=(0.1, 2), interp="BILINEAR"): + """ Args: h, w (int): original image size. @@ -936,6 +801,10 @@ def __init__(self, """ super().__init__() self._set_attributes(locals()) + self.min_ratio, self.max_ratio = ratio_range + if isinstance(self.output_size, int): + self.output_size = [self.output_size] * 2 + _str_to_pil_interpolation = { "NEAREST": Image.NEAREST, "BILINEAR": Image.BILINEAR, @@ -949,6 +818,29 @@ def __init__(self, interp) self.interp = _str_to_pil_interpolation[interp] + def __call__(self, img, annotations=None, **kwargs): + h, w = img.shape[:2] + output_h, output_w = self.output_size + + # 1. Select a random scale factor. + random_scale_factor = np.random.uniform(self.min_ratio, self.max_ratio) + + scaled_size_h = int(random_scale_factor * output_h) + scaled_size_w = int(random_scale_factor * output_w) + + # 2. Recompute the accurate scale_factor using rounded scaled image size. + image_scale_h = scaled_size_h * 1.0 / h + image_scale_w = scaled_size_w * 1.0 / w + image_scale = min(image_scale_h, image_scale_w) + + # 3. Select non-zero random offset (x, y) if scaled image is larger than output_size. + scaled_h = int(h * 1.0 * image_scale) + scaled_w = int(w * 1.0 * image_scale) + + self.h, self.w, self.new_h, self.new_w = h, w, scaled_h, scaled_w + + return super().__call__(img, annotations) + def apply_image(self, img: np.ndarray, interp: str = None) -> np.ndarray: """ Resize the image(s). @@ -1000,64 +892,7 @@ def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray: return segmentation -class GridSampleTransform(Transform): - def __init__(self, grid: np.ndarray, interp: str): - """ - Args: - grid (ndarray): grid has x and y input pixel locations which are - used to compute output. Grid has values in the range of [-1, 1], - which is normalized by the input height and width. The dimension - is `N x H x W x 2`. - interp (str): interpolation methods. Options include `nearest` and - `bilinear`. - """ - super().__init__() - self._set_attributes(locals()) - - def apply_image(self, img: np.ndarray, interp: str = None) -> np.ndarray: - """ - Apply grid sampling on the image(s). - - Args: - img (ndarray): of shape NxHxWxC, or HxWxC or HxW. The array can be - of type uint8 in range [0, 255], or floating point in range - [0, 1] or [0, 255]. - interp (str): interpolation methods. Options include `nearest` and - `bilinear`. - Returns: - ndarray: grid sampled image(s). - """ - interp_method = interp if interp is not None else self.interp - float_tensor = torch.nn.functional.grid_sample( - to_float_tensor(img), # NxHxWxC -> NxCxHxW. 
- torch.from_numpy(self.grid), - mode=interp_method, - padding_mode="border", - align_corners=False, - ) - return to_numpy(float_tensor, img.shape, img.dtype) - - def apply_coords(self, coords: np.ndarray): - """ - Not supported. - """ - raise NotImplementedError() - - def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray: - """ - Apply grid sampling on the full-image segmentation. - - Args: - segmentation (ndarray): of shape HxW. The array should have integer - or bool dtype. - Returns: - ndarray: grid sampled segmentation. - """ - segmentation = self.apply_image(segmentation, interp=Image.NEAREST) - return segmentation - - -class IoUCropTransform(Transform): +class IoUCropTransform(DefaultTransorm): """ Perform crop operations on images. @@ -1188,7 +1023,85 @@ def apply_polygons(self, polygons: list) -> list: return [self.apply_coords(p) for p in cropped_polygons] -class CropTransform(Transform): +@TRANSFORMS.register() +class MinIoURandomCrop(IoUCropTransform): + """ + Random crop the image & bboxes, the cropped patches have minimum IoU + requirement with original image & bboxes, the IoU threshold is randomly + selected from min_ious. + """ + + def __init__(self, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3): + """ + Args: + min_ious (tuple): minimum IoU threshold for all intersections with bounding boxes + min_crop_size (float): minimum crop's size + (i.e. h,w := a*h, a*w, where a >= min_crop_size). + """ + self._set_attributes(locals()) + + def __call__(self, img, annotations=None, **kwargs): + """ + Args: + img (ndarray): of shape HxWxC(RGB). The array can be of type uint8 + in range [0, 255], or floating point in range [0, 255]. + annotations (list[dict[str->str]]): + Each item in the list is a bbox label of an object. The object is + represented by a dict, + which contains: + - bbox (list): bbox coordinates, top left and bottom right. + - bbox_mode (str): bbox label mode, for example: `XYXY_ABS`, + `XYWH_ABS` and so on... + """ + sample_mode = (1, *self.min_ious, 0) + h, w = img.shape[:2] + + boxes = list() + for obj in annotations: + boxes.append(BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)) + boxes = torch.tensor(boxes) + + while True: + mode = np.random.choice(sample_mode) + if mode == 1: + return NoOpTransform() + + min_iou = mode + for i in range(50): + new_w = np.random.uniform(self.min_crop_size * w, w) + new_h = np.random.uniform(self.min_crop_size * h, h) + + # h / w in [0.5, 2] + if new_h / new_w < 0.5 or new_h / new_w > 2: + continue + + left = np.random.uniform(w - new_w) + top = np.random.uniform(h - new_h) + + patch = np.array( + (int(left), int(top), int(left + new_w), int(top + new_h))) + + overlaps = pairwise_iou( + Boxes(patch.reshape(-1, 4)), + Boxes(boxes.reshape(-1, 4)) + ) + + if overlaps.min() < min_iou: + continue + + # center of boxes should inside the crop img + center = (boxes[:, :2] + boxes[:, 2:]) / 2 + mask = ((center[:, 0] > patch[0]) * (center[:, 1] > patch[1]) + * (center[:, 0] < patch[2]) * (center[:, 1] < patch[3])) + if not mask.any(): + continue + + self.x0, self.y0, self.w, self.h = int(left), int(top), int(new_w), int(new_h) + + return super().__call__(img, annotations) + + +class CropTransform(DefaultTransorm): """ Perform crop operations on images. 
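+
+    Example for ``MinIoURandomCrop`` above (a minimal sketch; each entry of
+    ``annotations`` must provide ``bbox`` and ``bbox_mode``):
+
+    .. code-block:: python
+
+        aug = MinIoURandomCrop(min_ious=(0.1, 0.3, 0.5, 0.7, 0.9),
+                               min_crop_size=0.3)
+        img, annotations = aug(img, annotations)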
""" @@ -1278,23 +1191,91 @@ def apply_polygons(self, polygons: list) -> list: return [self.apply_coords(p) for p in cropped_polygons] -class CropPadTransform(Transform): +@TRANSFORMS.register() +class RandomCrop(CropTransform): + """ + Randomly crop a subimage out of an image. + """ + + def __init__(self, crop_type: str, crop_size, strict_mode=True): + """ + Args: + crop_type (str): one of "relative_range", "relative", "absolute". + See `config/defaults.py` for explanation. + crop_size (tuple[float]): the relative ratio or absolute pixels of + height and width + strict_mode (bool): if `True`, the target `crop_size` must be smaller than + the original image size. + """ + assert crop_type in ["relative_range", "relative", "absolute"] + self._set_attributes(locals()) + + def __call__(self, img, annotations=None, **kwargs): + h, w = img.shape[:2] + croph, cropw = self.get_crop_size((h, w)) + if self.strict_mode: + assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format( + self + ) + offset_range_h = max(h - croph, 0) + offset_range_w = max(w - cropw, 0) + self.y0 = np.random.randint(offset_range_h + 1) + self.x0 = np.random.randint(offset_range_w + 1) + + self.w = cropw + self.h = croph + + return super().__call__(img, annotations) + + def get_crop_size(self, image_size): + """ + Args: + image_size (tuple): height, width + + Returns: + crop_size (tuple): height, width in absolute pixels + """ + h, w = image_size + if self.crop_type == "relative": + ch, cw = self.crop_size + return int(h * ch + 0.5), int(w * cw + 0.5) + elif self.crop_type == "relative_range": + crop_size = np.asarray(self.crop_size, dtype=np.float32) + ch, cw = crop_size + np.random.rand(2) * (1 - crop_size) + return int(h * ch + 0.5), int(w * cw + 0.5) + elif self.crop_type == "absolute": + return self.crop_size + else: + NotImplementedError("Unknown crop type {}".format(self.crop_type)) + + +@TRANSFORMS.register() +class RandomCropPad(RandomCrop): def __init__(self, - x0: int, - y0: int, - w: int, - h: int, - new_w: int, - new_h: int, + crop_type: str, + crop_size, img_value=None, seg_value=None): - super().__init__() + super().__init__(crop_type, crop_size, strict_mode=False) self._set_attributes(locals()) - self.crop_trans = CropTransform(x0, y0, w, h) - pad_top_offset = self.get_pad_offset(h, new_h) - pad_left_offset = self.get_pad_offset(w, new_w) - self.pad_trans = PadTransform( - pad_top_offset, pad_left_offset, new_h, new_w, img_value, seg_value) + + def __call__(self, img, annotations=None, **kwargs): + h, w = img.shape[:2] + croph, cropw = self.get_crop_size((h, w)) + h0 = np.random.randint(h - croph + 1) if h >= croph else 0 + w0 = np.random.randint(w - cropw + 1) if w >= cropw else 0 + dh = min(h, croph) + dw = min(w, cropw) + # print(w0, h0, dw, dh) + + self.x0, self.y0, self.w, self.h, self.new_w, self.new_h = w0, h0, dw, dh, cropw, croph + self.crop_trans = CropTransform(self.x0, self.y0, self.w, self.h) + pad_top_offset = self.get_pad_offset(self.h, self.new_h) + pad_left_offset = self.get_pad_offset(self.w, self.new_w) + self.pad_trans = Pad( + pad_top_offset, pad_left_offset, self.new_h, self.new_w, self.img_value, self.seg_value) + + return super().__call__(img, annotations) def get_pad_offset(self, ori: int, tar: int): pad_length = max(tar - ori, 0) @@ -1363,6 +1344,77 @@ def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray: return segmentation +@TRANSFORMS.register() +class RandomCropWithInstance(RandomCrop): + """ + Make sure the cropping region contains the center of 
a random instance from annotations. + """ + + def __call__(self, img, annotations=None, **kwargs): + h, w = img.shape[:2] + croph, cropw = self.get_crop_size((h, w)) + if self.strict_mode: + assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format( + self + ) + offset_range_h = max(h - croph, 0) + offset_range_w = max(w - cropw, 0) + # Make sure there is always at least one instance in the image + assert annotations is not None, "Can not get annotations infos." + instance = np.random.choice(annotations) + bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) + bbox = torch.tensor(bbox) + center_xy = (bbox[:2] + bbox[2:]) / 2.0 + + offset_range_h_min = max(center_xy[1] - croph, 0) + offset_range_w_min = max(center_xy[0] - cropw, 0) + offset_range_h_max = min(offset_range_h, center_xy[1] - 1) + offset_range_w_max = min(offset_range_w, center_xy[0] - 1) + + self.y0 = np.random.randint(offset_range_h_min, offset_range_h_max + 1) + self.x0 = np.random.randint(offset_range_w_min, offset_range_w_max + 1) + + self.w = cropw + self.h = croph + + return super().__call__(img, annotations) + + +@TRANSFORMS.register() +class RandomCropWithMaxAreaLimit(RandomCrop): + """ + Find a cropping window such that no single category occupies more than + `single_category_max_area` in `sem_seg`. + + The function retries random cropping 10 times max. + """ + + def __init__(self, crop_type: str, crop_size, strict_mode=True, + single_category_max_area=1.0, ignore_value=255): + super().__init__(crop_type, crop_size, strict_mode) + self._set_attributes(locals()) + + def __call__(self, img, annotations=None, **kwargs): + if self.single_category_max_area >= 1.0: + return super().__call__(img, annotations) + else: + h, w = img.shape[:2] + assert "sem_seg" in annotations[0] + sem_seg = annotations[0]["sem_seg"] + croph, cropw = self.get_crop_size((h, w)) + for _ in range(10): + y0 = np.random.randint(h - croph + 1) + x0 = np.random.randint(w - cropw + 1) + sem_seg_temp = sem_seg[y0: y0 + croph, x0: x0 + cropw] + labels, cnt = np.unique(sem_seg_temp, return_counts=True) + cnt = cnt[labels != self.ignore_value] + if len(cnt) > 1 and np.max(cnt) / np.sum(cnt) < self.single_category_max_area: + break + self.x0, self.y0, self.w, self.h = x0, y0, cropw, croph + + return super().__call__(img, annotations) + + class BlendTransform(Transform): """ Transforms pixel colors with PIL enhance functions. @@ -1382,7 +1434,8 @@ def __init__(self, src_image: np.ndarray, src_weight: float, super().__init__() self._set_attributes(locals()) - def apply_image(self, img: np.ndarray, interp: str = None) -> np.ndarray: + # def __call__(self, img: np.ndarray, interp: str = None) -> np.ndarray: + def __call__(self, img, annotations=None, **kwargs): """ Apply blend transform on the image(s). @@ -1399,54 +1452,174 @@ def apply_image(self, img: np.ndarray, interp: str = None) -> np.ndarray: if img.dtype == np.uint8: img = img.astype(np.float32) img = self.src_weight * self.src_image + self.dst_weight * img - return np.clip(img, 0, 255).astype(np.uint8) + return np.clip(img, 0, 255).astype(np.uint8), annotations else: - return self.src_weight * self.src_image + self.dst_weight * img + return self.src_weight * self.src_image + self.dst_weight * img, annotations - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - """ - Apply no transform on the coordinates. 
- """ - return coords - def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray: +@TRANSFORMS.register() +class RandomContrast(BlendTransform): + """ + Randomly transforms image contrast. + + Contrast intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce contrast + - intensity = 1 will preserve the input image + - intensity > 1 will increase contrast + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max, prob=1.0): """ - Apply no transform on the full-image segmentation. + Args: + intensity_min (float): Minimum augmentation. + intensity_max (float): Maximum augmentation. + prob (float): probability of transforms image contrast. """ - return segmentation + super().__init__() + self._set_attributes(locals()) + + def __call__(self, img, annotations=None, **kwargs): + if self._rand_range() < self.prob: + w = np.random.uniform(self.intensity_min, self.intensity_max) + self.src_image, self.src_weight, self.dst_weight = img.mean(), 1 - w, w + return super().__call__(img, annotations) + else: + return img, annotations -class RandomSwapChannelsTransform(Transform): +@TRANSFORMS.register() +class RandomBrightness(BlendTransform): """ - Randomly swap image channels. + Randomly transforms image brightness. + + Brightness intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce brightness + - intensity = 1 will preserve the input image + - intensity > 1 will increase brightness + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html """ - def __init__(self): + def __init__(self, intensity_min, intensity_max, prob=1.): + """ + Args: + intensity_min (float): Minimum augmentation. + intensity_max (float): Maximum augmentation. + prob (float): probability of transforms image brightness. + """ super().__init__() + self._set_attributes(locals()) - def apply_image(self, img): - assert len(img.shape) > 2 - return img[..., np.random.permutation(3)] + def __call__(self, img, annotations=None, **kwargs): + do = self._rand_range() < self.prob + if do: + w = np.random.uniform(self.intensity_min, self.intensity_max) + self.src_image, self.src_weight, self.dst_weight = 0, 1 - w, w + return super().__call__(img, annotations) + else: + return img, annotations - def apply_coords(self, coords: np.ndarray) -> np.ndarray: + +@TRANSFORMS.register() +class RandomSaturation(BlendTransform): + """ + Randomly transforms image saturation. + + Saturation intensity is uniformly sampled in (intensity_min, intensity_max). + - intensity < 1 will reduce saturation (make the image more grayscale) + - intensity = 1 will preserve the input image + - intensity > 1 will increase saturation + + See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html + """ + + def __init__(self, intensity_min, intensity_max, prob=1.0): """ - Apply no transform on the coordinates. + Args: + intensity_min (float): Minimum augmentation (1 preserves input). + intensity_max (float): Maximum augmentation (1 preserves input). + prob (float): probability of transforms image saturation. 
""" - return coords + super().__init__() + self._set_attributes(locals()) - def apply_segmentation(self, segmentation: np.ndarray) -> np.ndarray: + def __call__(self, img, annotations=None, **kwargs): + do = self._rand_range() < self.prob + if do: + assert img.shape[-1] == 3, "Saturation only works on RGB images" + w = np.random.uniform(self.intensity_min, self.intensity_max) + grayscale = img.dot([0.299, 0.587, 0.114])[:, :, np.newaxis] + self.src_image, self.src_weight, self.dst_weight = grayscale, 1 - w, w + return super().__call__(img, annotations) + else: + return img, annotations + + +@TRANSFORMS.register() +class RandomLightning(BlendTransform): + """ + Randomly transforms image color using fixed PCA over ImageNet. + + The degree of color jittering is randomly sampled via a normal distribution, + with standard deviation given by the scale parameter. + """ + + def __init__(self, scale, prob=0.5): """ - Apply no transform on the full-image segmentation. + Args: + scale (float): Standard deviation of principal component weighting. """ - return segmentation + super().__init__() + self._set_attributes(locals()) + self.eigen_vecs = np.array( + [ + [-0.5675, 0.7192, 0.4009], + [-0.5808, -0.0045, -0.8140], + [-0.5836, -0.6948, 0.4203], + ] + ) + self.eigen_vals = np.array([0.2175, 0.0188, 0.0045]) + + def __call__(self, img, annotations=None, **kwargs): + assert img.shape[-1] == 3, "Saturation only works on RGB images" + do = self._rand_range() < self.prob + if do: + weights = np.random.normal(scale=self.scale, size=3) + self.src_image, self.src_weight, self.dst_weight = \ + self.eigen_vecs.dot(weights * self.eigen_vals), 1, 1 + return super().__call__(img, annotations) + else: + return img, annotations + + +@TRANSFORMS.register() +class RandomSwapChannels(Transform): + """ + Randomly swap image channels. + """ + + def __init__(self, prob=0.5): + super().__init__() + self._set_attributes() + + def __call__(self, img, annotations=None, **kwargs): + assert len(img.shape) > 2 + if self._rand_range() < self.prob: + return img[..., np.random.permutation(3)], annotations + else: + return img, annotations -class ExpandTransform(Transform): +@TRANSFORMS.register() +class Expand(DefaultTransorm): """ Expand the image and boxes according the specified expand ratio. """ - def __init__(self, left, top, ratio, mean=(0, 0, 0)): + def __init__(self, ratio_range=(1, 4), mean=(0, 0, 0), prob=0.5): + """ Args: left, top (int): crop the image by img[top: top+h, left:left+w]. @@ -1455,6 +1628,18 @@ def __init__(self, left, top, ratio, mean=(0, 0, 0)): """ super().__init__() self._set_attributes(locals()) + self.min_ratio, self.max_ratio = ratio_range + + def __call__(self, img, annotations=None, **kwargs): + if self._rand_range() < self.prob: + return img, annotations + else: + h, w, c = img.shape + ratio = np.random.uniform(self.min_ratio, self.max_ratio) + left = int(np.random.uniform(0, w * ratio - w)) + top = int(np.random.uniform(0, h * ratio - h)) + self.left, self.top, self.ratio = left, top, ratio + return super().__call__(img, annotations) def apply_image(self, img): """ @@ -1483,7 +1668,8 @@ def apply_coords(self, coords: np.ndarray) -> np.ndarray: return coords -class ExtentTransform(Transform): +@TRANSFORMS.register() +class RandomExtent(DefaultTransorm): """ Extracts a subregion from the source image and scales it to the output size. 
@@ -1492,17 +1678,47 @@ class ExtentTransform(Transform): See: https://pillow.readthedocs.io/en/latest/PIL.html#PIL.ImageTransform.ExtentTransform """ - def __init__(self, src_rect, output_size, interp=Image.LINEAR, fill=0): + def __init__(self, scale_range, shift_range, interp=Image.LINEAR, fill=0, prob=0.5): """ Args: - src_rect (x0, y0, x1, y1): src coordinates - output_size (h, w): dst image size - interp: PIL interpolation methods - fill: Fill color used when src_rect extends outside image + scale_range (l, h): Range of input-to-output size scaling factor. + shift_range (x, y): Range of shifts of the cropped subrect. The rect + is shifted by [w / 2 * Uniform(-x, x), h / 2 * Uniform(-y, y)], + where (w, h) is the (width, height) of the input image. Set each + component to zero to crop at the image's center. """ super().__init__() self._set_attributes(locals()) + def __call__(self, img, annotations=None, **kwargs): + + if self._rand_range() < self.prob: + return img, annotations + else: + img_h, img_w = img.shape[:2] + + # Initialize src_rect to fit the input image. + src_rect = np.array([-0.5 * img_w, -0.5 * img_h, 0.5 * img_w, 0.5 * img_h]) + + # Apply a random scaling to the src_rect. + src_rect *= np.random.uniform(self.scale_range[0], self.scale_range[1]) + + # Apply a random shift to the coordinates origin. + src_rect[0::2] += self.shift_range[0] * img_w * (np.random.rand() - 0.5) + src_rect[1::2] += self.shift_range[1] * img_h * (np.random.rand() - 0.5) + + # Map src_rect coordinates into image coordinates (center at corner). + src_rect[0::2] += 0.5 * img_w + src_rect[1::2] += 0.5 * img_h + + self.src_rect = (src_rect[0], src_rect[1], src_rect[2], src_rect[3]) + self.output_size = ( + int(src_rect[3] - src_rect[1]), + int(src_rect[2] - src_rect[0]), + ) + + return super().__call__(img, annotations) + def apply_image(self, img, interp=None): h, w = self.output_size ret = Image.fromarray(img).transform( @@ -1533,7 +1749,7 @@ def apply_segmentation(self, segmentation): return segmentation -class ResizeTransform(Transform): +class ResizeTransform(DefaultTransorm): """ Resize the image to a target size. """ @@ -1566,6 +1782,140 @@ def apply_segmentation(self, segmentation): return segmentation +@TRANSFORMS.register() +class Resize(ResizeTransform): + """ + Resize image to a target size + """ + + def __init__(self, shape, interp=Image.BILINEAR): + """ + Args: + shape: (h, w) tuple or a int. + interp: PIL interpolation method. + """ + if isinstance(shape, int): + shape = (shape, shape) + shape = tuple(shape) + self._set_attributes(locals()) + + def __call__(self, img, annotations=None, **kwargs): + self.h, self.w, self.new_h, self.new_w = \ + img.shape[0], img.shape[1], self.shape[0], self.shape[1] + return super().__call__(img, annotations) + + +@TRANSFORMS.register() +class ResizeLongestEdge(ResizeTransform): + """ + Scale the longer edge to the given size. + """ + + def __init__(self, long_edge_length, sample_style="range", interp=Image.BILINEAR, + jitter=(0.0, 32)): + """ + Args: + long_edge_length (list[int]): If ``sample_style=="range"``, + a [min, max] interval from which to sample the shortest edge length. + If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. + sample_style (str): either "range" or "choice". + interp: PIL interpolation method. 
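+            jitter (tuple): ``(ratio, divisible)``; when ``ratio > 0`` the
+                target length is jittered around the image's longer edge and
+                rounded down to a multiple of ``divisible``.
+
+        Example (a minimal sketch):
+
+        .. code-block:: python
+
+            # scale the longer edge to a length sampled from [640, 800]
+            aug = ResizeLongestEdge(long_edge_length=(640, 800),
+                                    sample_style="range")
+            img, annotations = aug(img, annotations)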
+ """ + assert sample_style in ["range", "choice"], sample_style + + self.is_range = sample_style == "range" + if isinstance(long_edge_length, int): + long_edge_length = (long_edge_length, long_edge_length) + self._set_attributes(locals()) + + def __call__(self, img, annotations=None, **kwargs): + h, w = img.shape[:2] + if self.is_range: + size = np.random.randint( + self.long_edge_length[0], self.long_edge_length[1] + 1 + ) + else: + size = np.random.choice(self.long_edge_length) + if size == 0: + return NoOpTransform() + + if self.jitter[0] > 0: + dw = self.jitter[0] * w + dh = self.jitter[0] * h + size = max(h, w) + np.random.uniform(low=-max(dw, dh), high=max(dw, dh)) + size -= size % self.jitter[1] + + scale = size * 1.0 / max(h, w) + if h < w: + newh, neww = scale * h, size + else: + newh, neww = size, scale * w + + neww = int(neww + 0.5) + newh = int(newh + 0.5) + + self.h, self.w, self.new_h, self.new_w = h, w, newh, neww + return super().__call__(img, annotations) + + +@TRANSFORMS.register() +class ResizeShortestEdge(ResizeTransform): + """ + Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge. + If `max_size` is reached, then downscale so that the longer edge does not exceed max_size. + """ + + def __init__( + self, + short_edge_length, + max_size=sys.maxsize, + sample_style="range", + interp=Image.BILINEAR, + ): + """ + Args: + short_edge_length (list[int]): If ``sample_style=="range"``, + a [min, max] interval from which to sample the shortest edge length. + If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. + max_size (int): maximum allowed longest edge length. + sample_style (str): either "range" or "choice". + interp: PIL interpolation method. + """ + assert sample_style in ["range", "choice"], sample_style + + self.is_range = sample_style == "range" + if isinstance(short_edge_length, int): + short_edge_length = (short_edge_length, short_edge_length) + self._set_attributes(locals()) + + def __call__(self, img, annotations=None, **kwargs): + h, w = img.shape[:2] + + if self.is_range: + size = np.random.randint( + self.short_edge_length[0], self.short_edge_length[1] + 1 + ) + else: + size = np.random.choice(self.short_edge_length) + if size == 0: + return NoOpTransform() + + scale = size * 1.0 / min(h, w) + if h < w: + newh, neww = size, scale * w + else: + newh, neww = scale * h, size + if max(newh, neww) > self.max_size: + scale = self.max_size * 1.0 / max(newh, neww) + newh = newh * scale + neww = neww * scale + neww = int(neww + 0.5) + newh = int(newh + 0.5) + + self.h, self.w, self.new_h, self.new_w = h, w, newh, neww + return super().__call__(img, annotations) + + def HFlip_rotated_box(transform, rotated_boxes): """ Apply the horizontal flip transform on rotated boxes. @@ -1609,6 +1959,21 @@ def Resize_rotated_box(transform, rotated_boxes): return rotated_boxes -HFlipTransform.register_type("rotated_box", HFlip_rotated_box) +# RandomFlip is horizontal by default. +RandomFlip.register_type("rotated_box", HFlip_rotated_box) NoOpTransform.register_type("rotated_box", lambda t, x: x) ResizeTransform.register_type("rotated_box", Resize_rotated_box) + + +@TRANSFORMS.register() +class TorchTransformGen(Transform): + """ + Wrapper transfrom of transforms in torchvision. + It convert img (np.ndarray) to PIL image, and convert back to np.ndarray after transform. 
+ """ + def __init__(self, tfm): + self.tfm = tfm + + def __call__(self, img: np.ndarray, annotations: None, **kwargs): + pil_image = Image.fromarray(img) + return np.array(self.tfm(pil_image)), annotations diff --git a/cvpods/data/transforms/auto_aug.py b/cvpods/data/transforms/auto_aug.py index 450544a..d959488 100644 --- a/cvpods/data/transforms/auto_aug.py +++ b/cvpods/data/transforms/auto_aug.py @@ -9,7 +9,8 @@ import PIL from PIL import Image, ImageEnhance, ImageOps -from cvpods.data.transforms import Transform +from ..registry import TRANSFORMS +from .augmentations import Transform _PIL_VER = tuple([int(x) for x in PIL.__version__.split('.')[:2]]) @@ -277,7 +278,8 @@ def _solarize_add_level_to_arg(level, _hparams): } -class AutoAugmentTransform(Transform): +@TRANSFORMS.register() +class AutoAugment(Transform): """ AutoAugment from Google. Implementation adapted from: @@ -310,19 +312,16 @@ def __init__(self, name, prob=0.5, magnitude=10, hparams=None): # NOTE This is my own hack, being tested, not in papers or reference impls. self.magnitude_std = self.hparams.get('magnitude_std', 0) - def apply_image(self, img: np.ndarray) -> np.ndarray: - if random.random() > self.prob: - return img - magnitude = self.magnitude - if self.magnitude_std and self.magnitude_std > 0: - magnitude = random.gauss(magnitude, self.magnitude_std) - magnitude = min(_MAX_LEVEL, max(0, magnitude)) # clip to valid range - level_args = self.level_fn( - magnitude, self.hparams) if self.level_fn is not None else tuple() - return np.array(self.aug_fn(Image.fromarray(img), *level_args, **self.kwargs)) - - def apply_coords(self, coords: np.ndarray) -> np.ndarray: - return coords + def __call__(self, img: np.ndarray, annotations: list = None, **kwargs): + if random.random() < self.prob: + magnitude = self.magnitude + if self.magnitude_std and self.magnitude_std > 0: + magnitude = random.gauss(magnitude, self.magnitude_std) + magnitude = min(_MAX_LEVEL, max(0, magnitude)) # clip to valid range + level_args = self.level_fn( + magnitude, self.hparams) if self.level_fn is not None else tuple() + img = np.array(self.aug_fn(Image.fromarray(img), *level_args, **self.kwargs)) + return img, annotations _RAND_TRANSFORMS = [ diff --git a/cvpods/data/transforms/transform_gen.py b/cvpods/data/transforms/transform_gen.py deleted file mode 100644 index 0865b9b..0000000 --- a/cvpods/data/transforms/transform_gen.py +++ /dev/null @@ -1,1107 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# Copyright (c) BaseDetection, Inc. and its affiliates. 
All Rights Reserved - -import inspect -import pprint -import sys -from abc import ABCMeta, abstractmethod - -import numpy as np -from PIL import Image - -import torch - -from cvpods.structures import Boxes, BoxMode, pairwise_iou - -from ..registry import TRANSFORMS -from .auto_aug import AutoAugmentTransform - -from .transform import ( # isort:skip - ScaleTransform, - AffineTransform, - BlendTransform, - IoUCropTransform, - CropTransform, - CropPadTransform, - HFlipTransform, - NoOpTransform, - VFlipTransform, - DistortTransform, - RandomSwapChannelsTransform, - ExpandTransform, - ExtentTransform, - ResizeTransform, - # Transforms used in ssl - GaussianBlurTransform, - GaussianBlurConvTransform, - SolarizationTransform, - ComposeTransform, - # LabSpaceTransform, - PadTransform, -) - -__all__ = [ - "Pad", - "RandomScale", - "Expand", - "MinIoURandomCrop", - "RandomSwapChannels", - "CenterAffine", - "RandomBrightness", - "RandomContrast", - "RandomCrop", - "RandomCropWithInstance", - "RandomCropWithMaxAreaLimit", - "RandomCropPad", - "RandomExtent", - "RandomFlip", - "RandomSaturation", - "RandomLighting", - "RandomDistortion", - "Resize", - "ResizeShortestEdge", - "ResizeLongestEdge", - "ShuffleList", - "RandomList", - "RepeatList", - "TransformGen", - "TorchTransformGen", - # transforms used in ssl - "GaussianBlur", - "GaussianBlurConv", - "Solarization", - "AutoAugment", -] - - -def check_dtype(img): - """ - Check the image data type and dimensions to ensure that transforms can be applied on it. - - Args: - img (np.array): image to be checked. - """ - assert isinstance( - img, np.ndarray - ), "[TransformGen] Needs an numpy array, but got a {}!".format(type(img)) - assert not isinstance(img.dtype, np.integer) or ( - img.dtype == np.uint8 - ), "[TransformGen] Got image of type {}, use uint8 or floating points instead!".format( - img.dtype - ) - assert img.ndim in [2, 3], img.ndim - - -@TRANSFORMS.register() -class TransformGen(metaclass=ABCMeta): - """ - TransformGen takes an image of type uint8 in range [0, 255], or - floating point in range [0, 1] or [0, 255] as input. - - It creates a :class:`Transform` based on the given image, sometimes with randomness. - The transform can then be used to transform images - or other data (boxes, points, annotations, etc.) associated with it. - - The assumption made in this class - is that the image itself is sufficient to instantiate a transform. - When this assumption is not true, you need to create the transforms by your own. - - A list of `TransformGen` can be applied with :func:`apply_transform_gens`. - """ - - def _init(self, params=None): - if params: - for k, v in params.items(): - if k != "self" and not k.startswith("_"): - setattr(self, k, v) - - @abstractmethod - def get_transform(self, img, annotations=None): - raise NotImplementedError - - def __call__(self, img, annotations=None, **kwargs): - return self.get_transform(img, annotations)(img, annotations, **kwargs) - - def _rand_range(self, low=1.0, high=None, size=None): - """ - Uniform float random number between low and high. 
- """ - if high is None: - low, high = 0, low - if size is None: - size = [] - return np.random.uniform(low, high, size) - - def __repr__(self): - """ - Produce something like: - "MyTransformGen(field1={self.field1}, field2={self.field2})" - """ - try: - sig = inspect.signature(self.__init__) - classname = type(self).__name__ - argstr = [] - for name, param in sig.parameters.items(): - assert ( - param.kind != param.VAR_POSITIONAL - and param.kind != param.VAR_KEYWORD - ), "The default __repr__ doesn't support *args or **kwargs" - assert hasattr(self, name), ( - "Attribute {} not found! " - "Default __repr__ only works if attributes match the constructor.".format( - name - ) - ) - attr = getattr(self, name) - default = param.default - if default is attr: - continue - argstr.append("{}={}".format(name, pprint.pformat(attr))) - return "{}({})".format(classname, ", ".join(argstr)) - except AssertionError: - return super().__repr__() - - __str__ = __repr__ - - -@TRANSFORMS.register() -class RandomFlip(TransformGen): - """ - Flip the image horizontally or vertically with the given probability. - """ - - def __init__(self, prob=0.5, *, horizontal=True, vertical=False): - """ - Args: - prob (float): probability of flip. - horizontal (boolean): whether to apply horizontal flipping - vertical (boolean): whether to apply vertical flipping - """ - super().__init__() - - if horizontal and vertical: - raise ValueError( - "Cannot do both horiz and vert. Please use two Flip instead." - ) - if not horizontal and not vertical: - raise ValueError("At least one of horiz or vert has to be True!") - self._init(locals()) - - def get_transform(self, img, annotations=None): - h, w = img.shape[:2] - do = self._rand_range() < self.prob - if do: - if self.horizontal: - return HFlipTransform(w) - elif self.vertical: - return VFlipTransform(h) - else: - return NoOpTransform() - - -@TRANSFORMS.register() -class TorchTransformGen: - """ - Wrapper transfrom of transforms in torchvision. - It convert img (np.ndarray) to PIL image, and convert back to np.ndarray after transform. - """ - def __init__(self, tfm): - self.tfm = tfm - - def __call__(self, img: np.ndarray, annotations: None, **kwargs): - pil_image = Image.fromarray(img) - return np.array(self.tfm(pil_image)), annotations - - -@TRANSFORMS.register() -class RandomDistortion(TransformGen): - """ - Random distort image's hue, saturation and exposure. - """ - - def __init__(self, hue, saturation, exposure, image_format="BGR"): - """ - RandomDistortion Initialization. 
- Args: - hue (float): value of hue - saturation (float): value of saturation - exposure (float): value of exposure - """ - assert image_format in ["RGB", "BGR"] - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - return DistortTransform(self.hue, self.saturation, self.exposure, self.image_format) - - -@TRANSFORMS.register() -class CenterAffine(TransformGen): - """ - Affine Transform for CenterNet - """ - - def __init__(self, boarder, output_size, pad_value=[0, 0, 0], random_aug=True): - """ - output_size (w, h) shape - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - img_shape = img.shape[:2] - center, scale = self.generate_center_and_scale(img_shape) - src, dst = self.generate_src_and_dst(center, scale, self.output_size) - return AffineTransform(src, dst, self.output_size, self.pad_value) - - @staticmethod - def _get_boarder(boarder, size): - """ - This func may be rewirite someday - """ - i = 1 - size //= 2 - while size <= boarder // i: - i *= 2 - return boarder // i - - def generate_center_and_scale(self, img_shape): - """ - generate center - shpae : (h, w) - """ - height, width = img_shape - center = np.array([width / 2, height / 2], dtype=np.float32) - scale = float(max(img_shape)) - if self.random_aug: - scale = scale * np.random.choice(np.arange(0.6, 1.4, 0.1)) - h_boarder = self._get_boarder(self.boarder, height) - w_boarder = self._get_boarder(self.boarder, width) - center[0] = np.random.randint(low=w_boarder, high=width - w_boarder) - center[1] = np.random.randint(low=h_boarder, high=height - h_boarder) - else: - pass - - return center, scale - - @staticmethod - def generate_src_and_dst(center, scale, output_size): - if not isinstance(scale, np.ndarray) and not isinstance(scale, list): - scale = np.array([scale, scale], dtype=np.float32) - src = np.zeros((3, 2), dtype=np.float32) - src_w = scale[0] - src_dir = [0, src_w * -0.5] - src[0, :] = center - src[1, :] = src[0, :] + src_dir - src[2, :] = src[1, :] + (src_dir[1], -src_dir[0]) - - dst = np.zeros((3, 2), dtype=np.float32) - dst_w, dst_h = output_size - dst_dir = [0, dst_w * -0.5] - dst[0, :] = [dst_w * 0.5, dst_h * 0.5] - dst[1, :] = dst[0, :] + dst_dir - dst[2, :] = dst[1, :] + (dst_dir[1], -dst_dir[0]) - - return src, dst - - -@TRANSFORMS.register() -class GaussianBlur(TransformGen): - """ - Gaussian blur transform. - """ - def __init__(self, sigma, p=1.0): - """ - Args: - sigma (List(float)): sigma of gaussian - p (float): probability of perform this augmentation - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - return GaussianBlurTransform(self.sigma, self.p) - - -@TRANSFORMS.register() -class Solarization(TransformGen): - def __init__(self, threshold=128, p=0.5): - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - return SolarizationTransform(self.threshold, self.p) - - -@TRANSFORMS.register() -class GaussianBlurConv(TransformGen): - def __init__(self, kernel_size, p): - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - return GaussianBlurConvTransform(self.kernel_size, self.p) - - -@TRANSFORMS.register() -class Resize(TransformGen): - """ - Resize image to a target size - """ - - def __init__(self, shape, interp=Image.BILINEAR): - """ - Args: - shape: (h, w) tuple or a int. - interp: PIL interpolation method. 
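`CenterAffine.generate_src_and_dst` above pins the warp with three point pairs: the crop center, a point half a `scale` above it, and their common perpendicular. The sketch below copies that logic into a standalone function so the resulting matrix can be inspected; feeding the pairs to `cv2.getAffineTransform` is an assumption about what `AffineTransform` does internally, which this diff does not show:

```python
import cv2
import numpy as np

def generate_src_and_dst(center, scale, output_size):
    # Same logic as CenterAffine above: three non-collinear point pairs
    # determine the 2x3 affine matrix uniquely.
    if not isinstance(scale, (np.ndarray, list)):
        scale = np.array([scale, scale], dtype=np.float32)
    src = np.zeros((3, 2), dtype=np.float32)
    src_dir = [0, scale[0] * -0.5]
    src[0, :] = center
    src[1, :] = src[0, :] + src_dir
    src[2, :] = src[1, :] + (src_dir[1], -src_dir[0])

    dst = np.zeros((3, 2), dtype=np.float32)
    dst_w, dst_h = output_size
    dst_dir = [0, dst_w * -0.5]
    dst[0, :] = [dst_w * 0.5, dst_h * 0.5]
    dst[1, :] = dst[0, :] + dst_dir
    dst[2, :] = dst[1, :] + (dst_dir[1], -dst_dir[0])
    return src, dst

src, dst = generate_src_and_dst(np.array([320.0, 240.0]), 640.0, (512, 512))
M = cv2.getAffineTransform(src, dst)  # 2x3 matrix consumed by cv2.warpAffine
print(M)
```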
- """ - if isinstance(shape, int): - shape = (shape, shape) - shape = tuple(shape) - self._init(locals()) - - def get_transform(self, img, annotations=None): - return ResizeTransform( - img.shape[0], img.shape[1], self.shape[0], self.shape[1], self.interp - ) - - -@TRANSFORMS.register() -class ResizeLongestEdge(TransformGen): - """ - Scale the longer edge to the given size. - """ - - def __init__(self, long_edge_length, sample_style="range", interp=Image.BILINEAR, - jitter=(0.0, 32)): - """ - Args: - long_edge_length (list[int]): If ``sample_style=="range"``, - a [min, max] interval from which to sample the shortest edge length. - If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. - sample_style (str): either "range" or "choice". - interp: PIL interpolation method. - """ - super().__init__() - assert sample_style in ["range", "choice"], sample_style - - self.is_range = sample_style == "range" - if isinstance(long_edge_length, int): - long_edge_length = (long_edge_length, long_edge_length) - self._init(locals()) - - def get_transform(self, img, annotations=None): - h, w = img.shape[:2] - if self.is_range: - size = np.random.randint( - self.long_edge_length[0], self.long_edge_length[1] + 1 - ) - else: - size = np.random.choice(self.long_edge_length) - if size == 0: - return NoOpTransform() - - if self.jitter[0] > 0: - dw = self.jitter[0] * w - dh = self.jitter[0] * h - size = max(h, w) + np.random.uniform(low=-max(dw, dh), high=max(dw, dh)) - size -= size % self.jitter[1] - - scale = size * 1.0 / max(h, w) - if h < w: - newh, neww = scale * h, size - else: - newh, neww = size, scale * w - - neww = int(neww + 0.5) - newh = int(newh + 0.5) - - return ResizeTransform(h, w, newh, neww, self.interp) - - -@TRANSFORMS.register() -class ResizeShortestEdge(TransformGen): - """ - Scale the shorter edge to the given size, with a limit of `max_size` on the longer edge. - If `max_size` is reached, then downscale so that the longer edge does not exceed max_size. - """ - - def __init__( - self, - short_edge_length, - max_size=sys.maxsize, - sample_style="range", - interp=Image.BILINEAR, - ): - """ - Args: - short_edge_length (list[int]): If ``sample_style=="range"``, - a [min, max] interval from which to sample the shortest edge length. - If ``sample_style=="choice"``, a list of shortest edge lengths to sample from. - max_size (int): maximum allowed longest edge length. - sample_style (str): either "range" or "choice". - interp: PIL interpolation method. - """ - super().__init__() - assert sample_style in ["range", "choice"], sample_style - - self.is_range = sample_style == "range" - if isinstance(short_edge_length, int): - short_edge_length = (short_edge_length, short_edge_length) - self._init(locals()) - - def get_transform(self, img, annotations=None): - h, w = img.shape[:2] - - if self.is_range: - size = np.random.randint( - self.short_edge_length[0], self.short_edge_length[1] + 1 - ) - else: - size = np.random.choice(self.short_edge_length) - if size == 0: - return NoOpTransform() - - scale = size * 1.0 / min(h, w) - if h < w: - newh, neww = size, scale * w - else: - newh, neww = scale * h, size - if max(newh, neww) > self.max_size: - scale = self.max_size * 1.0 / max(newh, neww) - newh = newh * scale - neww = neww * scale - neww = int(neww + 0.5) - newh = int(newh + 0.5) - return ResizeTransform(h, w, newh, neww, self.interp) - - -@TRANSFORMS.register() -class RandomCrop(TransformGen): - """ - Randomly crop a subimage out of an image. 
- """ - - def __init__(self, crop_type: str, crop_size, strict_mode=True): - """ - Args: - crop_type (str): one of "relative_range", "relative", "absolute". - See `config/defaults.py` for explanation. - crop_size (tuple[float]): the relative ratio or absolute pixels of - height and width - strict_mode (bool): if `True`, the target `crop_size` must be smaller than - the original image size. - """ - super().__init__() - assert crop_type in ["relative_range", "relative", "absolute"] - self._init(locals()) - - def get_transform(self, img, annotations=None): - h, w = img.shape[:2] - croph, cropw = self.get_crop_size((h, w)) - if self.strict_mode: - assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format( - self - ) - offset_range_h = max(h - croph, 0) - offset_range_w = max(w - cropw, 0) - h0 = np.random.randint(offset_range_h + 1) - w0 = np.random.randint(offset_range_w + 1) - return CropTransform(w0, h0, cropw, croph) - - def get_crop_size(self, image_size): - """ - Args: - image_size (tuple): height, width - - Returns: - crop_size (tuple): height, width in absolute pixels - """ - h, w = image_size - if self.crop_type == "relative": - ch, cw = self.crop_size - return int(h * ch + 0.5), int(w * cw + 0.5) - elif self.crop_type == "relative_range": - crop_size = np.asarray(self.crop_size, dtype=np.float32) - ch, cw = crop_size + np.random.rand(2) * (1 - crop_size) - return int(h * ch + 0.5), int(w * cw + 0.5) - elif self.crop_type == "absolute": - return self.crop_size - else: - NotImplementedError("Unknown crop type {}".format(self.crop_type)) - - -@TRANSFORMS.register() -class RandomCropWithInstance(RandomCrop): - """ - Make sure the cropping region contains the center of a random instance from annotations. - """ - - def get_transform(self, img, annotations=None): - h, w = img.shape[:2] - croph, cropw = self.get_crop_size((h, w)) - if self.strict_mode: - assert h >= croph and w >= cropw, "Shape computation in {} has bugs.".format( - self - ) - offset_range_h = max(h - croph, 0) - offset_range_w = max(w - cropw, 0) - # Make sure there is always at least one instance in the image - assert annotations is not None, "Can not get annotations infos." - instance = np.random.choice(annotations) - bbox = BoxMode.convert(instance["bbox"], instance["bbox_mode"], BoxMode.XYXY_ABS) - bbox = torch.tensor(bbox) - center_xy = (bbox[:2] + bbox[2:]) / 2.0 - - offset_range_h_min = max(center_xy[1] - croph, 0) - offset_range_w_min = max(center_xy[0] - cropw, 0) - offset_range_h_max = min(offset_range_h, center_xy[1] - 1) - offset_range_w_max = min(offset_range_w, center_xy[0] - 1) - - h0 = np.random.randint(offset_range_h_min, offset_range_h_max + 1) - w0 = np.random.randint(offset_range_w_min, offset_range_w_max + 1) - return CropTransform(w0, h0, cropw, croph) - - -@TRANSFORMS.register() -class RandomCropWithMaxAreaLimit(RandomCrop): - """ - Find a cropping window such that no single category occupies more than - `single_category_max_area` in `sem_seg`. - - The function retries random cropping 10 times max. 
- """ - - def __init__(self, crop_type: str, crop_size, strict_mode=True, - single_category_max_area=1.0, ignore_value=255): - super().__init__(crop_type, crop_size, strict_mode) - self._init(locals()) - - def get_transform(self, img, annotations=None): - if self.single_category_max_area >= 1.0: - crop_tfm = super().get_transform(img, annotations) - else: - h, w = img.shape[:2] - assert "sem_seg" in annotations[0] - sem_seg = annotations[0]["sem_seg"] - croph, cropw = self.get_crop_size((h, w)) - for _ in range(10): - y0 = np.random.randint(h - croph + 1) - x0 = np.random.randint(w - cropw + 1) - sem_seg_temp = sem_seg[y0: y0 + croph, x0: x0 + cropw] - labels, cnt = np.unique(sem_seg_temp, return_counts=True) - cnt = cnt[labels != self.ignore_value] - if len(cnt) > 1 and np.max(cnt) / np.sum(cnt) < self.single_category_max_area: - break - crop_tfm = CropTransform(x0, y0, cropw, croph) - return crop_tfm - - -@TRANSFORMS.register() -class RandomCropPad(RandomCrop): - """ - Randomly crop and pad a subimage out of an image. - """ - def __init__(self, - crop_type: str, - crop_size, - img_value=None, - seg_value=None): - super().__init__(crop_type, crop_size, strict_mode=False) - self._init(locals()) - - def get_transform(self, img, annotations=None): - h, w = img.shape[:2] - croph, cropw = self.get_crop_size((h, w)) - h0 = np.random.randint(h - croph + 1) if h >= croph else 0 - w0 = np.random.randint(w - cropw + 1) if w >= cropw else 0 - dh = min(h, croph) - dw = min(w, cropw) - # print(w0, h0, dw, dh) - return CropPadTransform(w0, h0, dw, dh, cropw, croph, self.img_value, - self.seg_value) - - -@TRANSFORMS.register() -class RandomExtent(TransformGen): - """ - Outputs an image by cropping a random "subrect" of the source image. - - The subrect can be parameterized to include pixels outside the source image, - in which case they will be set to zeros (i.e. black). The size of the output - image will vary with the size of the random subrect. - """ - - def __init__(self, scale_range, shift_range): - """ - Args: - scale_range (l, h): Range of input-to-output size scaling factor. - shift_range (x, y): Range of shifts of the cropped subrect. The rect - is shifted by [w / 2 * Uniform(-x, x), h / 2 * Uniform(-y, y)], - where (w, h) is the (width, height) of the input image. Set each - component to zero to crop at the image's center. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - img_h, img_w = img.shape[:2] - - # Initialize src_rect to fit the input image. - src_rect = np.array([-0.5 * img_w, -0.5 * img_h, 0.5 * img_w, 0.5 * img_h]) - - # Apply a random scaling to the src_rect. - src_rect *= np.random.uniform(self.scale_range[0], self.scale_range[1]) - - # Apply a random shift to the coordinates origin. - src_rect[0::2] += self.shift_range[0] * img_w * (np.random.rand() - 0.5) - src_rect[1::2] += self.shift_range[1] * img_h * (np.random.rand() - 0.5) - - # Map src_rect coordinates into image coordinates (center at corner). - src_rect[0::2] += 0.5 * img_w - src_rect[1::2] += 0.5 * img_h - - return ExtentTransform( - src_rect=(src_rect[0], src_rect[1], src_rect[2], src_rect[3]), - output_size=( - int(src_rect[3] - src_rect[1]), - int(src_rect[2] - src_rect[0]), - ), - ) - - -@TRANSFORMS.register() -class RandomContrast(TransformGen): - """ - Randomly transforms image contrast. - - Contrast intensity is uniformly sampled in (intensity_min, intensity_max). 
- - intensity < 1 will reduce contrast - - intensity = 1 will preserve the input image - - intensity > 1 will increase contrast - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max, prob=1.0): - """ - Args: - intensity_min (float): Minimum augmentation. - intensity_max (float): Maximum augmentation. - prob (float): probability of transforms image contrast. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - do = self._rand_range() < self.prob - if do: - w = np.random.uniform(self.intensity_min, self.intensity_max) - return BlendTransform(src_image=img.mean(), src_weight=1 - w, dst_weight=w) - else: - return NoOpTransform() - - -@TRANSFORMS.register() -class RandomBrightness(TransformGen): - """ - Randomly transforms image brightness. - - Brightness intensity is uniformly sampled in (intensity_min, intensity_max). - - intensity < 1 will reduce brightness - - intensity = 1 will preserve the input image - - intensity > 1 will increase brightness - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max, prob=1.): - """ - Args: - intensity_min (float): Minimum augmentation. - intensity_max (float): Maximum augmentation. - prob (float): probability of transforms image brightness. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - do = self._rand_range() < self.prob - if do: - w = np.random.uniform(self.intensity_min, self.intensity_max) - return BlendTransform(src_image=0, src_weight=1 - w, dst_weight=w) - else: - return NoOpTransform() - - -@TRANSFORMS.register() -class RandomSaturation(TransformGen): - """ - Randomly transforms image saturation. - - Saturation intensity is uniformly sampled in (intensity_min, intensity_max). - - intensity < 1 will reduce saturation (make the image more grayscale) - - intensity = 1 will preserve the input image - - intensity > 1 will increase saturation - - See: https://pillow.readthedocs.io/en/3.0.x/reference/ImageEnhance.html - """ - - def __init__(self, intensity_min, intensity_max, prob=1.0): - """ - Args: - intensity_min (float): Minimum augmentation (1 preserves input). - intensity_max (float): Maximum augmentation (1 preserves input). - prob (float): probability of transforms image saturation. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - do = self._rand_range() < self.prob - if do: - assert img.shape[-1] == 3, "Saturation only works on RGB images" - w = np.random.uniform(self.intensity_min, self.intensity_max) - grayscale = img.dot([0.299, 0.587, 0.114])[:, :, np.newaxis] - return BlendTransform(src_image=grayscale, src_weight=1 - w, dst_weight=w) - else: - return NoOpTransform() - - -@TRANSFORMS.register() -class RandomLighting(TransformGen): - """ - Randomly transforms image color using fixed PCA over ImageNet. - - The degree of color jittering is randomly sampled via a normal distribution, - with standard deviation given by the scale parameter. - """ - - def __init__(self, scale): - """ - Args: - scale (float): Standard deviation of principal component weighting. 
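`RandomContrast`, `RandomBrightness`, and `RandomSaturation` above (and `RandomLighting`, continued below) all reduce to one `BlendTransform`. Assuming `BlendTransform` computes `src_weight * src_image + dst_weight * img` — consistent with `RandomLighting` adding PCA noise with both weights at 1.0 — the three photometric ops are, in a minimal sketch:

```python
import numpy as np

def blend(src_image, src_weight, dst_weight, img):
    # Assumed BlendTransform rule: out = src_weight * src + dst_weight * img.
    return src_weight * src_image + dst_weight * img

img = np.random.rand(4, 4, 3).astype(np.float32)
w = 1.3  # sampled from (intensity_min, intensity_max); w > 1 strengthens the effect

contrast = blend(img.mean(), 1 - w, w, img)             # RandomContrast
brightness = blend(0.0, 1 - w, w, img)                  # RandomBrightness
gray = img.dot([0.299, 0.587, 0.114])[:, :, np.newaxis]
saturation = blend(gray, 1 - w, w, img)                 # RandomSaturation
print(contrast.shape, brightness.shape, saturation.shape)
```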
- """ - super().__init__() - self._init(locals()) - self.eigen_vecs = np.array( - [ - [-0.5675, 0.7192, 0.4009], - [-0.5808, -0.0045, -0.8140], - [-0.5836, -0.6948, 0.4203], - ] - ) - self.eigen_vals = np.array([0.2175, 0.0188, 0.0045]) - - def get_transform(self, img, annotations=None): - assert img.shape[-1] == 3, "Saturation only works on RGB images" - weights = np.random.normal(scale=self.scale, size=3) - return BlendTransform( - src_image=self.eigen_vecs.dot(weights * self.eigen_vals), - src_weight=1.0, - dst_weight=1.0, - ) - - -@TRANSFORMS.register() -class RandomSwapChannels(TransformGen): - """ - Randomly swap image channels. - """ - - def __init__(self, prob=0.5): - """ - Args: - prob (float): probability of swap channels. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - _, w = img.shape[:2] - do = self._rand_range() < self.prob - if do: - return RandomSwapChannelsTransform() - else: - return NoOpTransform() - - -@TRANSFORMS.register() -class MinIoURandomCrop(TransformGen): - """ - Random crop the image & bboxes, the cropped patches have minimum IoU - requirement with original image & bboxes, the IoU threshold is randomly - selected from min_ious. - """ - - def __init__(self, min_ious=(0.1, 0.3, 0.5, 0.7, 0.9), min_crop_size=0.3): - """ - Args: - min_ious (tuple): minimum IoU threshold for all intersections with bounding boxes - min_crop_size (float): minimum crop's size - (i.e. h,w := a*h, a*w, where a >= min_crop_size). - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations): - """ - Args: - img (ndarray): of shape HxWxC(RGB). The array can be of type uint8 - in range [0, 255], or floating point in range [0, 255]. - annotations (list[dict[str->str]]): - Each item in the list is a bbox label of an object. The object is - represented by a dict, - which contains: - - bbox (list): bbox coordinates, top left and bottom right. - - bbox_mode (str): bbox label mode, for example: `XYXY_ABS`, - `XYWH_ABS` and so on... - """ - sample_mode = (1, *self.min_ious, 0) - h, w = img.shape[:2] - - boxes = list() - for obj in annotations: - boxes.append(BoxMode.convert(obj["bbox"], obj["bbox_mode"], BoxMode.XYXY_ABS)) - boxes = torch.tensor(boxes) - - while True: - mode = np.random.choice(sample_mode) - if mode == 1: - return NoOpTransform() - - min_iou = mode - for i in range(50): - new_w = np.random.uniform(self.min_crop_size * w, w) - new_h = np.random.uniform(self.min_crop_size * h, h) - - # h / w in [0.5, 2] - if new_h / new_w < 0.5 or new_h / new_w > 2: - continue - - left = np.random.uniform(w - new_w) - top = np.random.uniform(h - new_h) - - patch = np.array( - (int(left), int(top), int(left + new_w), int(top + new_h))) - - overlaps = pairwise_iou( - Boxes(patch.reshape(-1, 4)), - Boxes(boxes.reshape(-1, 4)) - ) - - if overlaps.min() < min_iou: - continue - - # center of boxes should inside the crop img - center = (boxes[:, :2] + boxes[:, 2:]) / 2 - mask = ((center[:, 0] > patch[0]) * (center[:, 1] > patch[1]) - * (center[:, 0] < patch[2]) * (center[:, 1] < patch[3])) - if not mask.any(): - continue - return IoUCropTransform(int(left), int(top), int(new_w), int(new_h)) - - -@TRANSFORMS.register() -class Expand(TransformGen): - """ - Random Expand the image & bboxes. - """ - - def __init__(self, ratio_range=(1, 4), mean=(0, 0, 0), prob=0.5): - """ - Args: - ratio_range (tuple): range of expand ratio. - mean (tuple): mean value of dataset. 
- prob (float): probability of applying this transformation. - """ - super().__init__() - self._init(locals()) - self.min_ratio, self.max_ratio = ratio_range - - def get_transform(self, img, annotations=None): - if np.random.uniform(0, 1) > self.prob: - return NoOpTransform() - h, w, c = img.shape - ratio = np.random.uniform(self.min_ratio, self.max_ratio) - left = int(np.random.uniform(0, w * ratio - w)) - top = int(np.random.uniform(0, h * ratio - h)) - return ExpandTransform(left, top, ratio, self.mean) - - -@TRANSFORMS.register() -class RandomScale(TransformGen): - """ - Randomly scale the image according to the specified output size and scale ratio range. - - This transform has the following three steps: - - 1. select a random scale factor according to the specified scale ratio range. - 2. recompute the accurate scale_factor using rounded scaled image size. - 3. select non-zero random offset (x, y) if scaled image is larger than output_size. - """ - - def __init__(self, output_size, ratio_range=(0.1, 2), interp="BILINEAR"): - """ - Args: - output_size (tuple): image output size. - ratio_range (tuple): range of scale ratio. - interp (str): the interpolation method. Options includes: - * "NEAREST" - * "BILINEAR" - * "BICUBIC" - * "LANCZOS" - * "HAMMING" - * "BOX" - """ - super().__init__() - self._init(locals()) - self.min_ratio, self.max_ratio = ratio_range - if isinstance(self.output_size, int): - self.output_size = [self.output_size] * 2 - - def get_transform(self, img, annotations=None): - h, w = img.shape[:2] - output_h, output_w = self.output_size - - # 1. Select a random scale factor. - random_scale_factor = np.random.uniform(self.min_ratio, self.max_ratio) - - scaled_size_h = int(random_scale_factor * output_h) - scaled_size_w = int(random_scale_factor * output_w) - - # 2. Recompute the accurate scale_factor using rounded scaled image size. - image_scale_h = scaled_size_h * 1.0 / h - image_scale_w = scaled_size_w * 1.0 / w - image_scale = min(image_scale_h, image_scale_w) - - # 3. Select non-zero random offset (x, y) if scaled image is larger than output_size. - scaled_h = int(h * 1.0 * image_scale) - scaled_w = int(w * 1.0 * image_scale) - - return ScaleTransform(h, w, scaled_h, scaled_w, self.interp) - - -@TRANSFORMS.register() -class AutoAugment(TransformGen): - """ - Convert any of AutoAugment into a cvpods-fashion Transform such that can be configured in - config.py - """ - def __init__(self, name, prob=0.5, magnitude=10, hparams=None): - """ - Args: - name (str): any type of transforms list in _RAND_TRANSFORMS. - prob (float): probability of perform current augmentation. - magnitude (int): intensity / magnitude of each augmentation. - hparams (dict): hyper-parameters required by each augmentation. - """ - - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - return AutoAugmentTransform(self.name, self.prob, self.magnitude, self.hparams) - - -@TRANSFORMS.register() -class Pad(TransformGen): - """ - Pad image with `pad_value` to the specified `target_h` and `target_w`. - - Adds `top` rows of `pad_value` on top, `left` columns of `pad_value` on the left, - and then pads the image on the bottom and right with `pad_value` until it has - dimensions `target_h`, `target_w`. - - This op does nothing if `top` and `left` is zero and the image already has size - `target_h` by `target_w`. - """ - - def __init__(self, top, left, target_h, target_w, pad_value=0): - """ - Args: - top (int): number of rows of `pad_value` to add on top. 
- left (int): number of columns of `pad_value` to add on the left. - target_h (int): height of output image. - target_w (int): width of output image. - pad_value (int): the value used to pad the image. - """ - super().__init__() - self._init(locals()) - - def get_transform(self, img, annotations=None): - return PadTransform(self.top, self.left, self.target_h, self.target_w, self.pad_value) - - -@TRANSFORMS.register() -class RandomList(TransformGen): - """ - Random select subset of provided augmentations. - """ - def __init__(self, transforms, num_layers=2, choice_weights=None): - """ - Args: - transforms (List[TorchTransformGen]): list of transforms need to be performed. - num_layers (int): parameters of np.random.choice. - choice_weights (optional, float): parameters of np.random.choice. - """ - self.transforms = transforms - self.num_layers = num_layers - self.choice_weights = choice_weights - - def get_transform(self, img, annotations=None): - tfms = np.random.choice( - self.transforms, - self.num_layers, - replace=self.choice_weights is None, - p=self.choice_weights) - return ComposeTransform(tfms) - - -@TRANSFORMS.register() -class ShuffleList(TransformGen): - """ - Randomly shuffle the `transforms` order. - """ - - def __init__(self, transforms): - """ - Args: - transforms (list[TransformGen]): List of transform to be shuffled. - """ - super().__init__() - self.transforms = transforms - - def get_transform(self, img, annotations=None): - np.random.shuffle(self.transforms) - return ComposeTransform(self.transforms) - - -@TRANSFORMS.register() -class RepeatList(TransformGen): - """ - Forward several times of provided transforms for a given image. - """ - def __init__(self, transforms, repeat_times): - """ - Args: - transforms (list[TransformGen]): List of transform to be repeated. - repeat_times (int): number of duplicates desired. 
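`RandomList` above delegates subset selection to `np.random.choice`, and the line `replace=self.choice_weights is None` means uniform sampling draws with replacement while weighted sampling draws without (`ShuffleList`, by contrast, keeps all transforms and only permutes them). A sketch of the sampling rule with strings standing in for transform objects:

```python
import numpy as np

pool = ["autocontrast", "equalize", "rotate", "solarize"]  # illustrative names

uniform = np.random.choice(pool, 2, replace=True)        # choice_weights is None
weighted = np.random.choice(pool, 2, replace=False,
                            p=[0.4, 0.3, 0.2, 0.1])      # weights supplied
print(uniform, weighted)
```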
- """ - super().__init__() - self.transforms = transforms - self.times = repeat_times - - def get_transform(self, img, annotations=None): - return ComposeTransform(self.transforms) - - def __call__(self, img, annotations=None, **kwargs): - repeat_imgs = [] - repeat_annotations = [] - for t in range(self.times): - tmp_img, tmp_anno = self.get_transform(img)(img, annotations, **kwargs) - repeat_imgs.append(tmp_img) - repeat_annotations.append(tmp_anno) - repeat_imgs = np.stack(repeat_imgs, axis=0) - return repeat_imgs, repeat_annotations diff --git a/cvpods/modeling/meta_arch/centernet.py b/cvpods/modeling/meta_arch/centernet.py index ee4699f..207ee9c 100644 --- a/cvpods/modeling/meta_arch/centernet.py +++ b/cvpods/modeling/meta_arch/centernet.py @@ -11,7 +11,7 @@ import torch.nn as nn import torch.nn.functional as F -from cvpods.data.transforms.transform_gen import CenterAffine +from cvpods.data.transforms.augmentations import CenterAffine from cvpods.layers import DeformConvWithOff, ModulatedDeformConvWithOff, ShapeSpec from cvpods.modeling.losses import reg_l1_loss from cvpods.modeling.nn_utils.feature_utils import gather_feature diff --git a/playground/detection/coco/retinanet/retinanet.res50.fpn.coco.multiscale.1x/README.md b/playground/detection/coco/retinanet/retinanet.res50.fpn.coco.multiscale.1x/README.md index 6917d50..9eef717 100644 --- a/playground/detection/coco/retinanet/retinanet.res50.fpn.coco.multiscale.1x/README.md +++ b/playground/detection/coco/retinanet/retinanet.res50.fpn.coco.multiscale.1x/README.md @@ -1,50 +1,55 @@ # retinanet.res50.fpn.coco.multiscale.1x + +seed: 54373550 + ## Evaluation results for bbox: + ``` - Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.365 - Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.562 - Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.393 - Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.219 - Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.405 - Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.477 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.314 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.503 - Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.534 - Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.348 - Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.579 - Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.683 + Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.370 + Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.561 + Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.395 + Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.227 + Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.407 + Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.485 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.318 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.508 + Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.541 + Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.355 + Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.583 + Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.692 ``` | AP | AP50 | AP75 | APs | APm | APl | 
|:------:|:------:|:------:|:------:|:------:|:------:| -| 36.454 | 56.242 | 39.328 | 21.898 | 40.478 | 47.747 | +| 36.972 | 56.094 | 39.523 | 22.692 | 40.713 | 48.540 | + ### Per-category bbox AP: | category | AP | category | AP | category | AP | |:--------------|:-------|:-------------|:-------|:---------------|:-------| -| person | 49.847 | bicycle | 27.800 | car | 39.543 | -| motorcycle | 39.535 | airplane | 60.788 | bus | 62.082 | -| train | 57.897 | truck | 32.374 | boat | 23.642 | -| traffic light | 23.785 | fire hydrant | 61.860 | stop sign | 62.912 | -| parking meter | 42.134 | bench | 20.323 | bird | 32.035 | -| cat | 62.643 | dog | 59.460 | horse | 51.436 | -| sheep | 45.442 | cow | 49.267 | elephant | 55.752 | -| bear | 69.257 | zebra | 62.308 | giraffe | 62.423 | -| backpack | 12.308 | umbrella | 32.660 | handbag | 11.692 | -| tie | 26.095 | suitcase | 29.544 | frisbee | 62.662 | -| skis | 18.217 | snowboard | 22.185 | sports ball | 43.453 | -| kite | 37.063 | baseball bat | 21.984 | baseball glove | 31.171 | -| skateboard | 48.046 | surfboard | 30.663 | tennis racket | 44.667 | -| bottle | 32.999 | wine glass | 30.731 | cup | 37.611 | -| fork | 22.843 | knife | 9.941 | spoon | 10.597 | -| bowl | 37.554 | banana | 21.658 | apple | 16.915 | -| sandwich | 28.278 | orange | 27.994 | broccoli | 20.984 | -| carrot | 19.075 | hot dog | 27.483 | pizza | 46.153 | -| donut | 39.205 | cake | 30.042 | chair | 23.022 | -| couch | 36.458 | potted plant | 23.638 | bed | 39.539 | -| dining table | 24.671 | toilet | 54.413 | tv | 53.012 | -| laptop | 52.960 | mouse | 59.783 | remote | 24.399 | -| keyboard | 42.980 | cell phone | 32.588 | microwave | 53.939 | -| oven | 31.974 | toaster | 16.414 | sink | 31.876 | -| refrigerator | 46.588 | book | 11.818 | clock | 48.778 | -| vase | 34.466 | scissors | 25.628 | teddy bear | 45.000 | -| hair drier | 0.428 | toothbrush | 14.942 | | | +| person | 50.773 | bicycle | 27.125 | car | 39.880 | +| motorcycle | 40.405 | airplane | 63.344 | bus | 63.718 | +| train | 59.472 | truck | 33.432 | boat | 23.513 | +| traffic light | 25.051 | fire hydrant | 63.709 | stop sign | 62.338 | +| parking meter | 43.618 | bench | 20.839 | bird | 32.856 | +| cat | 64.382 | dog | 60.487 | horse | 51.448 | +| sheep | 46.863 | cow | 49.293 | elephant | 56.725 | +| bear | 67.596 | zebra | 64.431 | giraffe | 62.447 | +| backpack | 12.816 | umbrella | 33.575 | handbag | 11.732 | +| tie | 26.924 | suitcase | 32.361 | frisbee | 61.239 | +| skis | 16.860 | snowboard | 18.176 | sports ball | 43.461 | +| kite | 36.440 | baseball bat | 23.016 | baseball glove | 30.666 | +| skateboard | 46.296 | surfboard | 30.590 | tennis racket | 43.959 | +| bottle | 34.105 | wine glass | 32.470 | cup | 38.667 | +| fork | 21.774 | knife | 10.702 | spoon | 8.498 | +| bowl | 38.039 | banana | 21.952 | apple | 18.406 | +| sandwich | 29.000 | orange | 27.158 | broccoli | 21.329 | +| carrot | 19.073 | hot dog | 25.912 | pizza | 47.289 | +| donut | 39.409 | cake | 30.148 | chair | 23.320 | +| couch | 38.598 | potted plant | 23.030 | bed | 41.347 | +| dining table | 24.819 | toilet | 55.766 | tv | 53.597 | +| laptop | 54.526 | mouse | 59.451 | remote | 24.147 | +| keyboard | 43.568 | cell phone | 32.373 | microwave | 55.890 | +| oven | 31.385 | toaster | 26.014 | sink | 32.295 | +| refrigerator | 49.247 | book | 12.279 | clock | 48.616 | +| vase | 35.170 | scissors | 24.159 | teddy bear | 42.737 | +| hair drier | 5.050 | toothbrush | 14.578 | | | diff --git a/setup.py b/setup.py index 6280b8f..18d3af3 
100644 --- a/setup.py +++ b/setup.py @@ -11,7 +11,7 @@ from torch.utils.cpp_extension import CUDA_HOME, CppExtension, CUDAExtension torch_ver = [int(x) for x in torch.__version__.split(".")[:2]] -assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3" +assert torch_ver >= [1, 6], "Requires PyTorch >= 1.6" def get_version(): diff --git a/tests/data/test_rotation_transform.py b/tests/data/test_rotation_transform.py deleted file mode 100644 index 3685298..0000000 --- a/tests/data/test_rotation_transform.py +++ /dev/null @@ -1,71 +0,0 @@ -# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved -import numpy as np -import unittest - -from cvpods.data.transforms.transform import RotationTransform - - -class TestRotationTransform(unittest.TestCase): - def assertEqualsArrays(self, a1, a2): - self.assertTrue(np.allclose(a1, a2)) - - def randomData(self, h=5, w=5): - image = np.random.rand(h, w) - coords = np.array([[i, j] for j in range(h + 1) for i in range(w + 1)], dtype=float) - return image, coords, h, w - - def test180(self): - image, coords, h, w = self.randomData(6, 6) - rot = RotationTransform(h, w, 180, expand=False, center=None) - self.assertEqualsArrays(rot.apply_image(image), image[::-1, ::-1]) - rotated_coords = [[w - c[0], h - c[1]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test45_coords(self): - _, coords, h, w = self.randomData(4, 6) - rot = RotationTransform(h, w, 45, expand=False, center=None) - rotated_coords = [ - [(x + y - (h + w) / 2) / np.sqrt(2) + w / 2, h / 2 + (y + (w - h) / 2 - x) / np.sqrt(2)] - for (x, y) in coords - ] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test90(self): - image, coords, h, w = self.randomData() - rot = RotationTransform(h, w, 90, expand=False, center=None) - self.assertEqualsArrays(rot.apply_image(image), image.T[::-1]) - rotated_coords = [[c[1], w - c[0]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test90_expand(self): # non-square image - image, coords, h, w = self.randomData(h=5, w=8) - rot = RotationTransform(h, w, 90, expand=True, center=None) - self.assertEqualsArrays(rot.apply_image(image), image.T[::-1]) - rotated_coords = [[c[1], w - c[0]] for c in coords] - self.assertEqualsArrays(rot.apply_coords(coords), rotated_coords) - - def test_center_expand(self): - # center has no effect if expand=True because it only affects shifting - image, coords, h, w = self.randomData(h=5, w=8) - angle = np.random.randint(360) - rot1 = RotationTransform(h, w, angle, expand=True, center=None) - rot2 = RotationTransform(h, w, angle, expand=True, center=(0, 0)) - rot3 = RotationTransform(h, w, angle, expand=True, center=(h, w)) - rot4 = RotationTransform(h, w, angle, expand=True, center=(2, 5)) - for r1 in [rot1, rot2, rot3, rot4]: - for r2 in [rot1, rot2, rot3, rot4]: - self.assertEqualsArrays(r1.apply_image(image), r2.apply_image(image)) - self.assertEqualsArrays(r1.apply_coords(coords), r2.apply_coords(coords)) - - def test_inverse_transform(self): - image, coords, h, w = self.randomData(h=5, w=8) - rot = RotationTransform(h, w, 90, expand=True, center=None) - rot_image = rot.apply_image(image) - self.assertEqualsArrays(rot.inverse().apply_image(rot_image), image) - rot = RotationTransform(h, w, 65, expand=True, center=None) - rotated_coords = rot.apply_coords(coords) - self.assertEqualsArrays(rot.inverse().apply_coords(rotated_coords), coords) - - -if __name__ == "__main__": - unittest.main()
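Taken together, the migration means downstream code constructs a transform and applies it directly, receiving `(image, annotations)` back — for instance the registered `AutoAugment` from `auto_aug.py`. A usage sketch; the op name `"AutoContrast"` is a guess at an entry of `_RAND_TRANSFORMS`, whose contents this diff does not show:

```python
import numpy as np
from cvpods.data.transforms.auto_aug import AutoAugment

# "AutoContrast" is a hypothetical op name -- it must match _RAND_TRANSFORMS.
aug = AutoAugment("AutoContrast", prob=1.0, magnitude=9)
img = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
img, annotations = aug(img, annotations=None)  # new-style single call
print(img.shape)
```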