diff --git a/README.md b/README.md index 2cfa25e36810..20dd3156e078 100644 --- a/README.md +++ b/README.md @@ -397,7 +397,7 @@ Few pointers to get you started: - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/ignite/blob/master/examples/notebooks/FastaiLRFinder_MNIST.ipynb) [Basic example of LR finder on MNIST](https://github.com/pytorch/ignite/blob/master/examples/notebooks/FastaiLRFinder_MNIST.ipynb) - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/ignite/blob/master/examples/notebooks/Cifar100_bench_amp.ipynb) [Benchmark mixed precision training on Cifar100: - torch.cuda.amp vs nvidia/apex](https://github.com/pytorch/ignite/blob/master/examples/notebooks/Cifar100_bench_amp.ipynb) + torch.amp vs nvidia/apex](https://github.com/pytorch/ignite/blob/master/examples/notebooks/Cifar100_bench_amp.ipynb) - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/ignite/blob/master/examples/notebooks/MNIST_on_TPU.ipynb) [MNIST training on a single TPU](https://github.com/pytorch/ignite/blob/master/examples/notebooks/MNIST_on_TPU.ipynb) - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1E9zJrptnLJ_PKhmaP5Vhb6DTVRvyrKHx) [CIFAR10 Training on multiple TPUs](https://github.com/pytorch/ignite/tree/master/examples/cifar10) diff --git a/docs/source/conf.py b/docs/source/conf.py index ec73ee953df3..247a80246c6f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -348,7 +348,7 @@ def run(self): ("py:class", "torch.optim.optimizer.Optimizer"), ("py:class", "torch.utils.data.dataset.Dataset"), ("py:class", "torch.utils.data.sampler.BatchSampler"), - ("py:class", "torch.cuda.amp.grad_scaler.GradScaler"), + ("py:class", "torch.amp.grad_scaler.GradScaler"), ("py:class", "torch.optim.lr_scheduler._LRScheduler"), ("py:class", "torch.optim.lr_scheduler.LRScheduler"), ("py:class", "torch.utils.data.dataloader.DataLoader"), diff --git a/examples/cifar10/main.py b/examples/cifar10/main.py index b8dbce5d9601..aadc310382e3 100644 --- a/examples/cifar10/main.py +++ b/examples/cifar10/main.py @@ -8,7 +8,7 @@ import torch.optim as optim import utils from torch.amp import autocast -from torch.cuda.amp import GradScaler +from torch.amp import GradScaler import ignite import ignite.distributed as idist diff --git a/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py b/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py index 746d7eb54c49..4ca1551ad823 100644 --- a/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py +++ b/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py @@ -1,7 +1,7 @@ import fire import torch from torch.amp import autocast -from torch.cuda.amp import GradScaler +from torch.amp import GradScaler from torch.nn import CrossEntropyLoss from torch.optim import SGD from torchvision.models import wide_resnet50_2 diff --git a/examples/cifar10_qat/main.py b/examples/cifar10_qat/main.py index 7b8366a2a63f..3fb7d59d13ba 100644 --- a/examples/cifar10_qat/main.py +++ b/examples/cifar10_qat/main.py @@ -7,7 +7,7 @@ import torch.optim as optim import utils from torch.amp import autocast -from torch.cuda.amp import GradScaler +from torch.amp import GradScaler import ignite import ignite.distributed as idist diff --git a/examples/notebooks/Cifar100_bench_amp.ipynb 
b/examples/notebooks/Cifar100_bench_amp.ipynb index dc9cfc750d93..214d87eea87d 100644 --- a/examples/notebooks/Cifar100_bench_amp.ipynb +++ b/examples/notebooks/Cifar100_bench_amp.ipynb @@ -8,7 +8,7 @@ "source": [ "# Benchmark mixed precision training on Cifar100\n", "\n", - "In this notebook we will benchmark 1) native PyTorch mixed precision module [`torch.cuda.amp`](https://pytorch.org/docs/master/amp.html) and 2) NVidia/Apex package.\n", + "In this notebook we will benchmark 1) native PyTorch mixed precision module [`torch.amp`](https://pytorch.org/docs/master/amp.html) and 2) NVidia/Apex package.\n", "\n", "We will train Wide-ResNet model on Cifar100 dataset using Turing enabled GPU and compare training times.\n", "\n", @@ -16,7 +16,7 @@ "\n", "The ranking is the following:\n", "- 1st place: Nvidia/Apex \"O2\"\n", - "- 2nd place: `torch.cuda.amp`: autocast and scaler\n", + "- 2nd place: `torch.amp`: autocast and scaler\n", "- 3rd place: Nvidia/Apex \"O1\"\n", "- 4th place: fp32\n", "\n", @@ -31,7 +31,7 @@ "source": [ "## Installations and setup\n", "\n", - "1) Recently added [`torch.cuda.amp`](https://pytorch.org/docs/master/notes/amp_examples.html#working-with-multiple-models-losses-and-optimizers) module to perform automatic mixed precision training instead of using Nvidia/Apex package is available in PyTorch >=1.6.0.\n", + "1) Recently added [`torch.amp`](https://pytorch.org/docs/master/notes/amp_examples.html#working-with-multiple-models-losses-and-optimizers) module to perform automatic mixed precision training instead of using Nvidia/Apex package is available in PyTorch >=1.6.0.\n", "\n", "In this example we only need `pynvml` and `fire` packages, assuming that `torch` and `ignite` are already installed. We can install it using pip:" ] @@ -154,7 +154,7 @@ "id": "n2p-EMwGfDHs" }, "source": [ - "## Training with `torch.cuda.amp`" + "## Training with `torch.amp`" ] }, { diff --git a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb index c687267d0d52..217e0f8dec1f 100644 --- a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb +++ b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb @@ -1,31 +1,4 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "accelerator": "GPU", - "colab": { - "name": "CycleGAN_with_torch_cuda_amp.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, "cells": [ { "cell_type": "markdown", @@ -33,11 +6,11 @@ "id": "XwaQH08zFRZW" }, "source": [ - "# CycleGAN with Ignite and `torch.cuda.amp`\n", + "# CycleGAN with Ignite and `torch.amp`\n", "\n", "In this notebook we provide an implementation of [CycleGAN](https://arxiv.org/abs/1703.10593) and its training on \"Horse 2 Zebra\" dataset using Ignite. 
This notebook is almost similar to another our [notebook on CycleGAN with Nvidia/Apex](https://github.com/pytorch/ignite/blob/master/examples/notebooks/CycleGAN_with_ignite_and_nvdia_apex.ipynb).\n", "\n", - "In contrast, we will use recently added [`torch.cuda.amp`](https://pytorch.org/docs/master/notes/amp_examples.html#working-with-multiple-models-losses-and-optimizers) module to perform automatic mixed precision training instead of using Nvidia/Apex package. This module is available in pytorch (>=1.6.0) release.\n", + "In contrast, we will use recently added [`torch.amp`](https://pytorch.org/docs/master/notes/amp_examples.html#working-with-multiple-models-losses-and-optimizers) module to perform automatic mixed precision training instead of using Nvidia/Apex package. This module is available in pytorch (>=1.6.0) release.\n", "\n", "\n", "### CycleGAN in a Nutshell\n", @@ -67,14 +40,14 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "eqe1kXPcXj1U" }, + "outputs": [], "source": [ "!nvidia-smi" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -89,15 +62,15 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "l3LdmHAuFRZa" }, + "outputs": [], "source": [ "!wget https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/horse2zebra.zip -O/tmp/horse2zebra.zip\n", "!7z x /tmp/horse2zebra.zip -o/tmp/" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -115,14 +88,14 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "3c6PHUZeFRZu" }, + "outputs": [], "source": [ "!pip install --upgrade --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -135,33 +108,35 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "dWN63EToZA-G" }, + "outputs": [], "source": [ "!pip install --pre pytorch-ignite" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "p8M6GlpmQ5jZ" }, + "outputs": [], "source": [ "import torch\n", "import ignite\n", "torch.__version__, ignite.__version__" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "d3LCqCufFRZ6" }, + "outputs": [], "source": [ "import random\n", "import torch\n", @@ -169,9 +144,7 @@ "seed = 17\n", "random.seed(seed)\n", "_ = torch.manual_seed(seed)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -186,9 +159,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "nOr3nd4qFRaB" }, + "outputs": [], "source": [ "from torch.utils.data import Dataset, DataLoader\n", "from PIL import Image\n", @@ -206,15 +181,15 @@ " \n", " def __getitem__(self, i):\n", " return Image.open(self.images[i]).convert('RGB')" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "byG73rBHFRaG" }, + "outputs": [], "source": [ "from pathlib import Path\n", "\n", @@ -225,9 +200,7 @@ "\n", "test_A = FilesDataset(root / \"testA\") \n", "test_B = FilesDataset(root / \"testB\")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -240,54 +213,56 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "UZ4rY5S9FRaL" }, + "outputs": [], "source": [ "print(\"Dataset sizes: 
\\ntrain A: {} | B: {}\\ntest A: {} | B: {}\\n\\t\".format(len(train_A), len(train_B), len(test_A), len(test_B)))" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "R4lyZ6GSFRaR" }, + "outputs": [], "source": [ "print(\"Train random image sizes (A): {}, {}, {}, {}\".format(train_A[0].size, train_A[1].size, train_A[10].size, train_A[-1].size))" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "e-61gbfVFRaX" }, + "outputs": [], "source": [ "print(\"Train random image sizes (B): {}, {}, {}, {}\".format(train_B[0].size, train_B[1].size, train_B[10].size, train_B[-1].size))" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "fAxkND8jFRac" }, + "outputs": [], "source": [ "import matplotlib.pylab as plt\n", "%matplotlib inline" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "Xx7cAEI0FRah" }, + "outputs": [], "source": [ "plt.figure(figsize=(10, 5))\n", "plt.subplot(121)\n", @@ -296,15 +271,15 @@ "plt.subplot(122)\n", "plt.title(\"Train dataset 'Zebras'\")\n", "plt.imshow(train_B[10])" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "S_v_FuvZFRao" }, + "outputs": [], "source": [ "plt.figure(figsize=(10, 5))\n", "plt.subplot(121)\n", @@ -313,9 +288,7 @@ "plt.subplot(122)\n", "plt.title(\"Test dataset 'Zebras'\")\n", "plt.imshow(test_B[0])" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -328,9 +301,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "B699THXXFRat" }, + "outputs": [], "source": [ "import random\n", "\n", @@ -365,27 +340,27 @@ " \n", " def __getitem__(self, i):\n", " return {k: self.transform(v) for k, v in self.dataset[i].items()}" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "RF6OQwxoFRax" }, + "outputs": [], "source": [ "train_ab_ds = Image2ImageDataset(train_A, train_B)\n", "test_ab_ds = Image2ImageDataset(test_A, test_B)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "eIcsATwrFRa1" }, + "outputs": [], "source": [ "dp = train_ab_ds[20]\n", "\n", @@ -396,15 +371,15 @@ "plt.subplot(122)\n", "plt.title(\"Train dataset 'Zebras'\")\n", "plt.imshow(dp['B'])" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "X3zfGZoOFRa6" }, + "outputs": [], "source": [ "dp = test_ab_ds[20]\n", "\n", @@ -415,15 +390,15 @@ "plt.subplot(122)\n", "plt.title(\"Test dataset 'Zebras'\")\n", "plt.imshow(dp['B'])" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "q2Hda4tjFRa-" }, + "outputs": [], "source": [ "from torchvision.transforms import Compose, ColorJitter, RandomHorizontalFlip, ToTensor, Normalize, RandomCrop\n", "\n", @@ -449,15 +424,15 @@ "transformed_test_ab_ds = TransformedDataset(test_ab_ds, transform=test_transform)\n", "batch_size = 10\n", "test_ab_loader = DataLoader(transformed_test_ab_ds, batch_size=batch_size, shuffle=False, drop_last=False, pin_memory=True, num_workers=4)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": 
"code", + "execution_count": null, "metadata": { "id": "HtfXU9uaFRbB" }, + "outputs": [], "source": [ "import torchvision.utils as vutils\n", "\n", @@ -479,9 +454,7 @@ ")\n", "real_batch = None\n", "torch.cuda.empty_cache()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -501,9 +474,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "Ri6_NvWfFRbG" }, + "outputs": [], "source": [ "import torch\n", "import torch.nn as nn\n", @@ -577,9 +552,7 @@ " x = self.u64(x)\n", " y = self.c7s1_3(x)\n", " return y\n" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -592,17 +565,17 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "rfOTRPt4FRbL" }, + "outputs": [], "source": [ "x = torch.rand(4, 3, 256, 256)\n", "g = Generator()\n", "y = g(x)\n", "y.shape" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -631,9 +604,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "rdb5-RAYFRbR" }, + "outputs": [], "source": [ "def get_conv_inorm_lrelu(in_planes, out_planes, stride=2, negative_slope=0.2):\n", " return nn.Sequential(\n", @@ -663,9 +638,7 @@ " x = self.c512(x)\n", " y = self.last_conv(x)\n", " return y\n" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -678,17 +651,17 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "On4vpLc_FRbV" }, + "outputs": [], "source": [ "x = torch.rand(4, 3, 256, 256)\n", "d = Discriminator()\n", "y = d(x)\n", "y.shape" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -701,9 +674,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "2vId72FGFRba" }, + "outputs": [], "source": [ "def init_weights(module):\n", " assert isinstance(module, nn.Module)\n", @@ -713,20 +688,18 @@ " torch.nn.init.constant_(module.bias, 0.0)\n", " for c in module.children():\n", " init_weights(c)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "IMoUXZ5yFRbd" }, + "outputs": [], "source": [ "g = None; d = None" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -739,21 +712,23 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "ozU5CQ9JFRbh" }, + "outputs": [], "source": [ "assert torch.backends.cudnn.enabled\n", "torch.backends.cudnn.benchmark = True" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "-zZ5ZuXGFRbl" }, + "outputs": [], "source": [ "device = \"cuda\"\n", "\n", @@ -767,9 +742,7 @@ "init_weights(generator_B2A)\n", "discriminator_A = Discriminator().to(device)\n", "init_weights(discriminator_A)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -782,9 +755,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "YbgTEAMCFRbo" }, + "outputs": [], "source": [ "from itertools import chain\n", "import torch.optim as optim\n", @@ -794,9 +769,7 @@ "\n", "optimizer_G = optim.Adam(chain(generator_A2B.parameters(), generator_B2A.parameters()), lr=lr, betas=(beta1, 0.999))\n", "optimizer_D = optim.Adam(chain(discriminator_A.parameters(), discriminator_B.parameters()), lr=lr, betas=(beta1, 0.999))" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -810,17 +783,17 @@ }, { "cell_type": "code", + 
"execution_count": null, "metadata": { "id": "p82seQ9JFRbs" }, + "outputs": [], "source": [ "def toggle_grad(model, on_or_off):\n", " # https://github.com/ajbrock/BigGAN-PyTorch/blob/master/utils.py#L674\n", " for param in model.parameters():\n", " param.requires_grad = on_or_off" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -835,9 +808,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "jyghMWUPFRbw" }, + "outputs": [], "source": [ "buffer_size = 50\n", "fake_a_buffer = []\n", @@ -860,9 +835,7 @@ " else:\n", " output_batch.append(b)\n", " return torch.cat(output_batch, dim=0)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -887,16 +860,18 @@ "id": "JE8dLeEfIl_Z" }, "source": [ - "We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.cuda.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)." + "We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)." ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "vrJls4p-FRcA" }, + "outputs": [], "source": [ - "from torch.cuda.amp import GradScaler\n", + "from torch.amp import GradScaler\n", "from torch.amp import autocast\n", "\n", "from ignite.utils import convert_tensor\n", @@ -999,9 +974,7 @@ " \"loss_discriminator_a\": loss_a.item(),\n", " \"loss_discriminator_b\": loss_b.item(),\n", " }\n" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1014,9 +987,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "rLZextmDzzw_" }, + "outputs": [], "source": [ "real_batch = next(iter(train_ab_loader))\n", "\n", @@ -1026,9 +1001,7 @@ "torch.cuda.empty_cache()\n", "\n", "res" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1050,36 +1023,38 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "camPNT4TcCFu" }, + "outputs": [], "source": [ "!pip install --upgrade wandb\n", "# !wandb login your-token" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "TO58FENsFRcM" }, + "outputs": [], "source": [ "from ignite.engine import Engine, Events\n", "from ignite.metrics import RunningAverage\n", "\n", "from ignite.handlers import TensorboardLogger, WandBLogger\n", "from ignite.handlers.tensorboard_logger import OutputHandler, OptimizerParamsHandler" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "jbeZjaBtFRcO" }, + "outputs": [], "source": [ "from functools import partial\n", "\n", @@ -1101,15 +1076,15 @@ "for name in metric_names:\n", " # here we cannot use lambdas as they do not store argument `name`\n", " RunningAverage(output_transform=partial(output_transform, name=name)).attach(trainer, name)\n" - ], - "execution_count": null, - "outputs": [] + ] }, { 
"cell_type": "code", + "execution_count": null, "metadata": { "id": "qODNF0imFRcQ" }, + "outputs": [], "source": [ "from datetime import datetime\n", "\n", @@ -1121,15 +1096,15 @@ " event_name=Events.ITERATION_COMPLETED)\n", "\n", "print(\"Experiment name: \", exp_name)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "ljTkKgMYFRcT" }, + "outputs": [], "source": [ "from pathlib import Path\n", "\n", @@ -1147,9 +1122,7 @@ " )\n", "except RuntimeError:\n", " wb_logger = None" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1162,9 +1135,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "rCcdC6q9FRcV" }, + "outputs": [], "source": [ "from ignite.engine import Engine\n", "\n", @@ -1193,15 +1168,15 @@ "\n", "\n", "evaluator = Engine(evaluate_fn)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "UIecBsPKFRcX" }, + "outputs": [], "source": [ "from torch.utils.data import Subset\n", "\n", @@ -1217,15 +1192,15 @@ "\n", "eval_train_loader = DataLoader(small_train_ds, batch_size=eval_batch_size, shuffle=False, drop_last=False, pin_memory=True, num_workers=4)\n", "eval_test_loader = DataLoader(small_test_ds, batch_size=eval_batch_size, shuffle=False, drop_last=False, pin_memory=True, num_workers=4)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "H6XkEchHFRca" }, + "outputs": [], "source": [ "@trainer.on(Events.EPOCH_STARTED)\n", "def run_evaluation(engine):\n", @@ -1268,9 +1243,7 @@ "tb_logger.attach(evaluator,\n", " log_handler=log_generated_images, \n", " event_name=Events.COMPLETED)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1283,9 +1256,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "SSIcXzReFRcc" }, + "outputs": [], "source": [ "from ignite.handlers import PiecewiseLinear, ParamGroupScheduler\n", "\n", @@ -1308,9 +1283,7 @@ "tb_logger.attach(trainer,\n", " log_handler=OptimizerParamsHandler(optimizer_G, \"lr\"), \n", " event_name=Events.EPOCH_STARTED)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1323,20 +1296,22 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "7ZONS845FRcg" }, + "outputs": [], "source": [ "from ignite.handlers import ModelCheckpoint, TerminateOnNan" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "F-emWk-YFRci" }, + "outputs": [], "source": [ "!rm -rf \"/tmp/cycle_gan_checkpoints\" \n", "!mkdir \"/tmp/cycle_gan_checkpoints\"\n", @@ -1358,15 +1333,15 @@ "\n", "trainer.add_event_handler(Events.ITERATION_COMPLETED(every=500), checkpoint_handler, to_save)\n", "trainer.add_event_handler(Events.ITERATION_COMPLETED, TerminateOnNan())" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "RtQKr6yxFRck" }, + "outputs": [], "source": [ "from ignite.handlers import ProgressBar\n", "\n", @@ -1375,45 +1350,43 @@ "# Epoch-wise progress bar with display of training losses\n", "ProgressBar(persist=True, bar_format=\"\").attach(trainer, metric_names=['loss_discriminators', 'loss_generators'], \n", " event_name=Events.EPOCH_STARTED, closing_event_name=Events.COMPLETED)" - ], - "execution_count": null, - "outputs": 
[] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "bM-6vr8pcmOW" }, + "outputs": [], "source": [ "# Display in Firefox may not work properly. Use Chrome.\n", "%load_ext tensorboard\n", "\n", "%tensorboard --logdir=/tmp/cycle_gan_horse2zebra_tb_logs" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "CcxhA9rHFRcn" }, + "outputs": [], "source": [ "trainer.run(train_ab_loader, max_epochs=200)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "XtXSfbHqFRct" }, + "outputs": [], "source": [ "tb_logger.close()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1428,47 +1401,49 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "2xNAdA-WFRcx" }, + "outputs": [], "source": [ "!ls /tmp/cycle_gan_checkpoints/" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "TOhSsWRzFRcz" }, + "outputs": [], "source": [ "checkpoint_path = \"/tmp/cycle_gan_checkpoints/checkpoint_26500.pt\"\n", "\n", "# let's save this checkpoint to W&B\n", "if wb_logger is not None:\n", " wb_logger.save(checkpoint_path)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "j5BFfZceFRc2" }, + "outputs": [], "source": [ "checkpoint_state_dict = torch.load(checkpoint_path)\n", "generator_A2B.load_state_dict(checkpoint_state_dict[\"generator_A2B\"])" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "VGLb14xyFRc4" }, + "outputs": [], "source": [ "def normalize(x):\n", " vmin = x.min()\n", @@ -1476,15 +1451,15 @@ " x.clamp_(min=vmin, max=vmax)\n", " x.add_(-vmin).div_(vmax - vmin + 1e-5)\n", " return x" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "BO_DBOhSFRc7" }, + "outputs": [], "source": [ "i = random.randint(0, len(test_ab_ds) - 1)\n", "img = test_ab_ds[i]['A']\n", @@ -1497,15 +1472,15 @@ " \n", "\n", "img_pred = (255 * normalize(y_pred[0, ...])).cpu().numpy().transpose((1, 2, 0)).astype('uint8')" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "vseNrx2YFRc-" }, + "outputs": [], "source": [ "plt.figure(figsize=(10, 5))\n", "plt.subplot(121)\n", @@ -1514,9 +1489,7 @@ "plt.subplot(122)\n", "plt.title(\"Generated zebra\")\n", "plt.imshow(img_pred)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1529,20 +1502,22 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "6BlVkvybFRdA" }, + "outputs": [], "source": [ "!wget https://www.kdnuggets.com/wp-content/uploads/photo.jpg -O/tmp/dl_durus.jpg" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "eB7aAeO1FRdC" }, + "outputs": [], "source": [ "from PIL import Image\n", "\n", @@ -1556,15 +1531,15 @@ "\n", "\n", "img_pred = (255 * normalize(y_pred[0, ...])).cpu().numpy().transpose((1, 2, 0)).astype('uint8')" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "8Mcypu0lFRdD" }, + "outputs": [], "source": [ "plt.figure(figsize=(15, 8))\n", "plt.subplot(121)\n", @@ -1573,9 +1548,34 
@@ "plt.subplot(122)\n", "plt.title(\"Zebras\")\n", "plt.imshow(img_pred)" - ], - "execution_count": null, - "outputs": [] + ] } - ] + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "CycleGAN_with_torch_cuda_amp.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/examples/references/classification/imagenet/main.py b/examples/references/classification/imagenet/main.py index defb4ddc1510..f2706b9ba181 100644 --- a/examples/references/classification/imagenet/main.py +++ b/examples/references/classification/imagenet/main.py @@ -7,7 +7,7 @@ try: from torch.amp import autocast - from torch.cuda.amp import GradScaler + from torch.amp import GradScaler except ImportError: raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0") diff --git a/examples/references/segmentation/pascal_voc2012/main.py b/examples/references/segmentation/pascal_voc2012/main.py index b6fbc7ad494a..581862a3df50 100644 --- a/examples/references/segmentation/pascal_voc2012/main.py +++ b/examples/references/segmentation/pascal_voc2012/main.py @@ -7,7 +7,7 @@ try: from torch.amp import autocast - from torch.cuda.amp import GradScaler + from torch.amp import GradScaler except ImportError: raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0") diff --git a/examples/transformers/main.py b/examples/transformers/main.py index f8118eabf90e..8eeca9768ac5 100644 --- a/examples/transformers/main.py +++ b/examples/transformers/main.py @@ -8,7 +8,7 @@ import torch.optim as optim import utils from torch.amp import autocast -from torch.cuda.amp import GradScaler +from torch.amp import GradScaler import ignite import ignite.distributed as idist diff --git a/ignite/engine/__init__.py b/ignite/engine/__init__.py index 6e82bc2f6bc7..8bce5e2a40f3 100644 --- a/ignite/engine/__init__.py +++ b/ignite/engine/__init__.py @@ -133,11 +133,11 @@ def supervised_training_step_amp( prepare_batch: Callable = _prepare_batch, model_transform: Callable[[Any], Any] = lambda output: output, output_transform: Callable[[Any, Any, Any, torch.Tensor], Any] = lambda x, y, y_pred, loss: loss.item(), - scaler: Optional["torch.cuda.amp.GradScaler"] = None, + scaler: Optional["torch.amp.GradScaler"] = None, gradient_accumulation_steps: int = 1, model_fn: Callable[[torch.nn.Module, Any], Any] = lambda model, x: model(x), ) -> Callable: - """Factory function for supervised training using ``torch.cuda.amp``. + """Factory function for supervised training using ``torch.amp``. Args: model: the model to train. @@ -170,7 +170,7 @@ def supervised_training_step_amp( model = ... optimizer = ... loss_fn = ... 
- scaler = torch.cuda.amp.GradScaler(2**10) + scaler = torch.amp.GradScaler(device='cuda', init_scale=2**10) update_fn = supervised_training_step_amp(model, optimizer, loss_fn, 'cuda', scaler=scaler) trainer = Engine(update_fn) @@ -393,8 +393,8 @@ def update(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[to def _check_arg( - on_tpu: bool, on_mps: bool, amp_mode: Optional[str], scaler: Optional[Union[bool, "torch.cuda.amp.GradScaler"]] -) -> Tuple[Optional[str], Optional["torch.cuda.amp.GradScaler"]]: + on_tpu: bool, on_mps: bool, amp_mode: Optional[str], scaler: Optional[Union[bool, "torch.amp.GradScaler"]] +) -> Tuple[Optional[str], Optional["torch.amp.GradScaler"]]: """Checking tpu, mps, amp and GradScaler instance combinations.""" if on_mps and amp_mode: raise ValueError("amp_mode cannot be used with mps device. Consider using amp_mode=None or device='cuda'.") @@ -410,10 +410,10 @@ def _check_arg( raise ValueError(f"scaler argument is {scaler}, but amp_mode is {amp_mode}. Consider using amp_mode='amp'.") elif amp_mode == "amp" and isinstance(scaler, bool): try: - from torch.cuda.amp import GradScaler + from torch.amp import GradScaler except ImportError: raise ImportError("Please install torch>=1.6.0 to use scaler argument.") - scaler = GradScaler(enabled=True) + scaler = GradScaler(device='cuda', enabled=True) if on_tpu: return "tpu", None @@ -434,7 +434,7 @@ def create_supervised_trainer( output_transform: Callable[[Any, Any, Any, torch.Tensor], Any] = lambda x, y, y_pred, loss: loss.item(), deterministic: bool = False, amp_mode: Optional[str] = None, - scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False, + scaler: Union[bool, "torch.amp.GradScaler"] = False, gradient_accumulation_steps: int = 1, model_fn: Callable[[torch.nn.Module, Any], Any] = lambda model, x: model(x), ) -> Engine: @@ -459,7 +459,7 @@ def create_supervised_trainer( :class:`~ignite.engine.deterministic.DeterministicEngine`, otherwise :class:`~ignite.engine.engine.Engine` (default: False). amp_mode: can be ``amp`` or ``apex``, model and optimizer will be casted to float16 using - `torch.cuda.amp `_ for ``amp`` and + `torch.amp `_ for ``amp`` and using `apex `_ for ``apex``. (default: None) scaler: GradScaler instance for gradient scaling if `torch>=1.6.0` and ``amp_mode`` is ``amp``. If ``amp_mode`` is ``apex``, this argument will be ignored. @@ -689,7 +689,7 @@ def supervised_evaluation_step_amp( model_fn: Callable[[torch.nn.Module, Any], Any] = lambda model, x: model(x), ) -> Callable: """ - Factory function for supervised evaluation using ``torch.cuda.amp``. + Factory function for supervised evaluation using ``torch.amp``. Args: model: the model to train. @@ -771,7 +771,7 @@ def create_supervised_evaluator( to be assigned to engine's state.output after each iteration. Default is returning `(y_pred, y,)` which fits output expected by metrics. If you change it you should use `output_transform` in metrics. amp_mode: can be ``amp``, model will be casted to float16 using - `torch.cuda.amp `_ + `torch.amp `_ model_fn: the model function that receives `model` and `x`, and returns `y_pred`. 
Returns: diff --git a/tests/ignite/engine/test_create_supervised.py b/tests/ignite/engine/test_create_supervised.py index ba42baddddae..6122ebffbce9 100644 --- a/tests/ignite/engine/test_create_supervised.py +++ b/tests/ignite/engine/test_create_supervised.py @@ -48,7 +48,7 @@ def _default_create_supervised_trainer( trainer_device: Optional[str] = None, trace: bool = False, amp_mode: str = None, - scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False, + scaler: Union[bool, "torch.amp.GradScaler"] = False, with_model_transform: bool = False, with_model_fn: bool = False, ): @@ -104,7 +104,7 @@ def _test_create_supervised_trainer( trainer_device: Optional[str] = None, trace: bool = False, amp_mode: str = None, - scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False, + scaler: Union[bool, "torch.amp.GradScaler"] = False, with_model_transform: bool = False, with_model_fn: bool = False, ): @@ -170,10 +170,10 @@ def _(): @pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0") def test_create_supervised_training_scalar_assignment(): with mock.patch("ignite.engine._check_arg") as check_arg_mock: - check_arg_mock.return_value = None, torch.cuda.amp.GradScaler(enabled=False) + check_arg_mock.return_value = None, torch.amp.GradScaler(device="cuda", enabled=False) trainer, _ = _default_create_supervised_trainer(model_device="cpu", trainer_device="cpu", scaler=True) assert hasattr(trainer.state, "scaler") - assert isinstance(trainer.state.scaler, torch.cuda.amp.GradScaler) + assert isinstance(trainer.state.scaler, torch.amp.GradScaler) def _test_create_mocked_supervised_trainer( @@ -181,7 +181,7 @@ def _test_create_mocked_supervised_trainer( trainer_device: Optional[str] = None, trace: bool = False, amp_mode: str = None, - scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False, + scaler: Union[bool, "torch.amp.GradScaler"] = False, ): with mock.patch("ignite.engine.supervised_training_step_amp") as training_step_amp_mock: with mock.patch("ignite.engine.supervised_training_step_apex") as training_step_apex_mock: @@ -446,7 +446,7 @@ def test_create_supervised_trainer_apex_error(): def mock_torch_cuda_amp_module(): with patch.dict( "sys.modules", - {"torch.amp": None, "torch.cuda.amp": None, "torch.amp.autocast_mode": None}, + {"torch.amp": None, "torch.amp.autocast_mode": None}, ): yield torch @@ -462,7 +462,7 @@ def test_create_supervised_trainer_amp_error(mock_torch_cuda_amp_module): @pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0") def test_create_supervised_trainer_scaler_not_amp(): - scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available()) + scaler = torch.amp.GradScaler(enabled=torch.cuda.is_available()) with pytest.raises(ValueError, match=f"scaler argument is {scaler}, but amp_mode is None."): _test_create_supervised_trainer(amp_mode=None, scaler=scaler) @@ -540,7 +540,7 @@ def test_create_supervised_trainer_on_cuda_amp_scaler(): _test_create_mocked_supervised_trainer( model_device=model_device, trainer_device=trainer_device, amp_mode="amp", scaler=True ) - scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available()) + scaler = torch.amp.GradScaler(enabled=torch.cuda.is_available()) _test_create_supervised_trainer( gradient_accumulation_steps=1, model_device=model_device,
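
For reference, a minimal sketch of the torch.amp usage this patch migrates to, assuming PyTorch >= 2.3 (where GradScaler is importable from torch.amp) and a CUDA device; the model, optimizer, and tensors below are placeholders rather than code from the patch:

import torch
from torch.amp import GradScaler, autocast

from ignite.engine import create_supervised_trainer

device = "cuda"
model = torch.nn.Linear(10, 2).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
loss_fn = torch.nn.CrossEntropyLoss()

# One training step with the device-agnostic torch.amp API
# (replaces torch.cuda.amp.autocast / torch.cuda.amp.GradScaler).
scaler = GradScaler(device)
x = torch.randn(8, 10, device=device)
y = torch.randint(0, 2, (8,), device=device)

optimizer.zero_grad()
with autocast(device):
    loss = loss_fn(model(x), y)
scaler.scale(loss).backward()   # scale the loss, then backprop
scaler.step(optimizer)          # unscale gradients and step
scaler.update()                 # adjust the scale factor

# Equivalent ignite trainer: with amp_mode="amp" and scaler=True,
# _check_arg builds a torch.amp.GradScaler internally.
trainer = create_supervised_trainer(
    model, optimizer, loss_fn, device=device, amp_mode="amp", scaler=True
)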