diff --git a/README.md b/README.md index 2cfa25e36810..20dd3156e078 100644 --- a/README.md +++ b/README.md @@ -397,7 +397,7 @@ Few pointers to get you started: - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/ignite/blob/master/examples/notebooks/FastaiLRFinder_MNIST.ipynb) [Basic example of LR finder on MNIST](https://github.com/pytorch/ignite/blob/master/examples/notebooks/FastaiLRFinder_MNIST.ipynb) - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/ignite/blob/master/examples/notebooks/Cifar100_bench_amp.ipynb) [Benchmark mixed precision training on Cifar100: - torch.cuda.amp vs nvidia/apex](https://github.com/pytorch/ignite/blob/master/examples/notebooks/Cifar100_bench_amp.ipynb) + torch.amp vs nvidia/apex](https://github.com/pytorch/ignite/blob/master/examples/notebooks/Cifar100_bench_amp.ipynb) - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/pytorch/ignite/blob/master/examples/notebooks/MNIST_on_TPU.ipynb) [MNIST training on a single TPU](https://github.com/pytorch/ignite/blob/master/examples/notebooks/MNIST_on_TPU.ipynb) - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1E9zJrptnLJ_PKhmaP5Vhb6DTVRvyrKHx) [CIFAR10 Training on multiple TPUs](https://github.com/pytorch/ignite/tree/master/examples/cifar10) diff --git a/docs/source/conf.py b/docs/source/conf.py index ec73ee953df3..247a80246c6f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -348,7 +348,7 @@ def run(self): ("py:class", "torch.optim.optimizer.Optimizer"), ("py:class", "torch.utils.data.dataset.Dataset"), ("py:class", "torch.utils.data.sampler.BatchSampler"), - ("py:class", "torch.cuda.amp.grad_scaler.GradScaler"), + ("py:class", "torch.amp.grad_scaler.GradScaler"), ("py:class", "torch.optim.lr_scheduler._LRScheduler"), ("py:class", "torch.optim.lr_scheduler.LRScheduler"), ("py:class", "torch.utils.data.dataloader.DataLoader"), diff --git a/examples/cifar10/main.py b/examples/cifar10/main.py index b8dbce5d9601..aadc310382e3 100644 --- a/examples/cifar10/main.py +++ b/examples/cifar10/main.py @@ -8,7 +8,7 @@ import torch.optim as optim import utils from torch.amp import autocast -from torch.cuda.amp import GradScaler +from torch.amp import GradScaler import ignite import ignite.distributed as idist diff --git a/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py b/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py index 746d7eb54c49..4ca1551ad823 100644 --- a/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py +++ b/examples/cifar100_amp_benchmark/benchmark_torch_cuda_amp.py @@ -1,7 +1,7 @@ import fire import torch from torch.amp import autocast -from torch.cuda.amp import GradScaler +from torch.amp import GradScaler from torch.nn import CrossEntropyLoss from torch.optim import SGD from torchvision.models import wide_resnet50_2 diff --git a/examples/cifar10_qat/main.py b/examples/cifar10_qat/main.py index 7b8366a2a63f..3fb7d59d13ba 100644 --- a/examples/cifar10_qat/main.py +++ b/examples/cifar10_qat/main.py @@ -7,7 +7,7 @@ import torch.optim as optim import utils from torch.amp import autocast -from torch.cuda.amp import GradScaler +from torch.amp import GradScaler import ignite import ignite.distributed as idist diff --git a/examples/notebooks/Cifar100_bench_amp.ipynb 
b/examples/notebooks/Cifar100_bench_amp.ipynb index dc9cfc750d93..214d87eea87d 100644 --- a/examples/notebooks/Cifar100_bench_amp.ipynb +++ b/examples/notebooks/Cifar100_bench_amp.ipynb @@ -8,7 +8,7 @@ "source": [ "# Benchmark mixed precision training on Cifar100\n", "\n", - "In this notebook we will benchmark 1) native PyTorch mixed precision module [`torch.cuda.amp`](https://pytorch.org/docs/master/amp.html) and 2) NVidia/Apex package.\n", + "In this notebook we will benchmark 1) native PyTorch mixed precision module [`torch.amp`](https://pytorch.org/docs/master/amp.html) and 2) NVidia/Apex package.\n", "\n", "We will train Wide-ResNet model on Cifar100 dataset using Turing enabled GPU and compare training times.\n", "\n", @@ -16,7 +16,7 @@ "\n", "The ranking is the following:\n", "- 1st place: Nvidia/Apex \"O2\"\n", - "- 2nd place: `torch.cuda.amp`: autocast and scaler\n", + "- 2nd place: `torch.amp`: autocast and scaler\n", "- 3rd place: Nvidia/Apex \"O1\"\n", "- 4th place: fp32\n", "\n", @@ -31,7 +31,7 @@ "source": [ "## Installations and setup\n", "\n", - "1) Recently added [`torch.cuda.amp`](https://pytorch.org/docs/master/notes/amp_examples.html#working-with-multiple-models-losses-and-optimizers) module to perform automatic mixed precision training instead of using Nvidia/Apex package is available in PyTorch >=1.6.0.\n", + "1) Recently added [`torch.amp`](https://pytorch.org/docs/master/notes/amp_examples.html#working-with-multiple-models-losses-and-optimizers) module to perform automatic mixed precision training instead of using Nvidia/Apex package is available in PyTorch >=1.6.0.\n", "\n", "In this example we only need `pynvml` and `fire` packages, assuming that `torch` and `ignite` are already installed. We can install it using pip:" ] @@ -154,7 +154,7 @@ "id": "n2p-EMwGfDHs" }, "source": [ - "## Training with `torch.cuda.amp`" + "## Training with `torch.amp`" ] }, { diff --git a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb index c687267d0d52..217e0f8dec1f 100644 --- a/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb +++ b/examples/notebooks/CycleGAN_with_torch_cuda_amp.ipynb @@ -1,31 +1,4 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "accelerator": "GPU", - "colab": { - "name": "CycleGAN_with_torch_cuda_amp.ipynb", - "provenance": [], - "collapsed_sections": [] - }, - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.4" - } - }, "cells": [ { "cell_type": "markdown", @@ -33,11 +6,11 @@ "id": "XwaQH08zFRZW" }, "source": [ - "# CycleGAN with Ignite and `torch.cuda.amp`\n", + "# CycleGAN with Ignite and `torch.amp`\n", "\n", "In this notebook we provide an implementation of [CycleGAN](https://arxiv.org/abs/1703.10593) and its training on \"Horse 2 Zebra\" dataset using Ignite. 
This notebook is almost similar to another our [notebook on CycleGAN with Nvidia/Apex](https://github.com/pytorch/ignite/blob/master/examples/notebooks/CycleGAN_with_ignite_and_nvdia_apex.ipynb).\n", "\n", - "In contrast, we will use recently added [`torch.cuda.amp`](https://pytorch.org/docs/master/notes/amp_examples.html#working-with-multiple-models-losses-and-optimizers) module to perform automatic mixed precision training instead of using Nvidia/Apex package. This module is available in pytorch (>=1.6.0) release.\n", + "In contrast, we will use recently added [`torch.amp`](https://pytorch.org/docs/master/notes/amp_examples.html#working-with-multiple-models-losses-and-optimizers) module to perform automatic mixed precision training instead of using Nvidia/Apex package. This module is available in pytorch (>=1.6.0) release.\n", "\n", "\n", "### CycleGAN in a Nutshell\n", @@ -67,14 +40,14 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "eqe1kXPcXj1U" }, + "outputs": [], "source": [ "!nvidia-smi" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -89,15 +62,15 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "l3LdmHAuFRZa" }, + "outputs": [], "source": [ "!wget https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/horse2zebra.zip -O/tmp/horse2zebra.zip\n", "!7z x /tmp/horse2zebra.zip -o/tmp/" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -115,14 +88,14 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "3c6PHUZeFRZu" }, + "outputs": [], "source": [ "!pip install --upgrade --pre torch torchvision -f https://download.pytorch.org/whl/nightly/cu101/torch_nightly.html" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -135,33 +108,35 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "dWN63EToZA-G" }, + "outputs": [], "source": [ "!pip install --pre pytorch-ignite" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "p8M6GlpmQ5jZ" }, + "outputs": [], "source": [ "import torch\n", "import ignite\n", "torch.__version__, ignite.__version__" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "d3LCqCufFRZ6" }, + "outputs": [], "source": [ "import random\n", "import torch\n", @@ -169,9 +144,7 @@ "seed = 17\n", "random.seed(seed)\n", "_ = torch.manual_seed(seed)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -186,9 +159,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "nOr3nd4qFRaB" }, + "outputs": [], "source": [ "from torch.utils.data import Dataset, DataLoader\n", "from PIL import Image\n", @@ -206,15 +181,15 @@ " \n", " def __getitem__(self, i):\n", " return Image.open(self.images[i]).convert('RGB')" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "byG73rBHFRaG" }, + "outputs": [], "source": [ "from pathlib import Path\n", "\n", @@ -225,9 +200,7 @@ "\n", "test_A = FilesDataset(root / \"testA\") \n", "test_B = FilesDataset(root / \"testB\")" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -240,54 +213,56 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "UZ4rY5S9FRaL" }, + "outputs": [], "source": [ "print(\"Dataset sizes: 
\\ntrain A: {} | B: {}\\ntest A: {} | B: {}\\n\\t\".format(len(train_A), len(train_B), len(test_A), len(test_B)))" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "R4lyZ6GSFRaR" }, + "outputs": [], "source": [ "print(\"Train random image sizes (A): {}, {}, {}, {}\".format(train_A[0].size, train_A[1].size, train_A[10].size, train_A[-1].size))" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "e-61gbfVFRaX" }, + "outputs": [], "source": [ "print(\"Train random image sizes (B): {}, {}, {}, {}\".format(train_B[0].size, train_B[1].size, train_B[10].size, train_B[-1].size))" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "fAxkND8jFRac" }, + "outputs": [], "source": [ "import matplotlib.pylab as plt\n", "%matplotlib inline" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "Xx7cAEI0FRah" }, + "outputs": [], "source": [ "plt.figure(figsize=(10, 5))\n", "plt.subplot(121)\n", @@ -296,15 +271,15 @@ "plt.subplot(122)\n", "plt.title(\"Train dataset 'Zebras'\")\n", "plt.imshow(train_B[10])" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "S_v_FuvZFRao" }, + "outputs": [], "source": [ "plt.figure(figsize=(10, 5))\n", "plt.subplot(121)\n", @@ -313,9 +288,7 @@ "plt.subplot(122)\n", "plt.title(\"Test dataset 'Zebras'\")\n", "plt.imshow(test_B[0])" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -328,9 +301,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "B699THXXFRat" }, + "outputs": [], "source": [ "import random\n", "\n", @@ -365,27 +340,27 @@ " \n", " def __getitem__(self, i):\n", " return {k: self.transform(v) for k, v in self.dataset[i].items()}" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "RF6OQwxoFRax" }, + "outputs": [], "source": [ "train_ab_ds = Image2ImageDataset(train_A, train_B)\n", "test_ab_ds = Image2ImageDataset(test_A, test_B)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "eIcsATwrFRa1" }, + "outputs": [], "source": [ "dp = train_ab_ds[20]\n", "\n", @@ -396,15 +371,15 @@ "plt.subplot(122)\n", "plt.title(\"Train dataset 'Zebras'\")\n", "plt.imshow(dp['B'])" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "X3zfGZoOFRa6" }, + "outputs": [], "source": [ "dp = test_ab_ds[20]\n", "\n", @@ -415,15 +390,15 @@ "plt.subplot(122)\n", "plt.title(\"Test dataset 'Zebras'\")\n", "plt.imshow(dp['B'])" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "q2Hda4tjFRa-" }, + "outputs": [], "source": [ "from torchvision.transforms import Compose, ColorJitter, RandomHorizontalFlip, ToTensor, Normalize, RandomCrop\n", "\n", @@ -449,15 +424,15 @@ "transformed_test_ab_ds = TransformedDataset(test_ab_ds, transform=test_transform)\n", "batch_size = 10\n", "test_ab_loader = DataLoader(transformed_test_ab_ds, batch_size=batch_size, shuffle=False, drop_last=False, pin_memory=True, num_workers=4)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": 
"code", + "execution_count": null, "metadata": { "id": "HtfXU9uaFRbB" }, + "outputs": [], "source": [ "import torchvision.utils as vutils\n", "\n", @@ -479,9 +454,7 @@ ")\n", "real_batch = None\n", "torch.cuda.empty_cache()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -501,9 +474,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "Ri6_NvWfFRbG" }, + "outputs": [], "source": [ "import torch\n", "import torch.nn as nn\n", @@ -577,9 +552,7 @@ " x = self.u64(x)\n", " y = self.c7s1_3(x)\n", " return y\n" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -592,17 +565,17 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "rfOTRPt4FRbL" }, + "outputs": [], "source": [ "x = torch.rand(4, 3, 256, 256)\n", "g = Generator()\n", "y = g(x)\n", "y.shape" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -631,9 +604,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "rdb5-RAYFRbR" }, + "outputs": [], "source": [ "def get_conv_inorm_lrelu(in_planes, out_planes, stride=2, negative_slope=0.2):\n", " return nn.Sequential(\n", @@ -663,9 +638,7 @@ " x = self.c512(x)\n", " y = self.last_conv(x)\n", " return y\n" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -678,17 +651,17 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "On4vpLc_FRbV" }, + "outputs": [], "source": [ "x = torch.rand(4, 3, 256, 256)\n", "d = Discriminator()\n", "y = d(x)\n", "y.shape" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -701,9 +674,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "2vId72FGFRba" }, + "outputs": [], "source": [ "def init_weights(module):\n", " assert isinstance(module, nn.Module)\n", @@ -713,20 +688,18 @@ " torch.nn.init.constant_(module.bias, 0.0)\n", " for c in module.children():\n", " init_weights(c)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "IMoUXZ5yFRbd" }, + "outputs": [], "source": [ "g = None; d = None" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -739,21 +712,23 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "ozU5CQ9JFRbh" }, + "outputs": [], "source": [ "assert torch.backends.cudnn.enabled\n", "torch.backends.cudnn.benchmark = True" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "-zZ5ZuXGFRbl" }, + "outputs": [], "source": [ "device = \"cuda\"\n", "\n", @@ -767,9 +742,7 @@ "init_weights(generator_B2A)\n", "discriminator_A = Discriminator().to(device)\n", "init_weights(discriminator_A)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -782,9 +755,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "YbgTEAMCFRbo" }, + "outputs": [], "source": [ "from itertools import chain\n", "import torch.optim as optim\n", @@ -794,9 +769,7 @@ "\n", "optimizer_G = optim.Adam(chain(generator_A2B.parameters(), generator_B2A.parameters()), lr=lr, betas=(beta1, 0.999))\n", "optimizer_D = optim.Adam(chain(discriminator_A.parameters(), discriminator_B.parameters()), lr=lr, betas=(beta1, 0.999))" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -810,17 +783,17 @@ }, { "cell_type": "code", + 
"execution_count": null, "metadata": { "id": "p82seQ9JFRbs" }, + "outputs": [], "source": [ "def toggle_grad(model, on_or_off):\n", " # https://github.com/ajbrock/BigGAN-PyTorch/blob/master/utils.py#L674\n", " for param in model.parameters():\n", " param.requires_grad = on_or_off" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -835,9 +808,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "jyghMWUPFRbw" }, + "outputs": [], "source": [ "buffer_size = 50\n", "fake_a_buffer = []\n", @@ -860,9 +835,7 @@ " else:\n", " output_batch.append(b)\n", " return torch.cat(output_batch, dim=0)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -887,16 +860,18 @@ "id": "JE8dLeEfIl_Z" }, "source": [ - "We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.cuda.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.cuda.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)." + "We will use [`torch.amp.autocast`](https://pytorch.org/docs/master/amp.html#torch.amp.autocast) and [`torch.amp.GradScaler`](https://pytorch.org/docs/master/amp.html#torch.amp.GradScaler) to perform automatic mixed precision training. Our code follows a [typical mixed precision training example](https://pytorch.org/docs/master/notes/amp_examples.html#typical-mixed-precision-training)." ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "vrJls4p-FRcA" }, + "outputs": [], "source": [ - "from torch.cuda.amp import GradScaler\n", + "from torch.amp import GradScaler\n", "from torch.amp import autocast\n", "\n", "from ignite.utils import convert_tensor\n", @@ -999,9 +974,7 @@ " \"loss_discriminator_a\": loss_a.item(),\n", " \"loss_discriminator_b\": loss_b.item(),\n", " }\n" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1014,9 +987,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "rLZextmDzzw_" }, + "outputs": [], "source": [ "real_batch = next(iter(train_ab_loader))\n", "\n", @@ -1026,9 +1001,7 @@ "torch.cuda.empty_cache()\n", "\n", "res" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1050,36 +1023,38 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "camPNT4TcCFu" }, + "outputs": [], "source": [ "!pip install --upgrade wandb\n", "# !wandb login your-token" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "TO58FENsFRcM" }, + "outputs": [], "source": [ "from ignite.engine import Engine, Events\n", "from ignite.metrics import RunningAverage\n", "\n", "from ignite.handlers import TensorboardLogger, WandBLogger\n", "from ignite.handlers.tensorboard_logger import OutputHandler, OptimizerParamsHandler" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "jbeZjaBtFRcO" }, + "outputs": [], "source": [ "from functools import partial\n", "\n", @@ -1101,15 +1076,15 @@ "for name in metric_names:\n", " # here we cannot use lambdas as they do not store argument `name`\n", " RunningAverage(output_transform=partial(output_transform, name=name)).attach(trainer, name)\n" - ], - "execution_count": null, - "outputs": [] + ] }, { 
"cell_type": "code", + "execution_count": null, "metadata": { "id": "qODNF0imFRcQ" }, + "outputs": [], "source": [ "from datetime import datetime\n", "\n", @@ -1121,15 +1096,15 @@ " event_name=Events.ITERATION_COMPLETED)\n", "\n", "print(\"Experiment name: \", exp_name)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "ljTkKgMYFRcT" }, + "outputs": [], "source": [ "from pathlib import Path\n", "\n", @@ -1147,9 +1122,7 @@ " )\n", "except RuntimeError:\n", " wb_logger = None" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1162,9 +1135,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "rCcdC6q9FRcV" }, + "outputs": [], "source": [ "from ignite.engine import Engine\n", "\n", @@ -1193,15 +1168,15 @@ "\n", "\n", "evaluator = Engine(evaluate_fn)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "UIecBsPKFRcX" }, + "outputs": [], "source": [ "from torch.utils.data import Subset\n", "\n", @@ -1217,15 +1192,15 @@ "\n", "eval_train_loader = DataLoader(small_train_ds, batch_size=eval_batch_size, shuffle=False, drop_last=False, pin_memory=True, num_workers=4)\n", "eval_test_loader = DataLoader(small_test_ds, batch_size=eval_batch_size, shuffle=False, drop_last=False, pin_memory=True, num_workers=4)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "H6XkEchHFRca" }, + "outputs": [], "source": [ "@trainer.on(Events.EPOCH_STARTED)\n", "def run_evaluation(engine):\n", @@ -1268,9 +1243,7 @@ "tb_logger.attach(evaluator,\n", " log_handler=log_generated_images, \n", " event_name=Events.COMPLETED)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1283,9 +1256,11 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "SSIcXzReFRcc" }, + "outputs": [], "source": [ "from ignite.handlers import PiecewiseLinear, ParamGroupScheduler\n", "\n", @@ -1308,9 +1283,7 @@ "tb_logger.attach(trainer,\n", " log_handler=OptimizerParamsHandler(optimizer_G, \"lr\"), \n", " event_name=Events.EPOCH_STARTED)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1323,20 +1296,22 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "7ZONS845FRcg" }, + "outputs": [], "source": [ "from ignite.handlers import ModelCheckpoint, TerminateOnNan" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "F-emWk-YFRci" }, + "outputs": [], "source": [ "!rm -rf \"/tmp/cycle_gan_checkpoints\" \n", "!mkdir \"/tmp/cycle_gan_checkpoints\"\n", @@ -1358,15 +1333,15 @@ "\n", "trainer.add_event_handler(Events.ITERATION_COMPLETED(every=500), checkpoint_handler, to_save)\n", "trainer.add_event_handler(Events.ITERATION_COMPLETED, TerminateOnNan())" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "RtQKr6yxFRck" }, + "outputs": [], "source": [ "from ignite.handlers import ProgressBar\n", "\n", @@ -1375,45 +1350,43 @@ "# Epoch-wise progress bar with display of training losses\n", "ProgressBar(persist=True, bar_format=\"\").attach(trainer, metric_names=['loss_discriminators', 'loss_generators'], \n", " event_name=Events.EPOCH_STARTED, closing_event_name=Events.COMPLETED)" - ], - "execution_count": null, - "outputs": 
[] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "bM-6vr8pcmOW" }, + "outputs": [], "source": [ "# Display in Firefox may not work properly. Use Chrome.\n", "%load_ext tensorboard\n", "\n", "%tensorboard --logdir=/tmp/cycle_gan_horse2zebra_tb_logs" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "CcxhA9rHFRcn" }, + "outputs": [], "source": [ "trainer.run(train_ab_loader, max_epochs=200)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "XtXSfbHqFRct" }, + "outputs": [], "source": [ "tb_logger.close()" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1428,47 +1401,49 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "2xNAdA-WFRcx" }, + "outputs": [], "source": [ "!ls /tmp/cycle_gan_checkpoints/" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "TOhSsWRzFRcz" }, + "outputs": [], "source": [ "checkpoint_path = \"/tmp/cycle_gan_checkpoints/checkpoint_26500.pt\"\n", "\n", "# let's save this checkpoint to W&B\n", "if wb_logger is not None:\n", " wb_logger.save(checkpoint_path)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "j5BFfZceFRc2" }, + "outputs": [], "source": [ "checkpoint_state_dict = torch.load(checkpoint_path)\n", "generator_A2B.load_state_dict(checkpoint_state_dict[\"generator_A2B\"])" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "VGLb14xyFRc4" }, + "outputs": [], "source": [ "def normalize(x):\n", " vmin = x.min()\n", @@ -1476,15 +1451,15 @@ " x.clamp_(min=vmin, max=vmax)\n", " x.add_(-vmin).div_(vmax - vmin + 1e-5)\n", " return x" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "BO_DBOhSFRc7" }, + "outputs": [], "source": [ "i = random.randint(0, len(test_ab_ds) - 1)\n", "img = test_ab_ds[i]['A']\n", @@ -1497,15 +1472,15 @@ " \n", "\n", "img_pred = (255 * normalize(y_pred[0, ...])).cpu().numpy().transpose((1, 2, 0)).astype('uint8')" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "vseNrx2YFRc-" }, + "outputs": [], "source": [ "plt.figure(figsize=(10, 5))\n", "plt.subplot(121)\n", @@ -1514,9 +1489,7 @@ "plt.subplot(122)\n", "plt.title(\"Generated zebra\")\n", "plt.imshow(img_pred)" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "markdown", @@ -1529,20 +1502,22 @@ }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "6BlVkvybFRdA" }, + "outputs": [], "source": [ "!wget https://www.kdnuggets.com/wp-content/uploads/photo.jpg -O/tmp/dl_durus.jpg" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "eB7aAeO1FRdC" }, + "outputs": [], "source": [ "from PIL import Image\n", "\n", @@ -1556,15 +1531,15 @@ "\n", "\n", "img_pred = (255 * normalize(y_pred[0, ...])).cpu().numpy().transpose((1, 2, 0)).astype('uint8')" - ], - "execution_count": null, - "outputs": [] + ] }, { "cell_type": "code", + "execution_count": null, "metadata": { "id": "8Mcypu0lFRdD" }, + "outputs": [], "source": [ "plt.figure(figsize=(15, 8))\n", "plt.subplot(121)\n", @@ -1573,9 +1548,34 
@@ "plt.subplot(122)\n", "plt.title(\"Zebras\")\n", "plt.imshow(img_pred)" - ], - "execution_count": null, - "outputs": [] + ] } - ] + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "CycleGAN_with_torch_cuda_amp.ipynb", + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.7.4" + } + }, + "nbformat": 4, + "nbformat_minor": 0 } diff --git a/examples/references/classification/imagenet/main.py b/examples/references/classification/imagenet/main.py index defb4ddc1510..f2706b9ba181 100644 --- a/examples/references/classification/imagenet/main.py +++ b/examples/references/classification/imagenet/main.py @@ -7,7 +7,7 @@ try: from torch.amp import autocast - from torch.cuda.amp import GradScaler + from torch.amp import GradScaler except ImportError: raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0") diff --git a/examples/references/segmentation/pascal_voc2012/main.py b/examples/references/segmentation/pascal_voc2012/main.py index b6fbc7ad494a..581862a3df50 100644 --- a/examples/references/segmentation/pascal_voc2012/main.py +++ b/examples/references/segmentation/pascal_voc2012/main.py @@ -7,7 +7,7 @@ try: from torch.amp import autocast - from torch.cuda.amp import GradScaler + from torch.amp import GradScaler except ImportError: raise RuntimeError("Please, use recent PyTorch version, e.g. >=1.12.0") diff --git a/examples/transformers/main.py b/examples/transformers/main.py index f8118eabf90e..8eeca9768ac5 100644 --- a/examples/transformers/main.py +++ b/examples/transformers/main.py @@ -8,7 +8,7 @@ import torch.optim as optim import utils from torch.amp import autocast -from torch.cuda.amp import GradScaler +from torch.amp import GradScaler import ignite import ignite.distributed as idist diff --git a/ignite/engine/__init__.py b/ignite/engine/__init__.py index 6e82bc2f6bc7..8bce5e2a40f3 100644 --- a/ignite/engine/__init__.py +++ b/ignite/engine/__init__.py @@ -133,11 +133,11 @@ def supervised_training_step_amp( prepare_batch: Callable = _prepare_batch, model_transform: Callable[[Any], Any] = lambda output: output, output_transform: Callable[[Any, Any, Any, torch.Tensor], Any] = lambda x, y, y_pred, loss: loss.item(), - scaler: Optional["torch.cuda.amp.GradScaler"] = None, + scaler: Optional["torch.amp.GradScaler"] = None, gradient_accumulation_steps: int = 1, model_fn: Callable[[torch.nn.Module, Any], Any] = lambda model, x: model(x), ) -> Callable: - """Factory function for supervised training using ``torch.cuda.amp``. + """Factory function for supervised training using ``torch.amp``. Args: model: the model to train. @@ -170,7 +170,7 @@ def supervised_training_step_amp( model = ... optimizer = ... loss_fn = ... 
- scaler = torch.cuda.amp.GradScaler(2**10) + scaler = torch.amp.GradScaler(device='cuda', init_scale=2**10) update_fn = supervised_training_step_amp(model, optimizer, loss_fn, 'cuda', scaler=scaler) trainer = Engine(update_fn) @@ -393,8 +393,8 @@ def update(engine: Engine, batch: Sequence[torch.Tensor]) -> Union[Any, Tuple[to def _check_arg( - on_tpu: bool, on_mps: bool, amp_mode: Optional[str], scaler: Optional[Union[bool, "torch.cuda.amp.GradScaler"]] -) -> Tuple[Optional[str], Optional["torch.cuda.amp.GradScaler"]]: + on_tpu: bool, on_mps: bool, amp_mode: Optional[str], scaler: Optional[Union[bool, "torch.amp.GradScaler"]] +) -> Tuple[Optional[str], Optional["torch.amp.GradScaler"]]: """Checking tpu, mps, amp and GradScaler instance combinations.""" if on_mps and amp_mode: raise ValueError("amp_mode cannot be used with mps device. Consider using amp_mode=None or device='cuda'.") @@ -410,10 +410,10 @@ def _check_arg( raise ValueError(f"scaler argument is {scaler}, but amp_mode is {amp_mode}. Consider using amp_mode='amp'.") elif amp_mode == "amp" and isinstance(scaler, bool): try: - from torch.cuda.amp import GradScaler + from torch.amp import GradScaler except ImportError: raise ImportError("Please install torch>=1.6.0 to use scaler argument.") - scaler = GradScaler(enabled=True) + scaler = GradScaler(device='cuda', enabled=True) if on_tpu: return "tpu", None @@ -434,7 +434,7 @@ def create_supervised_trainer( output_transform: Callable[[Any, Any, Any, torch.Tensor], Any] = lambda x, y, y_pred, loss: loss.item(), deterministic: bool = False, amp_mode: Optional[str] = None, - scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False, + scaler: Union[bool, "torch.amp.GradScaler"] = False, gradient_accumulation_steps: int = 1, model_fn: Callable[[torch.nn.Module, Any], Any] = lambda model, x: model(x), ) -> Engine: @@ -459,7 +459,7 @@ def create_supervised_trainer( :class:`~ignite.engine.deterministic.DeterministicEngine`, otherwise :class:`~ignite.engine.engine.Engine` (default: False). amp_mode: can be ``amp`` or ``apex``, model and optimizer will be casted to float16 using - `torch.cuda.amp `_ for ``amp`` and + `torch.amp `_ for ``amp`` and using `apex `_ for ``apex``. (default: None) scaler: GradScaler instance for gradient scaling if `torch>=1.6.0` and ``amp_mode`` is ``amp``. If ``amp_mode`` is ``apex``, this argument will be ignored. @@ -689,7 +689,7 @@ def supervised_evaluation_step_amp( model_fn: Callable[[torch.nn.Module, Any], Any] = lambda model, x: model(x), ) -> Callable: """ - Factory function for supervised evaluation using ``torch.cuda.amp``. + Factory function for supervised evaluation using ``torch.amp``. Args: model: the model to train. @@ -771,7 +771,7 @@ def create_supervised_evaluator( to be assigned to engine's state.output after each iteration. Default is returning `(y_pred, y,)` which fits output expected by metrics. If you change it you should use `output_transform` in metrics. amp_mode: can be ``amp``, model will be casted to float16 using - `torch.cuda.amp `_ + `torch.amp `_ model_fn: the model function that receives `model` and `x`, and returns `y_pred`. 
Returns: diff --git a/tests/ignite/engine/test_create_supervised.py b/tests/ignite/engine/test_create_supervised.py index ba42baddddae..6122ebffbce9 100644 --- a/tests/ignite/engine/test_create_supervised.py +++ b/tests/ignite/engine/test_create_supervised.py @@ -48,7 +48,7 @@ def _default_create_supervised_trainer( trainer_device: Optional[str] = None, trace: bool = False, amp_mode: str = None, - scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False, + scaler: Union[bool, "torch.amp.GradScaler"] = False, with_model_transform: bool = False, with_model_fn: bool = False, ): @@ -104,7 +104,7 @@ def _test_create_supervised_trainer( trainer_device: Optional[str] = None, trace: bool = False, amp_mode: str = None, - scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False, + scaler: Union[bool, "torch.amp.GradScaler"] = False, with_model_transform: bool = False, with_model_fn: bool = False, ): @@ -170,10 +170,10 @@ def _(): @pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0") def test_create_supervised_training_scalar_assignment(): with mock.patch("ignite.engine._check_arg") as check_arg_mock: - check_arg_mock.return_value = None, torch.cuda.amp.GradScaler(enabled=False) + check_arg_mock.return_value = None, torch.amp.GradScaler(device="cuda", enabled=False) trainer, _ = _default_create_supervised_trainer(model_device="cpu", trainer_device="cpu", scaler=True) assert hasattr(trainer.state, "scaler") - assert isinstance(trainer.state.scaler, torch.cuda.amp.GradScaler) + assert isinstance(trainer.state.scaler, torch.amp.GradScaler) def _test_create_mocked_supervised_trainer( @@ -181,7 +181,7 @@ def _test_create_mocked_supervised_trainer( trainer_device: Optional[str] = None, trace: bool = False, amp_mode: str = None, - scaler: Union[bool, "torch.cuda.amp.GradScaler"] = False, + scaler: Union[bool, "torch.amp.GradScaler"] = False, ): with mock.patch("ignite.engine.supervised_training_step_amp") as training_step_amp_mock: with mock.patch("ignite.engine.supervised_training_step_apex") as training_step_apex_mock: @@ -446,7 +446,7 @@ def test_create_supervised_trainer_apex_error(): def mock_torch_cuda_amp_module(): with patch.dict( "sys.modules", - {"torch.amp": None, "torch.cuda.amp": None, "torch.amp.autocast_mode": None}, + {"torch.amp": None, "torch.amp.autocast_mode": None}, ): yield torch @@ -462,7 +462,7 @@ def test_create_supervised_trainer_amp_error(mock_torch_cuda_amp_module): @pytest.mark.skipif(Version(torch.__version__) < Version("1.12.0"), reason="Skip if < 1.12.0") def test_create_supervised_trainer_scaler_not_amp(): - scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available()) + scaler = torch.amp.GradScaler(enabled=torch.cuda.is_available()) with pytest.raises(ValueError, match=f"scaler argument is {scaler}, but amp_mode is None."): _test_create_supervised_trainer(amp_mode=None, scaler=scaler) @@ -540,7 +540,7 @@ def test_create_supervised_trainer_on_cuda_amp_scaler(): _test_create_mocked_supervised_trainer( model_device=model_device, trainer_device=trainer_device, amp_mode="amp", scaler=True ) - scaler = torch.cuda.amp.GradScaler(enabled=torch.cuda.is_available()) + scaler = torch.amp.GradScaler(enabled=torch.cuda.is_available()) _test_create_supervised_trainer( gradient_accumulation_steps=1, model_device=model_device,
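
For reference, a minimal sketch of the torch.amp usage this patch migrates to, assuming PyTorch >= 2.3 (where GradScaler is importable from torch.amp) and a CUDA device; the model, optimizer, and tensors below are placeholders rather than code from the patch:

import torch
from torch.amp import GradScaler, autocast

from ignite.engine import create_supervised_trainer

device = "cuda"
model = torch.nn.Linear(10, 2).to(device)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-3)
loss_fn = torch.nn.CrossEntropyLoss()

# One training step with the device-agnostic torch.amp API
# (replaces torch.cuda.amp.autocast / torch.cuda.amp.GradScaler).
scaler = GradScaler(device)
x = torch.randn(8, 10, device=device)
y = torch.randint(0, 2, (8,), device=device)

optimizer.zero_grad()
with autocast(device):
    loss = loss_fn(model(x), y)
scaler.scale(loss).backward()   # scale the loss, then backprop
scaler.step(optimizer)          # unscale gradients and step
scaler.update()                 # adjust the scale factor

# Equivalent ignite trainer: with amp_mode="amp" and scaler=True,
# _check_arg builds a torch.amp.GradScaler internally.
trainer = create_supervised_trainer(
    model, optimizer, loss_fn, device=device, amp_mode="amp", scaler=True
)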