Commit 55f9501

Merge pull request #11 from opendilab/dev/dist

dev(hansbug): add torch dist support && add log information inside

2 parents: b02aa2d + bf8ed68

File tree: 12 files changed, +595 −8 lines

.github/workflows/test.yml

Lines changed: 27 additions & 1 deletion

@@ -21,6 +21,23 @@ jobs:
           - '3.11'
           - '3.12'
           - '3.13'
+        torch-version:
+          - 'none'
+          - '2.4'
+          - '2.7'
+        exclude:
+          - python-version: '3.8'
+            torch-version: '2.7'
+          - python-version: '3.9'
+            torch-version: '2.4'
+          - python-version: '3.10'
+            torch-version: '2.4'
+          - python-version: '3.11'
+            torch-version: '2.4'
+          - python-version: '3.12'
+            torch-version: '2.4'
+          - python-version: '3.13'
+            torch-version: '2.4'

     steps:
       - name: Get system version for Linux
@@ -77,8 +94,17 @@ jobs:
         uses: actions/setup-python@v6
         with:
           python-version: ${{ matrix.python-version }}
-      - name: Install dependencies
+      - name: Install dependencies With Torch
         shell: bash
+        if: ${{ matrix.torch-version != 'none' }}
+        run: |
+          python -m pip install --upgrade pip
+          pip install --upgrade flake8 setuptools wheel twine
+          pip install 'torch==${{ matrix.torch-version }}' -r requirements.txt
+          pip install -r requirements-test.txt
+      - name: Install dependencies Without Torch
+        shell: bash
+        if: ${{ matrix.torch-version == 'none' }}
         run: |
           python -m pip install --upgrade pip
           pip install --upgrade flake8 setuptools wheel twine
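
For reference, a minimal sketch of how this matrix expands (assuming the python-version list covers 3.8 through 3.13; only part of it is visible in this hunk): torch 2.4 pairs only with Python 3.8, torch 2.7 pairs with Python 3.9 through 3.13, and the 'none' entry keeps a torch-free job on every Python version.

    from itertools import product

    # Hypothetical reconstruction of the effective CI matrix; the python-version
    # list is assumed, since only part of it appears in the visible hunk.
    python_versions = ['3.8', '3.9', '3.10', '3.11', '3.12', '3.13']
    torch_versions = ['none', '2.4', '2.7']
    excludes = {('3.8', '2.7')} | {(py, '2.4') for py in python_versions if py != '3.8'}

    jobs = [combo for combo in product(python_versions, torch_versions) if combo not in excludes]
    print(f"{len(jobs)} jobs: {jobs}")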

ditk/distributed/__init__.py

Lines changed: 1 addition & 0 deletions

@@ -0,0 +1 @@
+from .env import is_main_process, is_distributed, get_rank, get_world_size
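
This re-export lets callers import the helpers from the package root rather than from ditk.distributed.env; a minimal usage sketch:

    # Import through the package root, as enabled by the re-export above.
    from ditk.distributed import is_distributed, is_main_process

    if is_main_process():
        print("rank 0, or a non-distributed run")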

ditk/distributed/env.py

Lines changed: 110 additions & 0 deletions

@@ -0,0 +1,110 @@
+"""
+Distributed training utilities for PyTorch.
+
+This module provides utility functions to handle distributed training scenarios in PyTorch.
+It offers convenient methods to check distributed status, get process information, and
+determine the main process. The functions gracefully handle both distributed and
+non-distributed environments.
+
+Example::
+    >>> # Check if distributed training is active
+    >>> if is_distributed():
+    ...     print(f"Running on rank {get_rank()} of {get_world_size()}")
+    >>>
+    >>> # Execute code only on main process
+    >>> if is_main_process():
+    ...     print("This runs only on the main process")
+"""
+
+
+def is_distributed() -> bool:
+    """
+    Check if distributed training is available and initialized.
+
+    This function verifies whether PyTorch distributed training is both available
+    (compiled with distributed support) and properly initialized. It handles cases
+    where PyTorch or its distributed module might not be installed.
+
+    :return: True if distributed training is available and initialized, False otherwise.
+    :rtype: bool
+
+    Example::
+        >>> if is_distributed():
+        ...     print("Distributed training is active")
+        ... else:
+        ...     print("Running in single-process mode")
+    """
+    try:
+        import torch
+        import torch.distributed as dist
+    except (ImportError, ModuleNotFoundError):
+        return False
+
+    # Check if distributed is available (compiled with distributed support) and is initialized
+    return dist.is_available() and dist.is_initialized()
+
+
+def get_rank() -> int:
+    """
+    Get the global rank of the current process.
+
+    Returns the global rank (process ID) of the current process in distributed training.
+    In non-distributed environments, this function returns 0, making it safe to use
+    in both distributed and single-process scenarios.
+
+    :return: Global rank of the current process. Returns 0 if distributed training is not active.
+    :rtype: int
+
+    Example::
+        >>> rank = get_rank()
+        >>> print(f"Current process rank: {rank}")
+    """
+    if is_distributed():
+        import torch.distributed as dist
+        return dist.get_rank()
+    else:
+        return 0


+def get_world_size() -> int:
+    """
+    Get the total number of processes across all nodes.
+
+    Returns the total number of processes participating in distributed training.
+    In non-distributed environments, this function returns 1, ensuring consistent
+    behavior across different training setups.
+
+    :return: Total number of processes in the distributed training. Returns 1 if distributed training is not active.
+    :rtype: int
+
+    Example::
+        >>> world_size = get_world_size()
+        >>> print(f"Total number of processes: {world_size}")
+    """
+    if is_distributed():
+        import torch.distributed as dist
+        return dist.get_world_size()
+    else:
+        return 1
+
+
+# Utility functions for easier usage
+def is_main_process() -> bool:
+    """
+    Check if the current process is the main process (global rank 0).
+
+    This function is useful for executing code that should only run once across
+    all processes, such as logging, saving checkpoints, or printing progress.
+    In non-distributed environments, it always returns True.
+
+    :return: True if current process is main process (rank 0) or if distributed is not available.
+    :rtype: bool
+
+    Example::
+        >>> if is_main_process():
+        ...     print("Saving model checkpoint...")
+        ...     # Save checkpoint logic here
+    """
+    if not is_distributed():
+        return True
+    return get_rank() == 0
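
Taken together, these helpers make rank-aware code paths easy to write. A minimal usage sketch (the log_progress helper below is hypothetical, not part of this commit):

    import logging

    from ditk.distributed import get_rank, get_world_size, is_distributed, is_main_process


    def log_progress(step: int, loss: float) -> None:
        # Hypothetical helper: every process logs its own view of training,
        # but only rank 0 (or a single-process run) performs side effects.
        if is_distributed():
            logging.info("step %d | loss %.4f | rank %d of %d", step, loss, get_rank(), get_world_size())
        else:
            logging.info("step %d | loss %.4f | single-process run", step, loss)

        if is_main_process():
            # Safe place for checkpointing, metric export, etc.
            logging.info("main process: writing checkpoint / metrics")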

ditk/logging/rich.py

Lines changed: 141 additions & 3 deletions

@@ -1,38 +1,176 @@
+"""
+Rich logging utilities for distributed training environments.
+
+This module provides enhanced logging capabilities using the Rich library,
+with special support for distributed training scenarios. It includes utilities
+for creating properly formatted console outputs with terminal width detection,
+distributed rank information, and rich text formatting.
+
+The module automatically detects distributed training environments and can
+include rank information in log messages to help distinguish between different
+processes in multi-GPU or multi-node training setups.
+"""
+
 import logging
+import os
 import shutil
 from functools import lru_cache
+from typing import Optional

 from rich.console import Console
 from rich.logging import RichHandler

 import ditk
 from .base import _LogLevelType
+from ..distributed import is_distributed, get_rank, get_world_size

 # This value is set due the requirement of displaying the tables
 _DEFAULT_WIDTH = 170


 @lru_cache()
 def _get_terminal_width() -> int:
+    """
+    Get the current terminal width with caching for performance.
+
+    This function detects the terminal width and caches the result to avoid
+    repeated system calls. It falls back to a default width if terminal
+    size detection fails.
+
+    :return: The terminal width in characters.
+    :rtype: int
+
+    Example::
+        >>> width = _get_terminal_width()
+        >>> print(f"Terminal width: {width}")
+        Terminal width: 170
+    """
     width, _ = shutil.get_terminal_size(fallback=(_DEFAULT_WIDTH, 24))
     return width


 @lru_cache()
 def _get_rich_console(use_stdout: bool = False) -> Console:
+    """
+    Create and cache a Rich Console instance with appropriate configuration.
+
+    This function creates a Rich Console with the detected terminal width
+    and configures output to stderr by default (or stdout if specified).
+    The result is cached to ensure consistent console usage across the application.
+
+    :param use_stdout: Whether to use stdout instead of stderr for output.
+    :type use_stdout: bool
+
+    :return: A configured Rich Console instance.
+    :rtype: Console
+
+    Example::
+        >>> console = _get_rich_console()
+        >>> console.print("Hello, World!")
+        Hello, World!
+    """
     return Console(width=_get_terminal_width(), stderr=not use_stdout)


-_RICH_FMT = logging.Formatter(fmt="%(message)s", datefmt="[%m-%d %H:%M:%S]")
+def _get_log_format(
+        include_distributed: bool = True,
+        distributed_format: str = "[Rank {rank}/{world_size}][PID: {pid}]"
+) -> str:
+    """
+    Get the appropriate log format based on distributed training status.
+
+    This function generates a logging format string that optionally includes
+    distributed training information such as rank and world size. When
+    distributed training is detected and enabled, it prepends rank information
+    to log messages to help identify which process generated each log entry.
+
+    :param include_distributed: Whether to include distributed information in the format.
+    :type include_distributed: bool
+    :param distributed_format: Format string template for distributed info, should contain
+        {rank} and {world_size} placeholders.
+    :type distributed_format: str

+    :return: Format string for logging that includes distributed info if applicable.
+    :rtype: str
+
+    Example::
+        >>> # In a distributed environment
+        >>> format_str = _get_log_format(include_distributed=True)
+        >>> print(format_str)
+        [Rank 0/4] %(message)s
+
+        >>> # Without distributed info
+        >>> format_str = _get_log_format(include_distributed=False)
+        >>> print(format_str)
+        %(message)s
+    """
+    if include_distributed and is_distributed():
+        rank = get_rank()
+        world_size = get_world_size()
+        prefix = distributed_format.format(rank=rank, world_size=world_size, pid=os.getpid())
+        return f"{prefix} %(message)s"
+    else:
+        return "%(message)s"
+
+
+def _create_rich_handler(
+        use_stdout: bool = False,
+        level: _LogLevelType = logging.NOTSET,
+        include_distributed: bool = True,
+        distributed_format: Optional[str] = None
+) -> RichHandler:
+    """
+    Create a Rich handler with optional distributed training information.
+
+    This function creates a fully configured RichHandler that provides
+    enhanced logging output with rich text formatting, traceback highlighting,
+    and optional distributed training rank information. The handler is
+    configured with appropriate formatters and console settings.
+
+    :param use_stdout: Whether to use stdout instead of stderr for log output.
+    :type use_stdout: bool
+    :param level: Logging level threshold for this handler.
+    :type level: _LogLevelType
+    :param include_distributed: Whether to include distributed rank information
+        in log messages when running in distributed mode.
+    :type include_distributed: bool
+    :param distributed_format: Custom format template for distributed info.
+        If None, uses a default Rich markup format with
+        bold blue styling. Should contain {rank} and
+        {world_size} placeholders.
+    :type distributed_format: Optional[str]
+
+    :return: A configured RichHandler instance ready for use with Python logging.
+    :rtype: RichHandler
+
+    Example::
+        >>> # Create a basic rich handler
+        >>> handler = _create_rich_handler()
+        >>> logger = logging.getLogger("my_logger")
+        >>> logger.addHandler(handler)
+        >>> logger.info("This will be beautifully formatted!")
+
+        >>> # Create handler with custom distributed format
+        >>> handler = _create_rich_handler(
+        ...     distributed_format="[Process {rank}]",
+        ...     level=logging.INFO
+        ... )
+    """
+    if distributed_format is None:
+        distributed_format = "[bold blue]\\[Rank {rank}/{world_size}][/bold blue][bold blue]\\[PID: {pid}][/bold blue]"  # Rich markup support
+
+    # Dynamically create formatter with distributed information
+    rich_fmt = logging.Formatter(
+        fmt=_get_log_format(include_distributed, distributed_format),
+        datefmt="[%m-%d %H:%M:%S]"
+    )

-def _create_rich_handler(use_stdout: bool = False, level: _LogLevelType = logging.NOTSET) -> RichHandler:
     handler = RichHandler(
         level=level,
         console=_get_rich_console(use_stdout),
         rich_tracebacks=True,
         markup=True,
         tracebacks_suppress=[ditk],
     )
-    handler.setFormatter(_RICH_FMT)
+    handler.setFormatter(rich_fmt)
     return handler