diff --git a/captum/attr/_gnn/README.md b/captum/attr/_gnn/README.md
new file mode 100644
index 0000000000..d1099c378b
--- /dev/null
+++ b/captum/attr/_gnn/README.md
@@ -0,0 +1,17 @@
+# GNN Explainability in Captum
+
+This directory contains implementations of Graph Neural Network (GNN) explainability methods within the Captum library. These methods provide insights into the predictions made by GNN models.
+
+## Implemented Methods
+
+### GNNExplainer
+- **Description:** GNNExplainer is a model-agnostic method that identifies a compact subgraph structure and a small subset of node features that are influential for a GNN's prediction.
+- **Reference:** [GNNExplainer: Generating Explanations for Graph Neural Networks](https://arxiv.org/abs/1903.03894)
+
+### PGExplainer
+- **Description:** PGExplainer (Parameterized Explainer) is a method that trains a parameterized explainer network to generate explanations (edge masks) for GNN predictions.
+- **Reference:** [Parameterized Explainer for Graph Neural Network](https://arxiv.org/abs/2011.04573)
+
+## Usage
+
+Please refer to the Captum documentation and example notebooks for detailed instructions on how to use these GNN explainability methods.
diff --git a/captum/attr/_gnn/gnn_explainer.py b/captum/attr/_gnn/gnn_explainer.py
new file mode 100644
index 0000000000..438f7cd800
--- /dev/null
+++ b/captum/attr/_gnn/gnn_explainer.py
@@ -0,0 +1,311 @@
+import inspect
+import warnings
+from typing import Any, Callable, Dict, Optional, Tuple
+
+import torch
+import torch.nn.functional as F
+from captum.attr import Attribution
+from captum.log import log_usage
+from torch import Tensor
+from torch.nn import Parameter
+
+
+DEFAULT_COEFFS = {
+    "edge_size": 0.005,
+    "edge_ent": 1.0,
+    "node_feat_size": 1.0,
+    "node_feat_ent": 0.1,
+}
+
+
+class GNNExplainer(Attribution):
+    r"""
+    GNNExplainer is a model interpretability technique for Graph Neural
+    Networks (GNNs), as described in the paper:
+    `GNNExplainer: Generating Explanations for Graph Neural Networks
+    <https://arxiv.org/abs/1903.03894>`_
+
+    GNNExplainer aims to identify a compact subgraph structure and a small
+    subset of node features that are influential to the GNN's prediction.
+    """
+
+    def __init__(self, model: Callable) -> None:
+        r"""
+        Args:
+            model (Callable): The GNN model that is being explained.
+                The model should take at least two inputs:
+                node features (inputs) and edge_index.
+                It should return a single tensor output.
+                The model can optionally take edge_weights as a third input.
+                If edge_weights are used, they should be multiplied by the
+                learned edge_mask.
+        """
+        self.model = model
+        super().__init__(model)
+
+    def _init_masks(self, inputs: Tensor, edge_index: Tensor) -> None:
+        """Initialize learnable masks."""
+        num_nodes, num_node_feats = inputs.shape
+        num_edges = edge_index.shape[1]
+
+        self.node_feat_mask = Parameter(torch.randn(num_node_feats) * 0.1)
+        # Edge mask is initialized for each edge.
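+        # A small random init keeps sigmoid(mask) close to 0.5, so every edge
+        # starts out roughly "half on" and the optimizer is free to push each
+        # value toward 0 or 1.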
+ self.edge_mask = Parameter(torch.randn(num_edges) * 0.1) + + + def _clear_masks(self) -> None: + """Clear masks that were stored as attributes.""" + self.node_feat_mask = None + self.edge_mask = None + + + def _get_masked_inputs( + self, + inputs: Tensor, + edge_index: Tensor, + edge_mask_value: Tensor, + node_feat_mask_value: Tensor, + apply_sigmoid: bool = True, + ) -> Tuple[Tensor, Tensor]: + """ + Applies the masks to the inputs and edge_index. + Assumes edge_mask_value and node_feat_mask_value are the raw parameters. + """ + if apply_sigmoid: + node_feat_m = torch.sigmoid(node_feat_mask_value) + edge_m = torch.sigmoid(edge_mask_value) + else: + node_feat_m = node_feat_mask_value + edge_m = edge_mask_value + + masked_inputs = inputs * node_feat_m + # The edge_mask is used as edge_weights in the forward pass + return masked_inputs, edge_m + + + def _loss_fn( + self, + masked_pred: Tensor, + original_pred: Tensor, + edge_mask_value: Tensor, # raw mask before sigmoid + node_feat_mask_value: Tensor, # raw mask before sigmoid + coeffs: Dict[str, float], + target_class: Optional[int] = None, + ) -> Tensor: + """ + Computes the loss for GNNExplainer. + """ + if target_class is None: + # Use the predicted class if not specified + target_class = torch.argmax(original_pred, dim=-1) + + # 1. Prediction Loss (Negative Log-Likelihood for the target class) + # Ensure masked_pred is in log scale if model output isn't already + # Assuming model output is raw logits, apply log_softmax + log_probs = F.log_softmax(masked_pred, dim=-1) + pred_loss = -log_probs[0, target_class] # Assuming batch size 1 or explaining for one node + + # 2. Edge Mask Sparsity Loss + edge_m = torch.sigmoid(edge_mask_value) + loss_edge_size = coeffs["edge_size"] * torch.sum(edge_m) + + # 3. Edge Mask Entropy Loss (to encourage binary values) + ent_edge = -edge_m * torch.log2(edge_m + 1e-12) - (1 - edge_m) * torch.log2( + 1 - edge_m + 1e-12 + ) + loss_edge_ent = coeffs["edge_ent"] * torch.mean(ent_edge) + + + # 4. Node Feature Mask Sparsity Loss + node_feat_m = torch.sigmoid(node_feat_mask_value) + loss_node_feat_size = coeffs["node_feat_size"] * torch.sum(node_feat_m) + + # 5. Node Feature Mask Entropy Loss + ent_node_feat = -node_feat_m * torch.log2(node_feat_m + 1e-12) - \ + (1-node_feat_m) * torch.log2(1-node_feat_m + 1e-12) + loss_node_feat_ent = coeffs["node_feat_ent"] * torch.mean(ent_node_feat) + + total_loss = ( + pred_loss + + loss_edge_size + + loss_edge_ent + + loss_node_feat_size + + loss_node_feat_ent + ) + return total_loss + + + @log_usage() + def attribute( + self, + inputs: Tensor, + edge_index: Tensor, + target_node: Optional[int] = None, # Specific node to explain + target_class: Optional[int] = None, # Specific class to explain + num_epochs: int = 100, + lr: float = 0.01, + coeffs: Optional[Dict[str, float]] = None, + **kwargs: Any, + ) -> Tuple[Tensor, Tensor]: + r""" + Explains the GNN's prediction for a given node or graph. + + Args: + inputs (Tensor): The node features. Shape: (num_nodes, num_node_features) + edge_index (Tensor): The edge index of the graph. + Shape: (2, num_edges) + target_node (int, optional): The node for which the explanation + is generated. If None, the explanation is for the + entire graph's prediction. Default: None + target_class (int, optional): The specific class for which the + explanation is generated. If None, the class with the + highest prediction score is chosen. Default: None + num_epochs (int): The number of epochs to train the masks. 
+ Default: 100 + lr (float): The learning rate for optimizing the masks. + Default: 0.01 + coeffs (Dict[str, float], optional): Coefficients for the different + loss terms (edge_size, edge_ent, node_feat_size, + node_feat_ent). Default: Predefined DEFAULT_COEFFS. + **kwargs (Any): Additional arguments that are passed to the GNN model + during the forward pass. + + Returns: + Tuple[Tensor, Tensor]: + - node_feat_mask (Tensor): The learned node feature mask + Shape: (num_node_features,) + - edge_mask (Tensor): The learned edge_mask (attributions for edges) + Shape: (num_edges,) + """ + if coeffs is None: + coeffs = DEFAULT_COEFFS + + self._init_masks(inputs, edge_index) + optimizer = torch.optim.Adam([self.node_feat_mask, self.edge_mask], lr=lr) + + # --- Improved edge_weight handling: Check model signature --- + model_accepts_edge_weight = False + # Check if the model is a torch.nn.Module and has a forward method + if isinstance(self.model, torch.nn.Module) and hasattr(self.model, "forward"): + sig = inspect.signature(self.model.forward) + if "edge_weight" in sig.parameters: + model_accepts_edge_weight = True + elif callable(self.model) and not isinstance(self.model, torch.nn.Module): + # For general callables (not nn.Module), inspect directly. + try: + sig = inspect.signature(self.model) + if "edge_weight" in sig.parameters: + model_accepts_edge_weight = True + except ValueError: + # Some callables (e.g. built-ins) may not have a signature. + pass + # --- End of improved edge_weight handling --- + + # Get original model prediction (logits) + # For the original prediction, we don't use the edge_mask yet. + original_pred = self.model(inputs, edge_index, **kwargs) + if isinstance(original_pred, tuple): # Handle models returning multiple outputs + original_pred = original_pred[0] + + + # Determine target_class if not provided + if target_class is None: + if target_node is not None: + # Explain the prediction for the target_node + _target_class = torch.argmax(original_pred[target_node]).item() + else: + # Explain the graph-level prediction (e.g., max of summed node embeddings) + # This might require a different handling of original_pred + # For now, let's assume original_pred is [num_nodes, num_classes] + # and we take the class with max score for the graph. + # This part might need refinement based on specific GNN model for graph pred. + _target_class = torch.argmax(original_pred.sum(dim=0)).item() + else: + _target_class = target_class + + + for epoch in range(num_epochs): + optimizer.zero_grad() + + masked_inputs, current_edge_mask_weights = self._get_masked_inputs( + inputs, + edge_index, + self.edge_mask, + self.node_feat_mask, + apply_sigmoid=True, # Sigmoid is applied within _get_masked_inputs for weights + ) + + model_kwargs = kwargs.copy() + if model_accepts_edge_weight: + model_kwargs["edge_weight"] = current_edge_mask_weights + masked_pred = self.model(masked_inputs, edge_index, **model_kwargs) + else: + if epoch == 0 and not getattr(self, "_warned_edge_weight_this_call", False): + warnings.warn( + "The GNN model's forward method does not explicitly accept " + "'edge_weight'. The learned edge mask might not be utilized by " + "the model. 
Please ensure your model can utilize edge weights " + "for the GNNExplainer to be fully effective.", UserWarning + ) + self._warned_edge_weight_this_call = True # Flag for current call + masked_pred = self.model(masked_inputs, edge_index, **kwargs) + + if isinstance(masked_pred, tuple): + masked_pred = masked_pred[0] + + # We need to decide how to get the relevant prediction for the loss + # If target_node is set, use its prediction. Otherwise, use graph-level pred. + pred_for_loss = masked_pred + original_pred_for_loss = original_pred + if target_node is not None: + pred_for_loss = masked_pred[target_node].unsqueeze(0) + original_pred_for_loss = original_pred[target_node].unsqueeze(0) + # If explaining graph and model output is per node, sum up for graph prediction + # This is a common approach but might need to be adapted based on the GCN. + elif masked_pred.ndim > 1 and masked_pred.shape[0] > 1 : # num_nodes x num_classes + pred_for_loss = masked_pred.sum(dim=0).unsqueeze(0) + original_pred_for_loss = original_pred.sum(dim=0).unsqueeze(0) + + + loss = self._loss_fn( + pred_for_loss, + original_pred_for_loss, + self.edge_mask, # Pass raw mask + self.node_feat_mask, # Pass raw mask + coeffs, + _target_class, + ) + + loss.backward() + optimizer.step() + + final_edge_mask = torch.sigmoid(self.edge_mask).detach() + final_node_feat_mask = torch.sigmoid(self.node_feat_mask).detach() + + # Clear the warning flag for this call if it was set + if hasattr(self, "_warned_edge_weight_this_call"): + delattr(self, "_warned_edge_weight_this_call") + + self._clear_masks() # Clean up masks from attributes + + return final_node_feat_mask, final_edge_mask + + + def __deepcopy__(self, memo) -> "GNNExplainer": + """ + Custom deepcopy implementation for GNNExplainer. + This method is called by `copy.deepcopy`. + It ensures that the GNNExplainer instance, including its model, + is correctly copied. Learnable masks (node_feat_mask, edge_mask) + are not part of the explainer's persistent state as they are + initialized within the `attribute` method and cleared afterwards; + therefore, they don't need special handling here. + """ + # The `Attribution` base class's __deepcopy__ handles the `model` attribute. + # If GNNExplainer had other attributes requiring deep copying, + # they would be handled here. 
For example:
+        # new_copy = self.__class__(self.model)
+        # memo[id(self)] = new_copy
+        # new_copy.some_other_attribute = copy.deepcopy(self.some_other_attribute, memo)
+        # return new_copy
+        return super().__deepcopy__(memo)  # type: ignore
diff --git a/captum/attr/_gnn/pg_explainer.py b/captum/attr/_gnn/pg_explainer.py
new file mode 100644
index 0000000000..9317ffdd62
--- /dev/null
+++ b/captum/attr/_gnn/pg_explainer.py
@@ -0,0 +1,302 @@
+import copy
+from typing import Any, Dict, Optional
+
+import torch
+import torch.nn.functional as F
+import torch.optim as optim
+from captum.attr import Attribution
+from captum.log import log_usage
+from torch import Tensor, nn
+
+# Default coefficients for PGExplainer loss terms
+DEFAULT_PGEXPLAINER_COEFFS = {
+    "prediction_ce": 1.0,  # Cross-entropy for fidelity
+    "edge_size": 0.01,  # Sparsity: L1 norm for edge mask
+    "edge_ent": 0.1,  # Entropy for edge mask (encourage binary values)
+}
+
+
+class PGExplainer(Attribution):
+    r"""
+    PGExplainer (Parameterized Graph Explainer) is a model interpretability
+    technique for Graph Neural Networks (GNNs) from the paper:
+    `Parameterized Explainer for Graph Neural Network
+    <https://arxiv.org/abs/2011.04573>`_
+
+    PGExplainer trains a separate neural network (the explainer network) to
+    generate edge masks for explanations. It aims to provide instance-level
+    explanations by identifying important edges.
+    """
+
+    def __init__(self, model: nn.Module, explainer_net: nn.Module, name: str = "PGExplainer") -> None:
+        r"""
+        Args:
+            model (nn.Module): The GNN model that is being explained.
+                It should take node features and edge_index as input,
+                and can optionally accept `edge_weight`.
+            explainer_net (nn.Module): The explainer network that is trained to
+                generate edge masks. This network typically takes graph
+                structure information (e.g., node features of connected
+                nodes for each edge) and outputs a scalar per edge.
+            name (str, optional): A human-readable name for the explainer.
+        """
+        self.explainer_net = explainer_net
+        super().__init__(model)
+        self.name = name
+
+    def _get_explainer_inputs(
+        self, inputs: Tensor, edge_index: Tensor, model_outputs: Optional[Tensor] = None
+    ) -> Tensor:
+        """
+        Prepares inputs for the explainer_net.
+        A common strategy is to concatenate features of source and target nodes
+        for each edge, and optionally, the GNN's output/embeddings for these nodes.
+
+        Args:
+            inputs (Tensor): Node features. Shape: (num_nodes, num_node_features)
+            edge_index (Tensor): Edge index. Shape: (2, num_edges)
+            model_outputs (Optional[Tensor]): Node embeddings or outputs from the main
+                GNN model. Shape: (num_nodes, D)
+
+        Returns:
+            Tensor: Input tensor for explainer_net.
+                Shape: (num_edges, explainer_input_dim)
+        """
+        src_nodes, dest_nodes = edge_index[0], edge_index[1]
+        explainer_inputs = [inputs[src_nodes], inputs[dest_nodes]]
+
+        if model_outputs is not None:
+            explainer_inputs.append(model_outputs[src_nodes])
+            explainer_inputs.append(model_outputs[dest_nodes])
+
+        return torch.cat(explainer_inputs, dim=-1)
+
+    def _calculate_loss(
+        self,
+        edge_mask_logits: Tensor,
+        masked_pred: Tensor,
+        original_pred_for_loss: Tensor,
+        target_class_for_loss: Tensor,
+        coeffs: Dict[str, float],
+        temperature: float = 1.0,
+    ) -> Tensor:
+        """
+        Calculates the total loss for training the explainer_net.
+        """
+        # Apply sigmoid and temperature to get edge probabilities
+        edge_probs = torch.sigmoid(edge_mask_logits / temperature)
+
+        # 1.
Prediction Cross-Entropy Loss (Fidelity) + # Assumes masked_pred and original_pred_for_loss are logits + ce_loss = F.cross_entropy(masked_pred, target_class_for_loss.expand(masked_pred.shape[0])) + loss = coeffs["prediction_ce"] * ce_loss + + # 2. Edge Mask Sparsity Loss (L1 norm) + loss_edge_size = coeffs["edge_size"] * torch.mean(edge_probs) # Mean instead of sum for stability + loss += loss_edge_size + + # 3. Edge Mask Entropy Loss (to encourage binary values) + # Prevents mask values from being stuck at 0.5 + entropy = -edge_probs * torch.log2(edge_probs + 1e-12) - \ + (1 - edge_probs) * torch.log2(1 - edge_probs + 1e-12) + loss_edge_ent = coeffs["edge_ent"] * torch.mean(entropy) + loss += loss_edge_ent + + return loss + + + @log_usage() + def attribute( + self, + inputs: Tensor, + edge_index: Tensor, + target_node_idx: Optional[Tensor] = None, # Node indices for which to explain + target_class: Optional[Tensor] = None, # Target class for each explanation + train_mode: bool = False, + epochs: int = 100, + lr: float = 0.01, + loss_coeffs: Optional[Dict[str, float]] = None, + temperature: float = 1.0, # Temperature for sigmoid in loss + model_kwargs: Optional[Dict[str, Any]] = None, # For main model + explainer_kwargs: Optional[Dict[str, Any]] = None, # For explainer_net + ) -> Tensor: + r""" + Trains the explainer network or generates an edge mask using a trained one. + + Args: + inputs (Tensor): Node features. Shape: (num_nodes, num_node_features) + edge_index (Tensor): Edge index. Shape: (2, num_edges) + target_node_idx (Optional[Tensor]): Indices of target nodes for which + explanations are required or for which loss is computed + during training. If None, assumes graph-level explanation. + Shape: (num_targets,) + target_class (Optional[Tensor]): Target class for each node/graph in + target_node_idx. If None, it's inferred from the model's + prediction. Shape: (num_targets,) + train_mode (bool): If True, trains `self.explainer_net`. Otherwise, + uses the trained `explainer_net` for inference. + Default: False + epochs (int): Number of epochs for training. Default: 100 (if train_mode) + lr (float): Learning rate for training. Default: 0.01 (if train_mode) + loss_coeffs (Optional[Dict[str, float]]): Coefficients for loss terms. + Uses DEFAULT_PGEXPLAINER_COEFFS if None. + temperature (float): Temperature for scaling edge mask logits before + sigmoid, affects sharpness of probabilities. Default: 1.0 + model_kwargs (Optional[Dict[str, Any]]): Additional arguments for the main GNN model. + explainer_kwargs (Optional[Dict[str, Any]]): Additional arguments for the explainer network. + + Returns: + Tensor: Edge mask (probabilities). Shape: (num_edges,) + If train_mode is True, this is the mask from the last batch/epoch, + primarily for inspection, as the main result is the trained explainer. + """ + if model_kwargs is None: + model_kwargs = {} + if explainer_kwargs is None: + explainer_kwargs = {} + if loss_coeffs is None: + loss_coeffs = DEFAULT_PGEXPLAINER_COEFFS.copy() + + # Use model's eval mode for generating embeddings if needed, and for original preds + self.model.eval() + # Get initial node embeddings/outputs from the main model (optional for explainer_net) + # This depends on explainer_net's architecture. For now, let's assume it might use them. + # h_nodes = self.model(inputs, edge_index, **model_kwargs) # Or specific layers + # if isinstance(h_nodes, tuple): h_nodes = h_nodes[0] + # For simplicity, _get_explainer_inputs will just use `inputs` for now. 
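+        # (With this default strategy the explainer sees [x_src || x_dst] for
+        # every edge, i.e. a tensor of shape (num_edges, 2 * num_node_features),
+        # and is expected to return one logit per edge.)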
+        # User can design explainer_net to take pre-computed embeddings via explainer_kwargs.
+
+        explainer_input_feats = self._get_explainer_inputs(inputs, edge_index)  # No model_outputs for now
+
+        if train_mode:
+            self.explainer_net.train()
+            self.model.eval()  # Keep main model in eval mode during explainer training
+
+            optimizer = optim.Adam(self.explainer_net.parameters(), lr=lr)
+
+            # Get original predictions (for calculating fidelity loss).
+            # These are the "ground truth" predictions we want the subgraph to match.
+            original_pred_logits = self.model(inputs, edge_index, **model_kwargs)
+            if isinstance(original_pred_logits, tuple):
+                original_pred_logits = original_pred_logits[0]
+
+            if target_node_idx is not None:
+                original_pred_for_loss = original_pred_logits[target_node_idx]
+            else:  # Graph-level explanation
+                original_pred_for_loss = original_pred_logits.sum(dim=0, keepdim=True)  # Example aggregation
+
+            if target_class is None:
+                _target_class_for_loss = torch.argmax(original_pred_for_loss, dim=-1)
+            else:
+                _target_class_for_loss = target_class
+
+            for epoch in range(epochs):
+                optimizer.zero_grad()
+
+                edge_mask_logits = self.explainer_net(explainer_input_feats, edge_index, **explainer_kwargs)
+                edge_mask_probs_for_model = torch.sigmoid(edge_mask_logits / temperature)  # For model forward pass
+
+                # Masked prediction: pass edge_mask_probs_for_model as edge_weight.
+                # This assumes self.model accepts 'edge_weight'.
+                masked_pred_logits = self.model(
+                    inputs,
+                    edge_index,
+                    edge_weight=edge_mask_probs_for_model.squeeze(-1),
+                    **model_kwargs,
+                )
+                if isinstance(masked_pred_logits, tuple):
+                    masked_pred_logits = masked_pred_logits[0]
+
+                if target_node_idx is not None:
+                    masked_pred_for_loss = masked_pred_logits[target_node_idx]
+                else:  # Graph-level
+                    masked_pred_for_loss = masked_pred_logits.sum(dim=0, keepdim=True)
+
+                loss = self._calculate_loss(
+                    edge_mask_logits,
+                    masked_pred_for_loss,
+                    original_pred_for_loss,  # Should be from the original graph
+                    _target_class_for_loss,
+                    loss_coeffs,
+                    temperature,
+                )
+                loss.backward()
+                optimizer.step()
+                # print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item()}")  # Optional logging
+
+            # After training, return the latest edge mask from the trained explainer
+            self.explainer_net.eval()
+            edge_mask_logits = self.explainer_net(explainer_input_feats, edge_index, **explainer_kwargs)
+            final_edge_mask = torch.sigmoid(edge_mask_logits / temperature).detach()
+            return final_edge_mask.squeeze(-1) if final_edge_mask.ndim > 1 else final_edge_mask
+
+        else:  # Inference mode
+            self.explainer_net.eval()
+            edge_mask_logits = self.explainer_net(explainer_input_feats, edge_index, **explainer_kwargs)
+            edge_mask_probs = torch.sigmoid(edge_mask_logits / temperature).detach()
+            return edge_mask_probs.squeeze(-1) if edge_mask_probs.ndim > 1 else edge_mask_probs
+
+    def __deepcopy__(self, memo) -> "PGExplainer":
+        r"""
+        Custom deepcopy implementation.
+        The `Attribution` base class's __deepcopy__ handles `self.model`;
+        only the attributes specific to PGExplainer are handled here.
+        """
+        new_copy = super().__deepcopy__(memo)  # type: ignore
+        new_copy.explainer_net = copy.deepcopy(self.explainer_net, memo)
+        new_copy.name = self.name  # str is immutable; direct assignment suffices
+        return new_copy
diff --git a/sphinx/source/gnn_explainer.rst b/sphinx/source/gnn_explainer.rst
new file mode 100644
index 0000000000..dadf611727
--- /dev/null
+++ b/sphinx/source/gnn_explainer.rst
@@ -0,0 +1,7 @@
+GNNExplainer
+============
+
+.. automodule:: captum.attr._gnn.gnn_explainer
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/sphinx/source/index.rst b/sphinx/source/index.rst
index 80f328d8a5..c771008fca 100644
--- a/sphinx/source/index.rst
+++ b/sphinx/source/index.rst
@@ -12,6 +12,8 @@ Captum API Reference
   :caption: API Reference

   attribution
+  gnn_explainer
+  pg_explainer
   llm_attr
   noise_tunnel
   layer
diff --git a/sphinx/source/pg_explainer.rst b/sphinx/source/pg_explainer.rst
new file mode 100644
index 0000000000..81a363130d
--- /dev/null
+++ b/sphinx/source/pg_explainer.rst
@@ -0,0 +1,7 @@
+PGExplainer
+===========
+
+..
automodule:: captum.attr._gnn.pg_explainer + :members: + :undoc-members: + :show-inheritance: diff --git a/tests/attr/_gnn/test_gnn_explainer.py b/tests/attr/_gnn/test_gnn_explainer.py new file mode 100644 index 0000000000..1488bd7a03 --- /dev/null +++ b/tests/attr/_gnn/test_gnn_explainer.py @@ -0,0 +1,211 @@ +import unittest +import warnings + +import torch +import torch.nn as nn +import torch.nn.functional as F + +from captum.attr._gnn.gnn_explainer import GNNExplainer, DEFAULT_COEFFS +from tests.helpers.basic_models import BasicGNN, BasicGNN_MultiLayer + + +class TestGNNExplainer(unittest.TestCase): + def _create_simple_graph(self, num_nodes=5, num_node_features=3, num_edges=6, device="cpu"): + inputs = torch.rand(num_nodes, num_node_features, device=device) + edge_index = torch.randint(0, num_nodes, (2, num_edges), device=device) + # Ensure no self-loops for simplicity in some GNN models, though GNNExplainer should handle it + edge_index = edge_index[:, edge_index[0] != edge_index[1]] + return inputs, edge_index + + def test_gnn_explainer_init(self) -> None: + model = BasicGNN(3, 10, 2) + explainer = GNNExplainer(model) + self.assertIsNotNone(explainer) + self.assertIs(explainer.model, model) + + def test_gnn_explainer_attribute_smoke(self) -> None: + num_nodes, num_features, num_edges = 5, 3, 6 + inputs, edge_index = self._create_simple_graph(num_nodes, num_features, num_edges) + model = BasicGNN(num_features, 10, 2) # in_channels, hidden_channels, out_channels + + explainer = GNNExplainer(model) + node_feat_mask, edge_mask = explainer.attribute( + inputs, edge_index, num_epochs=2, lr=0.1 # Short training for smoke test + ) + + self.assertEqual(node_feat_mask.shape, (num_features,)) + self.assertEqual(edge_mask.shape, (edge_index.shape[1],)) + self.assertTrue(torch.all(node_feat_mask >= 0) and torch.all(node_feat_mask <= 1)) + self.assertTrue(torch.all(edge_mask >= 0) and torch.all(edge_mask <= 1)) + + def test_gnn_explainer_attribute_target_node(self) -> None: + num_nodes, num_features, num_edges = 5, 3, 6 + inputs, edge_index = self._create_simple_graph(num_nodes, num_features, num_edges) + model = BasicGNN(num_features, 10, 2) + explainer = GNNExplainer(model) + target_node = 0 + + node_feat_mask, edge_mask = explainer.attribute( + inputs, edge_index, target_node=target_node, num_epochs=2 + ) + self.assertEqual(node_feat_mask.shape, (num_features,)) + self.assertEqual(edge_mask.shape, (edge_index.shape[1],)) + + def test_gnn_explainer_attribute_target_class(self) -> None: + num_nodes, num_features, num_edges = 5, 3, 6 + inputs, edge_index = self._create_simple_graph(num_nodes, num_features, num_edges) + model = BasicGNN(num_features, 10, 2) # 2 output classes + explainer = GNNExplainer(model) + target_class = 1 + + node_feat_mask, edge_mask = explainer.attribute( + inputs, edge_index, target_class=target_class, num_epochs=2 + ) + self.assertEqual(node_feat_mask.shape, (num_features,)) + self.assertEqual(edge_mask.shape, (edge_index.shape[1],)) + + def test_gnn_explainer_graph_level_explanation(self) -> None: + num_nodes, num_features, num_edges = 5, 3, 6 + inputs, edge_index = self._create_simple_graph(num_nodes, num_features, num_edges) + # Model output for graph-level could be sum of node embeddings then a linear layer + # BasicGNN outputs per node, GNNExplainer handles sum internally for graph explanation + model = BasicGNN(num_features, 10, 2) + explainer = GNNExplainer(model) + + node_feat_mask, edge_mask = explainer.attribute( + inputs, edge_index, target_node=None, 
num_epochs=2 # Explicitly graph-level + ) + self.assertEqual(node_feat_mask.shape, (num_features,)) + self.assertEqual(edge_mask.shape, (edge_index.shape[1],)) + + + def test_gnn_explainer_model_with_edge_weight(self) -> None: + num_nodes, num_features, num_edges = 4, 3, 5 + inputs, edge_index = self._create_simple_graph(num_nodes, num_features, num_edges) + + # Define a model that uses edge_weight + class ModelWithEdgeWeight(BasicGNN): + def forward(self, x, edge_index, edge_weight=None, **kwargs): + # Simplified: just pass it along if GNN layer supports it + # Or use it directly: + # for i in range(self.num_layers -1): + # x = F.relu(self.convs[i](x, edge_index, edge_weight=edge_weight if i==0 else None)) + # x = self.convs[-1](x, edge_index, edge_weight=edge_weight if self.num_layers-1 == 0 else None) + # For this test, the fact that it's accepted is key. + # BasicGNN's internal GCNConv doesn't directly use edge_weight in its base form. + # Let's make a simpler model for this test or adapt BasicGNN + + # Let's use a model structure that explicitly can take edge_weight + # For now, we assume BasicGNN can be modified or we use a mock + # that has 'edge_weight' in its signature. + # The GNNExplainer checks signature, not if it's *used*. + if edge_weight is not None: + x = x * edge_weight.mean() # Dummy use of edge_weight to show it could be used + return super().forward(x, edge_index, **kwargs) + + model = ModelWithEdgeWeight(num_features, 5, 2) + explainer = GNNExplainer(model) + + with warnings.catch_warnings(record=True) as caught_warnings: + warnings.simplefilter("always") + explainer.attribute(inputs, edge_index, num_epochs=1) + self.assertEqual(len(caught_warnings), 0, "Should not warn if model accepts edge_weight.") + # Check for specific warning text if possible/needed + + def test_gnn_explainer_model_without_edge_weight(self) -> None: + num_nodes, num_features, num_edges = 4, 3, 5 + inputs, edge_index = self._create_simple_graph(num_nodes, num_features, num_edges) + + # BasicGNN by default does not have edge_weight in its direct forward signature + # (though its GCNConv layers might, GNNExplainer checks the model's forward) + # To be sure, let's define one that definitely doesn't. 
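+        # Note: GNNExplainer only inspects the forward signature, not whether
+        # the weights are actually used, so a model whose signature lacks
+        # `edge_weight` triggers the UserWarning checked below.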
+        class ModelWithoutEdgeWeight(nn.Module):
+            def __init__(self, in_channels, hidden_channels, out_channels):
+                super().__init__()
+                self.conv1 = nn.Linear(in_channels, hidden_channels)  # Simplified
+                self.conv2 = nn.Linear(hidden_channels, out_channels)
+
+            def forward(self, x, edge_index_ignored, **kwargs):  # No edge_weight
+                # Simplified forward, ignoring edge_index for this dummy model
+                num_nodes = x.shape[0]
+                x = F.relu(self.conv1(x.mean(dim=0, keepdim=True)))  # Aggregate then process
+                x = self.conv2(x)
+                # Output needs to be per-node for GNNExplainer's default loss processing.
+                # This dummy model isn't a real GNN; it exists only for signature testing.
+                return torch.randn(num_nodes, self.conv2.out_features)
+
+        model = ModelWithoutEdgeWeight(num_features, 5, 2)
+        explainer = GNNExplainer(model)
+
+        with self.assertWarnsRegex(UserWarning, "does not explicitly accept 'edge_weight'"):
+            explainer.attribute(inputs, edge_index, num_epochs=1)
+        # attribute() clears the _warned_edge_weight_this_call flag before returning
+        self.assertFalse(hasattr(explainer, "_warned_edge_weight_this_call"))
+
+    def test_gnn_explainer_mask_properties(self) -> None:
+        num_nodes, num_features, num_edges = 6, 4, 8
+        inputs, edge_index = self._create_simple_graph(num_nodes, num_features, num_edges)
+        model = BasicGNN(num_features, 10, 3)
+        explainer = GNNExplainer(model)
+
+        node_feat_mask, edge_mask = explainer.attribute(inputs, edge_index, num_epochs=3)
+
+        self.assertEqual(node_feat_mask.shape, (num_features,))
+        self.assertTrue(torch.all(node_feat_mask >= 0.0) and torch.all(node_feat_mask <= 1.0),
+                        "Node feature mask values should be between 0 and 1.")
+
+        self.assertEqual(edge_mask.shape, (edge_index.shape[1],))
+        self.assertTrue(torch.all(edge_mask >= 0.0) and torch.all(edge_mask <= 1.0),
+                        "Edge mask values should be between 0 and 1.")
+
+    def test_gnn_explainer_deepcopy(self) -> None:
+        import copy
+        model = BasicGNN(3, 5, 2)
+        explainer = GNNExplainer(model)
+        explainer.some_custom_attribute = "test_value"  # Add a dummy attribute
+
+        explainer_copy = copy.deepcopy(explainer)
+
+        self.assertIsNot(explainer, explainer_copy)
+        self.assertIsInstance(explainer_copy, GNNExplainer)
+        self.assertIsNot(explainer.model, explainer_copy.model, "Model should be deepcopied.")
+        # Check that model parameters are different objects but initially have equal values
+        for p1, p2 in zip(explainer.model.parameters(), explainer_copy.model.parameters()):
+            self.assertIsNot(p1, p2)
+            self.assertTrue(torch.equal(p1.data, p2.data))
+
+        self.assertEqual(explainer_copy.some_custom_attribute, "test_value")
+        explainer_copy.some_custom_attribute = "new_value"
+        self.assertEqual(explainer.some_custom_attribute, "test_value")  # Original should be unchanged
+
+        # Test functionality after deepcopy
+        inputs, edge_index = self._create_simple_graph()
+        try:
+            node_feat_mask, edge_mask = explainer_copy.attribute(inputs, edge_index, num_epochs=1)
+            self.assertIsNotNone(node_feat_mask)
+            self.assertIsNotNone(edge_mask)
+        except Exception as e:
+            self.fail(f"Attribute call failed after deepcopy: {e}")
+
+    def test_gnn_explainer_default_coeffs(self) -> None:
+        inputs, edge_index = self._create_simple_graph()
+        model = BasicGNN(inputs.shape[1], 5, 2)
+        explainer = GNNExplainer(model)
+        # This test indirectly checks that the default coeffs are used without error.
+        # A more direct test would involve checking the loss values, which is complex.
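+        # (coeffs=None makes attribute() fall back to DEFAULT_COEFFS:
+        # edge_size=0.005, edge_ent=1.0, node_feat_size=1.0, node_feat_ent=0.1.)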
+        try:
+            explainer.attribute(inputs, edge_index, num_epochs=1, coeffs=None)  # Explicitly use default
+        except Exception as e:
+            self.fail(f"Attribute call failed with default coeffs: {e}")
+
+        custom_coeffs = DEFAULT_COEFFS.copy()
+        custom_coeffs["edge_size"] = 0.5
+        try:
+            explainer.attribute(inputs, edge_index, num_epochs=1, coeffs=custom_coeffs)
+        except Exception as e:
+            self.fail(f"Attribute call failed with custom coeffs: {e}")
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/attr/_gnn/test_pg_explainer.py b/tests/attr/_gnn/test_pg_explainer.py
new file mode 100644
index 0000000000..cd3190ae96
--- /dev/null
+++ b/tests/attr/_gnn/test_pg_explainer.py
@@ -0,0 +1,195 @@
+import copy
+import unittest
+
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch import Tensor
+
+from captum.attr._gnn.pg_explainer import PGExplainer, DEFAULT_PGEXPLAINER_COEFFS
+from tests.helpers.basic_models import BasicGNN  # Assuming this is available
+
+
+# A simple explainer network for testing PGExplainer
+class SimpleExplainerNet(nn.Module):
+    def __init__(self, input_dim, hidden_dim=16):
+        super().__init__()
+        # input_dim is typically 2 * node_feature_dim (+ optional 2 * gnn_embedding_dim)
+        self.fc1 = nn.Linear(input_dim, hidden_dim)
+        self.fc2 = nn.Linear(hidden_dim, 1)  # Outputs a single logit per edge
+
+    def forward(self, x, edge_index, **kwargs):  # edge_index might be used for graph context
+        x = F.relu(self.fc1(x))
+        return self.fc2(x)
+
+
+class TestPGExplainer(unittest.TestCase):
+    def _create_simple_graph(self, num_nodes=5, num_node_features=3, num_edges=6, device="cpu"):
+        inputs = torch.rand(num_nodes, num_node_features, device=device)
+        edge_index = torch.randint(0, num_nodes, (2, num_edges), device=device)
+        edge_index = edge_index[:, edge_index[0] != edge_index[1]]  # Ensure no self-loops
+        # Ensure all nodes are part of at least one edge for some tests
+        if edge_index.shape[1] > 0:
+            for i in range(num_nodes):
+                if i not in edge_index:  # if a node is isolated
+                    # add a dummy edge if possible; otherwise the graph may be too small
+                    if edge_index.shape[1] < num_edges:
+                        if i != edge_index[0, 0]:
+                            new_edge = torch.tensor([[i], [edge_index[0, 0]]], device=device, dtype=torch.long)
+                        else:  # if i is the only other node, connect to the next node
+                            new_edge = torch.tensor([[i], [(i + 1) % num_nodes]], device=device, dtype=torch.long)
+                        edge_index = torch.cat([edge_index, new_edge], dim=1)
+
+        return inputs, edge_index.to(torch.long)
+
+    def setUp(self):
+        self.num_nodes, self.num_features, self.num_edges = 6, 4, 8
+        self.inputs, self.edge_index = self._create_simple_graph(
+            self.num_nodes, self.num_features, self.num_edges
+        )
+
+        self.gnn_model = BasicGNN(self.num_features, hidden_channels=5, out_channels=2)
+
+        # Determine explainer_input_dim based on _get_explainer_inputs.
+        # Default: 2 * num_node_features
+        self.explainer_input_dim = 2 * self.num_features
+        self.explainer_net = SimpleExplainerNet(self.explainer_input_dim)
+
+        self.pg_explainer = PGExplainer(self.gnn_model, self.explainer_net)
+
+    def test_pgexplainer_init(self):
+        self.assertIsInstance(self.pg_explainer, PGExplainer)
+        self.assertIs(self.pg_explainer.model, self.gnn_model)
+        self.assertIs(self.pg_explainer.explainer_net, self.explainer_net)
+
+    def test_pgexplainer_get_explainer_inputs(self):
+        expl_inputs = self.pg_explainer._get_explainer_inputs(self.inputs, self.edge_index)
+        self.assertEqual(expl_inputs.shape, (self.edge_index.shape[1], self.explainer_input_dim))
+
+        # Test with model_outputs
+
model_outputs_mock = torch.rand(self.num_nodes, 5) # 5 is embedding dim + expl_inputs_with_emb = self.pg_explainer._get_explainer_inputs( + self.inputs, self.edge_index, model_outputs=model_outputs_mock + ) + expected_dim_with_emb = self.explainer_input_dim + 2 * 5 + self.assertEqual(expl_inputs_with_emb.shape, (self.edge_index.shape[1], expected_dim_with_emb)) + + def test_pgexplainer_calculate_loss_smoke(self): + edge_mask_logits = torch.randn(self.edge_index.shape[1], 1) + # Assume target_node_idx is None for graph-level explanation for simplicity in this smoke test + num_classes = self.gnn_model.convs[-1].out_features + masked_pred = torch.randn(1, num_classes) # graph-level prediction + original_pred = torch.randn(1, num_classes) + target_class = torch.randint(0, num_classes, (1,)) + + loss = self.pg_explainer._calculate_loss( + edge_mask_logits, masked_pred, original_pred, target_class, DEFAULT_PGEXPLAINER_COEFFS + ) + self.assertIsInstance(loss, Tensor) + self.assertEqual(loss.ndim, 0) # scalar loss + + def test_pgexplainer_attribute_inference_mode_smoke(self): + edge_mask = self.pg_explainer.attribute(self.inputs, self.edge_index, train_mode=False) + self.assertEqual(edge_mask.shape, (self.edge_index.shape[1],)) + self.assertTrue(torch.all(edge_mask >= 0) and torch.all(edge_mask <= 1)) + + def test_pgexplainer_attribute_train_mode_smoke(self): + # Short training cycle + edge_mask = self.pg_explainer.attribute( + self.inputs, self.edge_index, train_mode=True, epochs=2, lr=0.01 + ) + self.assertEqual(edge_mask.shape, (self.edge_index.shape[1],)) + self.assertTrue(torch.all(edge_mask >= 0) and torch.all(edge_mask <= 1)) + + + def test_pgexplainer_training_updates_parameters(self): + initial_params = [p.clone() for p in self.explainer_net.parameters()] + + self.pg_explainer.attribute( + self.inputs, self.edge_index, train_mode=True, epochs=3, lr=0.01, + # Provide target_node_idx for node-level loss calculation if model output is per node + target_node_idx=torch.tensor([0, 1]) + ) + + updated_params = list(self.explainer_net.parameters()) + self.assertGreater(len(initial_params), 0) + self.assertEqual(len(initial_params), len(updated_params)) + + params_changed = False + for p_init, p_updated in zip(initial_params, updated_params): + if not torch.equal(p_init, p_updated): + params_changed = True + break + self.assertTrue(params_changed, "Explainer network parameters should change after training.") + + + def test_pgexplainer_target_handling_train_mode(self): + # Test with target_node_idx and target_class + target_nodes = torch.tensor([0, 1, 2]) + # Assuming model has 2 output classes + target_classes = torch.tensor([0, 1, 0]) + + try: + self.pg_explainer.attribute( + self.inputs, self.edge_index, + target_node_idx=target_nodes, + target_class=target_classes, + train_mode=True, epochs=1, lr=0.01 + ) + except Exception as e: + self.fail(f"Training with target_node_idx and target_class failed: {e}") + + def test_pgexplainer_custom_loss_coeffs(self): + custom_coeffs = DEFAULT_PGEXPLAINER_COEFFS.copy() + custom_coeffs["edge_size"] = 0.5 + custom_coeffs["prediction_ce"] = 0.1 + + try: + self.pg_explainer.attribute( + self.inputs, self.edge_index, train_mode=True, epochs=1, + loss_coeffs=custom_coeffs, target_node_idx=torch.tensor([0]) + ) + except Exception as e: + self.fail(f"Training with custom loss coefficients failed: {e}") + + def test_pgexplainer_deepcopy(self): + explainer_copy = copy.deepcopy(self.pg_explainer) + + self.assertIsNot(self.pg_explainer, explainer_copy) + 
self.assertIsInstance(explainer_copy, PGExplainer) + + # Check model and explainer_net are new instances + self.assertIsNot(self.pg_explainer.model, explainer_copy.model) + self.assertIsNot(self.pg_explainer.explainer_net, explainer_copy.explainer_net) + + # Check parameters are copied + for p_orig, p_copy in zip(self.pg_explainer.model.parameters(), explainer_copy.model.parameters()): + self.assertIsNot(p_orig, p_copy) + self.assertTrue(torch.equal(p_orig.data, p_copy.data)) + + for p_orig, p_copy in zip(self.pg_explainer.explainer_net.parameters(), explainer_copy.explainer_net.parameters()): + self.assertIsNot(p_orig, p_copy) + self.assertTrue(torch.equal(p_orig.data, p_copy.data)) + + # Test functionality after deepcopy (inference mode) + try: + edge_mask = explainer_copy.attribute(self.inputs, self.edge_index, train_mode=False) + self.assertIsNotNone(edge_mask) + self.assertEqual(edge_mask.shape, (self.edge_index.shape[1],)) + except Exception as e: + self.fail(f"Attribute call (inference) failed after deepcopy: {e}") + + # Test functionality after deepcopy (training mode) + try: + initial_params_copy = [p.clone() for p in explainer_copy.explainer_net.parameters()] + explainer_copy.attribute(self.inputs, self.edge_index, train_mode=True, epochs=1, lr=0.01, target_node_idx=torch.tensor([0])) + params_changed_copy = False + for p_init, p_updated in zip(initial_params_copy, explainer_copy.explainer_net.parameters()): + if not torch.equal(p_init, p_updated): + params_changed_copy = True + break + self.assertTrue(params_changed_copy, "Copied explainer network parameters should change after training.") + except Exception as e: + self.fail(f"Attribute call (training) failed after deepcopy: {e}") + +if __name__ == "__main__": + unittest.main() diff --git a/tutorials/GNNExplainer_Tutorial.ipynb b/tutorials/GNNExplainer_Tutorial.ipynb new file mode 100644 index 0000000000..d72a522a07 --- /dev/null +++ b/tutorials/GNNExplainer_Tutorial.ipynb @@ -0,0 +1,317 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# GNNExplainer Tutorial\n", + "\n", + "This tutorial demonstrates how to use `GNNExplainer` from the Captum library to explain predictions made by a Graph Neural Network (GNN). GNNExplainer identifies a compact subgraph structure and a small subset of node features that are most influential for a GNN's prediction.\n", + "\n", + "**Reference:** [GNNExplainer: Generating Explanations for Graph Neural Networks](https://arxiv.org/abs/1903.03894)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Setup\n", + "\n", + "First, let's install and import the necessary libraries. We'll need `torch`, `torch_geometric` for graph data and GNN layers, `captum` for GNNExplainer, and `networkx` / `matplotlib` for visualization." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "!pip install -q torch torchvision torchaudio\n", + "!pip install -q torch_geometric\n", + "!pip install -q captum\n", + "!pip install -q networkx matplotlib" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "import torch.nn.functional as F\n", + "from torch_geometric.datasets import KarateClub\n", + "from torch_geometric.nn import GCNConv\n", + "from torch_geometric.utils import to_networkx\n", + "import networkx as nx\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Assuming GNNExplainer is in this path relative to the notebook or installed in the env\n", + "# For a real Captum integration, it would be: from captum.attr import GNNExplainer\n", + "from captum.attr._gnn.gnn_explainer import GNNExplainer # Adjust if necessary\n", + "\n", + "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Data Loading\n", + "\n", + "We'll use the classic Zachary's Karate Club dataset from `torch_geometric.datasets`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dataset = KarateClub()\n", + "data = dataset[0].to(device)\n", + "print(f\"Dataset: {dataset.name}\")\n", + "print(f\"Number of nodes: {data.num_nodes}\")\n", + "print(f\"Number of edges: {data.num_edges}\")\n", + "print(f\"Number of features: {data.num_node_features}\")\n", + "print(f\"Number of classes: {dataset.num_classes}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Model Definition\n", + "\n", + "Let's define a simple Graph Convolutional Network (GCN) model for node classification." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "class GCN(torch.nn.Module):\n", + " def __init__(self, in_channels, hidden_channels, out_channels):\n", + " super(GCN, self).__init__()\n", + " self.conv1 = GCNConv(in_channels, hidden_channels)\n", + " self.conv2 = GCNConv(hidden_channels, out_channels)\n", + "\n", + " def forward(self, x, edge_index, edge_weight=None):\n", + " x = self.conv1(x, edge_index, edge_weight=edge_weight)\n", + " x = F.relu(x)\n", + " x = F.dropout(x, training=self.training)\n", + " x = self.conv2(x, edge_index, edge_weight=edge_weight) # Pass edge_weight to second layer too if desired\n", + " return F.log_softmax(x, dim=1)\n", + "\n", + "model = GCN(dataset.num_node_features, 16, dataset.num_classes).to(device)\n", + "print(model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Model Training\n", + "We need to train the model to get meaningful explanations. GNNExplainer works by finding important graph structures for the *current* model's predictions." 
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)\n",
+    "\n",
+    "model.train()\n",
+    "for epoch in range(200):\n",
+    "    optimizer.zero_grad()\n",
+    "    out = model(data.x, data.edge_index)\n",
+    "    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])\n",
+    "    loss.backward()\n",
+    "    optimizer.step()\n",
+    "    if epoch % 20 == 0:\n",
+    "        print(f\"Epoch {epoch}, Loss: {loss.item():.4f}\")\n",
+    "\n",
+    "model.eval()\n",
+    "_, pred_labels = model(data.x, data.edge_index).max(dim=1)\n",
+    "# KarateClub provides only a train_mask, so we report accuracy over all nodes\n",
+    "correct_nodes = (pred_labels == data.y).sum()\n",
+    "accuracy = int(correct_nodes) / data.num_nodes\n",
+    "print(f'Accuracy: {accuracy:.4f}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Attribution with GNNExplainer\n",
+    "\n",
+    "Now, let's use GNNExplainer to understand the prediction for a specific node."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Instantiate GNNExplainer\n",
+    "explainer = GNNExplainer(model)\n",
+    "\n",
+    "# Choose a target node to explain\n",
+    "target_node_idx = 0\n",
+    "print(f\"Explaining node: {target_node_idx}\")\n",
+    "print(f\"Node true label: {data.y[target_node_idx].item()}\")\n",
+    "print(f\"Node predicted label: {pred_labels[target_node_idx].item()}\")\n",
+    "\n",
+    "# Get attributions\n",
+    "# GNNExplainer's attribute method might require target_class if not taking argmax internally.\n",
+    "# For node classification, we usually explain the predicted class or the true class.\n",
+    "target_class = pred_labels[target_node_idx].item()\n",
+    "\n",
+    "node_feat_mask, edge_mask = explainer.attribute(\n",
+    "    inputs=data.x,\n",
+    "    edge_index=data.edge_index,\n",
+    "    target_node=target_node_idx,\n",
+    "    target_class=target_class,  # Specify class if GNNExplainer needs it\n",
+    "    num_epochs=150,  # Number of epochs to train the masks\n",
+    "    lr=0.01  # Learning rate for mask optimization\n",
+    ")\n",
+    "\n",
+    "print(\"\\nNode Feature Mask (first 5 features):\")\n",
+    "print(node_feat_mask[:5])\n",
+    "print(\"\\nEdge Mask (first 5 edges):\")\n",
+    "print(edge_mask[:5])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The `node_feat_mask` tells us the importance of each input feature for the specified node's prediction. The `edge_mask` indicates the importance of each edge."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. Visualization\n",
+    "\n",
+    "Let's visualize the explanation. We can highlight the important edges based on the `edge_mask`."
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def plot_graph_with_masks(edge_index, edge_mask, target_node_idx, title, node_labels=None, threshold=0.5):\n", + " num_nodes = edge_index.max().item() + 1\n", + " g_nx = nx.Graph()\n", + " g_nx.add_nodes_from(range(num_nodes))\n", + " \n", + " # Add edges with weights from edge_mask\n", + " for i, (u, v) in enumerate(edge_index.t().tolist()):\n", + " g_nx.add_edge(u, v, weight=edge_mask[i].item())\n", + " \n", + " pos = nx.spring_layout(g_nx, seed=42) # Kamada-Kawai for better structure sometimes\n", + " \n", + " plt.figure(figsize=(10, 8))\n", + " \n", + " # Draw nodes\n", + " node_colors = ['lightblue'] * num_nodes\n", + " node_colors[target_node_idx] = 'red' # Highlight target node\n", + " nx.draw_networkx_nodes(g_nx, pos, node_color=node_colors, node_size=500)\n", + " \n", + " # Draw edges: highlight important ones\n", + " edge_weights = [g_nx[u][v]['weight'] for u, v in g_nx.edges()]\n", + " edge_alphas = [w if w > threshold else 0.1 for w in edge_weights] # Make less important edges more transparent\n", + " edge_widths = [3*w if w > threshold else 0.5 for w in edge_weights]\n", + "\n", + " nx.draw_networkx_edges(g_nx, pos, width=edge_widths, alpha=edge_alphas, edge_color='gray')\n", + " \n", + " # Draw labels\n", + " if node_labels is not None:\n", + " labels = {i: f\"{i}\\n(L:{node_labels[i].item()})\" for i in g_nx.nodes()}\n", + " else:\n", + " labels = {i: str(i) for i in g_nx.nodes()}\n", + " nx.draw_networkx_labels(g_nx, pos, labels=labels, font_size=10)\n", + " \n", + " plt.title(title)\n", + " plt.axis('off')\n", + " plt.show()\n", + "\n", + "# Visualize the explanation\n", + "plot_graph_with_masks(data.edge_index, edge_mask, target_node_idx, \n", + " f'GNNExplainer Explanation for Node {target_node_idx} (Predicted: {pred_labels[target_node_idx].item()})',\n", + " node_labels=data.y, threshold=0.2) # Lower threshold for more visibility" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "In the plot above, the target node is highlighted (e.g., in red). Edges with higher importance scores from the `edge_mask` are shown as thicker and less transparent. This helps identify the computational subgraph that GNNExplainer deems important for the prediction of the target node." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Node Feature Importance\n", + "The `node_feat_mask` indicates which input features are important. For the Karate Club dataset, features are one-hot encoded node identities, so the feature mask might not be as directly interpretable as in other contexts. However, if features had semantic meaning (e.g., age, degree in a social network), this mask would highlight which of those contributed most." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(f\"Node Feature Mask for Node {target_node_idx}:\")\n", + "for i, val in enumerate(node_feat_mask):\n", + " if val > 0.1: # Show features with some importance\n", + " print(f\" Feature {i}: {val.item():.4f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 6. Conclusion\n", + "\n", + "This tutorial demonstrated the basic workflow of using `GNNExplainer` with Captum:\n", + "1. Training a GNN model.\n", + "2. Instantiating `GNNExplainer` with the trained model.\n", + "3. Calling the `attribute` method to get node feature and edge masks.\n", + "4. 
Visualizing these masks to interpret the model's prediction for a specific node.\n",
+    "\n",
+    "GNNExplainer helps in understanding which parts of the graph (edges) and which node features are crucial for the GNN's decision-making process, enhancing transparency and trust in GNN models."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/tutorials/PGExplainer_Tutorial.ipynb b/tutorials/PGExplainer_Tutorial.ipynb
new file mode 100644
index 0000000000..2bec9e02a9
--- /dev/null
+++ b/tutorials/PGExplainer_Tutorial.ipynb
@@ -0,0 +1,338 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# PGExplainer Tutorial\n",
+    "\n",
+    "This tutorial demonstrates how to use `PGExplainer` from the Captum library to explain predictions made by a Graph Neural Network (GNN). PGExplainer trains a parameterized neural network (the explainer network) to generate edge masks that explain the GNN's predictions.\n",
+    "\n",
+    "**Reference:** [Parameterized Explainer for Graph Neural Network](https://arxiv.org/abs/2011.04573)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 1. Setup\n",
+    "\n",
+    "Install and import necessary libraries."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "!pip install -q torch torchvision torchaudio\n",
+    "!pip install -q torch_geometric\n",
+    "!pip install -q captum\n",
+    "!pip install -q networkx matplotlib"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import torch.nn.functional as F\n",
+    "from torch_geometric.datasets import KarateClub\n",
+    "from torch_geometric.nn import GCNConv\n",
+    "from torch_geometric.utils import to_networkx\n",
+    "import networkx as nx\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Assuming PGExplainer is in this path or installed\n",
+    "from captum.attr._gnn.pg_explainer import PGExplainer  # Adjust if necessary\n",
+    "\n",
+    "device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 2. Data Loading\n",
+    "\n",
+    "We use Zachary's Karate Club dataset."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "dataset = KarateClub()\n",
+    "data = dataset[0].to(device)\n",
+    "print(f\"Dataset: {dataset.name}\")\n",
+    "print(f\"Nodes: {data.num_nodes}, Edges: {data.num_edges}, Features: {data.num_node_features}, Classes: {dataset.num_classes}\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 3. GNN Model Definition (Main Model to Explain)\n",
+    "\n",
+    "Define and train a simple GCN model. PGExplainer will explain this model's predictions."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class GCN(torch.nn.Module):\n",
+    "    def __init__(self, in_channels, hidden_channels, out_channels):\n",
+    "        super().__init__()\n",
+    "        self.conv1 = GCNConv(in_channels, hidden_channels)\n",
+    "        self.conv2 = GCNConv(hidden_channels, out_channels)\n",
+    "\n",
+    "    def forward(self, x, edge_index, edge_weight=None):\n",
+    "        # PGExplainer will pass its learned edge weights here\n",
+    "        x = self.conv1(x, edge_index, edge_weight=edge_weight)\n",
+    "        x = F.relu(x)\n",
+    "        x = F.dropout(x, training=self.training)\n",
+    "        x = self.conv2(x, edge_index, edge_weight=edge_weight)  # Also pass to conv2\n",
+    "        return F.log_softmax(x, dim=1)\n",
+    "\n",
+    "gnn_model = GCN(dataset.num_node_features, 16, dataset.num_classes).to(device)\n",
+    "\n",
+    "# Train the GNN model\n",
+    "optimizer_gnn = torch.optim.Adam(gnn_model.parameters(), lr=0.01, weight_decay=5e-4)\n",
+    "gnn_model.train()\n",
+    "for epoch in range(200):\n",
+    "    optimizer_gnn.zero_grad()\n",
+    "    out = gnn_model(data.x, data.edge_index)\n",
+    "    loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])\n",
+    "    loss.backward()\n",
+    "    optimizer_gnn.step()\n",
+    "    if epoch % 20 == 0:\n",
+    "        print(f\"GNN Training Epoch {epoch}, Loss: {loss.item():.4f}\")\n",
+    "\n",
+    "gnn_model.eval()\n",
+    "_, pred_labels = gnn_model(data.x, data.edge_index).max(dim=1)\n",
+    "# KarateClub provides only a train_mask, so evaluate on the remaining nodes\n",
+    "test_mask = ~data.train_mask\n",
+    "correct_nodes = (pred_labels[test_mask] == data.y[test_mask]).sum()\n",
+    "accuracy = int(correct_nodes) / int(test_mask.sum())\n",
+    "print(f'GNN Test Accuracy: {accuracy:.4f}')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 4. Explainer Network Definition\n",
+    "\n",
+    "Define the network that PGExplainer will train to generate edge masks. It typically takes the concatenated features of an edge's source and target nodes."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "class PGExplainerNet(nn.Module):\n",
+    "    def __init__(self, input_dim, hidden_dim=32, output_dim=1):\n",
+    "        super().__init__()\n",
+    "        # input_dim = 2 * node_feature_dim (from PGExplainer._get_explainer_inputs)\n",
+    "        self.fc1 = nn.Linear(input_dim, hidden_dim)\n",
+    "        self.fc2 = nn.Linear(hidden_dim, output_dim)\n",
+    "\n",
+    "    def forward(self, x, edge_index, **kwargs):  # edge_index can provide context\n",
+    "        x = F.relu(self.fc1(x))\n",
+    "        x = self.fc2(x)  # Raw logits for the edge mask\n",
+    "        return x\n",
+    "\n",
+    "# The input dimension for the explainer network is 2 * num_node_features\n",
+    "# because PGExplainer._get_explainer_inputs concatenates the features of the\n",
+    "# source and destination nodes of each edge.\n",
+    "explainer_input_dim = 2 * data.num_node_features\n",
+    "explainer_net = PGExplainerNet(explainer_input_dim).to(device)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 5. PGExplainer Initialization\n",
+    "\n",
+    "Instantiate PGExplainer with the trained GNN and the explainer network."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "pg_explainer = PGExplainer(gnn_model, explainer_net)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 6. Training the PGExplainer (Explainer Network)\n",
+    "\n",
+    "Call `attribute` with `train_mode=True` to train the `explainer_net`."
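+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before training, it helps to see what the explainer network computes. The cell below is a hedged sketch of the mapping it learns, assuming (as noted in the `PGExplainerNet` comments above) that `PGExplainer` feeds each edge the concatenated features of its two endpoints: the network emits one raw logit per edge, which a sigmoid squashes into a soft mask. Since `explainer_net` is still untrained at this point, the resulting mask is essentially random."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Hedged sketch of the explainer's edge scoring (assumes concatenated\n",
+    "# endpoint features, matching the input_dim chosen above).\n",
+    "src, dst = data.edge_index\n",
+    "edge_feats = torch.cat([data.x[src], data.x[dst]], dim=-1)  # (num_edges, 2*F)\n",
+    "with torch.no_grad():\n",
+    "    logits = explainer_net(edge_feats, data.edge_index).squeeze(-1)\n",
+    "    soft_mask = torch.sigmoid(logits)  # values in (0, 1)\n",
+    "print(soft_mask.shape, soft_mask.min().item(), soft_mask.max().item())"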
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"Starting PGExplainer (explainer_net) training...\")\n",
+    "# The explainer can be trained on all nodes or a subset; here we train it to\n",
+    "# explain the predictions for the nodes in the training mask. target_class is\n",
+    "# omitted so the loss uses the GNN's own predictions. If target_node_idx is\n",
+    "# not provided, the implementation may fall back to a graph-level objective.\n",
+    "\n",
+    "pg_explainer.attribute(\n",
+    "    inputs=data.x,\n",
+    "    edge_index=data.edge_index,\n",
+    "    target_node_idx=data.train_mask.nonzero().squeeze(),  # Explain training nodes\n",
+    "    train_mode=True,\n",
+    "    epochs=50,  # Number of epochs to train the explainer_net\n",
+    "    lr=0.005,\n",
+    "    # loss_coeffs can be specified; otherwise defaults are used\n",
+    ")\n",
+    "print(\"PGExplainer training finished.\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 7. Generating Explanations (Inference)\n",
+    "\n",
+    "After training `explainer_net`, call `attribute` with `train_mode=False` to get edge masks."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(\"Generating edge mask using trained PGExplainer...\")\n",
+    "edge_mask = pg_explainer.attribute(\n",
+    "    inputs=data.x,\n",
+    "    edge_index=data.edge_index,\n",
+    "    train_mode=False\n",
+    ")\n",
+    "\n",
+    "print(f\"Generated edge mask shape: {edge_mask.shape}\")\n",
+    "print(\"Edge Mask (first 10 edges):\")\n",
+    "print(edge_mask[:10])"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 8. Visualization\n",
+    "\n",
+    "Visualize the explanation by highlighting important edges: first as a ranked list of the top-scoring edges, then as a graph plot."
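+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Before drawing the graph, it is often easier to read the mask as a ranked list. The cell below prints the top-k highest-scoring edges (a small helper sketch; `k = 10` is an arbitrary choice)."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# List the k edges with the highest mask scores (k is arbitrary here).\n",
+    "k = 10\n",
+    "topk_vals, topk_idx = torch.topk(edge_mask.reshape(-1), k)\n",
+    "for score, (u, v) in zip(topk_vals.tolist(), data.edge_index[:, topk_idx].t().tolist()):\n",
+    "    print(f\"edge ({u}, {v}): {score:.3f}\")"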
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def plot_pg_explanation(edge_index, edge_mask, title, node_labels=None, threshold=0.7):\n",
+    "    num_nodes = edge_index.max().item() + 1\n",
+    "    g_nx = nx.Graph()\n",
+    "    g_nx.add_nodes_from(range(num_nodes))\n",
+    "\n",
+    "    for i, (u, v) in enumerate(edge_index.t().tolist()):\n",
+    "        g_nx.add_edge(u, v, weight=edge_mask[i].item())\n",
+    "\n",
+    "    pos = nx.spring_layout(g_nx, seed=42)\n",
+    "\n",
+    "    plt.figure(figsize=(10, 8))\n",
+    "\n",
+    "    node_color = ['lightblue'] * num_nodes\n",
+    "    # Example: highlight a specific node for context, e.g., node 0\n",
+    "    # node_color[0] = 'pink'\n",
+    "    nx.draw_networkx_nodes(g_nx, pos, node_color=node_color, node_size=500)\n",
+    "\n",
+    "    edge_weights = [g_nx[u][v]['weight'] for u, v in g_nx.edges()]\n",
+    "    edge_alphas = [w if w > threshold else 0.1 for w in edge_weights]\n",
+    "    edge_widths = [3 * w if w > threshold else 0.5 for w in edge_weights]\n",
+    "    nx.draw_networkx_edges(g_nx, pos, width=edge_widths, alpha=edge_alphas, edge_color='gray')\n",
+    "\n",
+    "    if node_labels is not None:\n",
+    "        labels = {i: f\"{i}\\n(L:{node_labels[i].item()})\" for i in g_nx.nodes()}\n",
+    "    else:\n",
+    "        labels = {i: str(i) for i in g_nx.nodes()}\n",
+    "    nx.draw_networkx_labels(g_nx, pos, labels=labels, font_size=10)\n",
+    "\n",
+    "    plt.title(title)\n",
+    "    plt.axis('off')\n",
+    "    plt.show()\n",
+    "\n",
+    "plot_pg_explanation(data.edge_index, edge_mask,\n",
+    "                    'PGExplainer - Edge Importance Mask (Thresholded)',\n",
+    "                    node_labels=data.y, threshold=0.5)  # Adjust threshold for visualization"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The plot shows the graph with edges weighted by their importance according to the trained PGExplainer. Thicker, more opaque edges are the ones the explainer considers most important for the GNN's predictions. Because PGExplainer is trained across many instances, the mask reflects patterns the explainer has learned in general rather than an explanation optimized for a single prediction."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## 9. Conclusion\n",
+    "\n",
+    "This tutorial demonstrated the workflow for `PGExplainer`:\n",
+    "1. Training a GNN model that you want to explain.\n",
+    "2. Defining an `explainer_net` (a small neural network).\n",
+    "3. Instantiating `PGExplainer` with both the GNN and the `explainer_net`.\n",
+    "4. Training the `explainer_net` by calling `attribute` with `train_mode=True`.\n",
+    "5. Generating global edge importance masks with the trained `explainer_net` by calling `attribute` with `train_mode=False`.\n",
+    "6. Visualizing the resulting edge mask.\n",
+    "\n",
+    "Because it learns a dedicated explanation model, PGExplainer provides amortized (or instance-agnostic, depending on how it is trained) explanations: once the explainer network is trained, it can generate masks for new instances without per-instance optimization."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.9.7"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}