diff --git a/nemo_automodel/components/config/loader.py b/nemo_automodel/components/config/loader.py
index 01c4196d0..a3b4cb28b 100644
--- a/nemo_automodel/components/config/loader.py
+++ b/nemo_automodel/components/config/loader.py
@@ -246,11 +246,13 @@ def instantiate(self, *args, **kwargs):
                 "Instantiation failed for `{}`\n"
                 "Accepted signature : {}\n"
                 "Positional args    : {}\n"
-                "Keyword args       : {}\n".format(
+                "Keyword args       : {}\n"
+                "Exception          : {}\n".format(
                     func.__name__,
                     sig,
                     args,
                     pprint.pformat(config_kwargs, compact=True, indent=4),
+                    e,
                 ),
                 file=sys.stderr,
             )
diff --git a/nemo_automodel/components/datasets/llm/chat_dataset.py b/nemo_automodel/components/datasets/llm/chat_dataset.py
index 84ff8e12c..51f36e8ae 100644
--- a/nemo_automodel/components/datasets/llm/chat_dataset.py
+++ b/nemo_automodel/components/datasets/llm/chat_dataset.py
@@ -133,6 +133,8 @@ def __init__(
         split: Optional[str] = None,
         name: Optional[str] = None,
         seq_length: Optional[int] = None,
+        padding: Union[str, bool] = "do_not_pad",
+        truncation: Union[str, bool] = "do_not_truncate",
         start_of_turn_token: Optional[str] = None,
         chat_template: Optional[str] = None,
     ) -> None:
@@ -149,6 +151,8 @@ def __init__(
 
         self.tokenizer = tokenizer
         self.seq_length = seq_length
+        self.padding = padding
+        self.truncation = truncation
         self.start_of_turn_token = start_of_turn_token
 
         self.dataset = _load_openai_messages(path_or_dataset_id, split=split, name=name)
@@ -178,6 +182,8 @@ def __getitem__(self, idx: int) -> Dict[str, List[int]]:
             eos_token_id,
             self.pad_token_id,
             seq_length=self.seq_length,
+            padding=self.padding,
+            truncation=self.truncation,
             tools=tools,
         )
         return sample
diff --git a/nemo_automodel/components/datasets/llm/column_mapped_text_instruction_dataset.py b/nemo_automodel/components/datasets/llm/column_mapped_text_instruction_dataset.py
index c83100550..8a9d82dbb 100644
--- a/nemo_automodel/components/datasets/llm/column_mapped_text_instruction_dataset.py
+++ b/nemo_automodel/components/datasets/llm/column_mapped_text_instruction_dataset.py
@@ -28,6 +28,8 @@
     format_prompt_completion,
 )
 
+logger = logging.getLogger(__name__)
+
 # Supported cases:
 # Format:
 #   - Context + question + answer
@@ -165,6 +167,8 @@ def __init__(
         name: Optional[str] = None,
         answer_only_loss_mask: bool = True,
         seq_length: Optional[int] = None,
+        padding: Union[str, bool] = "do_not_pad",
+        truncation: Union[str, bool] = "do_not_truncate",
         start_of_turn_token: Optional[str] = None,
         limit_dataset_samples: Optional[int] = None,
     ) -> None:
@@ -193,6 +197,12 @@ def __init__(
 
         assert tokenizer is not None, "Tokenizer is required"
         self.tokenizer = tokenizer
+        if getattr(self.tokenizer, "pad_token", None) is None:
+            if hasattr(self.tokenizer, "eos_token"):
+                self.tokenizer.pad_token = self.tokenizer.eos_token
+            else:
+                logger.warning("Setting tokenizer pad_token to ' ' because the tokenizer does not have an `eos_token`.")
+                self.tokenizer.pad_token = " "
 
         self.dataset = _load_dataset(path_or_dataset_id, split=split, streaming=False, name=name)
 
@@ -226,6 +236,8 @@ def __init__(
         self.answer_only_loss_mask = answer_only_loss_mask
         self.start_of_turn_token = start_of_turn_token
         self.seq_length = seq_length
+        self.padding = padding
+        self.truncation = truncation
 
     def __len__(self) -> int:  # noqa: D401
         """
@@ -255,6 +267,8 @@ def __getitem__(self, idx):  # noqa: D401
         row = self.dataset[idx]
         mapped = {dest: row[src] for dest, src in self.column_mapping.items() if src in row}
         mapped = self._apply_tokenizer(mapped)
+        if not any(label != -100 for label in mapped["labels"]):
+            return self.__getitem__((idx + 1) % len(self.dataset))
         assert _check_all_values_equal_length(mapped), "All values must be of the same length"
         return mapped
 
@@ -293,6 +307,8 @@ def _apply_tokenizer(self, sample: Dict[str, str]) -> Dict[str, List[int]]:
                 eos_token_id,
                 pad_token_id,
                 seq_length=self.seq_length,
+                padding=self.padding,
+                truncation=self.truncation,
             )
         else:
             prompt = " ".join(filter(lambda x: x is not None, (context, question, "")))
@@ -304,5 +320,7 @@ def _apply_tokenizer(self, sample: Dict[str, str]) -> Dict[str, List[int]]:
                 eos_token_id,
                 pad_token_id,
                 seq_length=self.seq_length,
+                padding=self.padding,
+                truncation=self.truncation,
                 answer_only_loss_mask=self.answer_only_loss_mask,
             )
diff --git a/nemo_automodel/components/datasets/llm/formatting_utils.py b/nemo_automodel/components/datasets/llm/formatting_utils.py
index a26f21906..dc5c66540 100644
--- a/nemo_automodel/components/datasets/llm/formatting_utils.py
+++ b/nemo_automodel/components/datasets/llm/formatting_utils.py
@@ -14,7 +14,7 @@
 
 import logging
 import re
-from typing import TYPE_CHECKING, Dict, List, Optional
+from typing import TYPE_CHECKING, Dict, List, Optional, Union
 
 logger = logging.getLogger(__name__)
 
@@ -66,6 +66,7 @@ def _package_tokenized_example(
     eos_token_id,
     pad_token_id,
     seq_length,
+    truncation=None,
 ):
     """
     Package a tokenized example with proper masking and padding.
@@ -77,7 +78,7 @@ def _package_tokenized_example(
         eos_token_id: The end-of-sequence token id.
         pad_token_id: The padding token id.
         seq_length: Optional sequence length for padding.
-
+        truncation: Optional truncation strategy.
     Returns:
         A dictionary with input_ids, labels, and attention_mask.
""" @@ -86,6 +87,8 @@ def _package_tokenized_example( if not _has_chat_template(tokenizer) and eos_token_id != input_ids[-1]: input_ids += [eos_token_id] assistant_masks += [1] + if not _has_chat_template(tokenizer) and pad_token_id is not None: + assistant_masks += [pad_token_id] labels = input_ids.copy() input_ids = input_ids[:-1] @@ -95,7 +98,7 @@ def _package_tokenized_example( labels[:] = [label if bool(m) else -100 for label, m in zip(labels, assistant_masks)] # remove BOS labels = labels[1:] - if not _has_chat_template(tokenizer): + if not _has_chat_template(tokenizer) and truncation is None: assert labels[-1] == eos_token_id, f"labels[-1]={labels[-1]} != eos_token_id={eos_token_id}" assert input_ids[-1] != eos_token_id, f"input_ids[-1]={input_ids[-1]} == eos_token_id={eos_token_id}" assert len(input_ids) == len(labels), f"len(input_ids)={len(input_ids)} != len(labels)={len(labels)}" @@ -125,6 +128,8 @@ def format_prompt_completion( eos_token_id: int, pad_token_id: int, seq_length: Optional[int] = None, + padding: Union[str, bool] = "do_not_pad", + truncation: Union[str, bool] = "do_not_truncate", answer_only_loss_mask: bool = True, ) -> Dict[str, List[int]]: """ @@ -150,7 +155,7 @@ def format_prompt_completion( else: len_prompt_ids = 0 # Tokenize full text - input_ids = tokenizer(full_text)["input_ids"] + input_ids = tokenizer(full_text, padding=padding, truncation=truncation, max_length=seq_length)["input_ids"] # Create assistant_masks: 0 for prompt tokens, 1 for answer tokens assistant_masks = [0] * len_prompt_ids + [1] * (len(input_ids) - len_prompt_ids) @@ -162,6 +167,7 @@ def format_prompt_completion( eos_token_id=eos_token_id, pad_token_id=pad_token_id, seq_length=seq_length, + truncation=truncation, ) @@ -171,6 +177,8 @@ def format_chat_template( eos_token_id: int, pad_token_id: int, seq_length: Optional[int] = None, + padding: Union[str, bool] = "do_not_pad", + truncation: Union[str, bool] = "do_not_truncate", tools: Optional[List[Dict]] = None, ) -> Dict[str, List[int]]: """ @@ -199,6 +207,9 @@ def format_chat_template( tokenize=True, return_dict=True, return_assistant_tokens_mask=template_has_generation_kwd, + padding=padding, + truncation=truncation, + max_length=seq_length, ) # Choose the last conversation as answer other history are context by finding the last masked token diff --git a/tests/functional_tests/datasets/llm/test_column_mapped_text_instruction_dataset.py b/tests/functional_tests/datasets/llm/test_column_mapped_text_instruction_dataset.py new file mode 100644 index 000000000..44853785e --- /dev/null +++ b/tests/functional_tests/datasets/llm/test_column_mapped_text_instruction_dataset.py @@ -0,0 +1,217 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import json
+import os
+from pathlib import Path
+
+import pytest
+from transformers import AutoTokenizer
+
+from nemo_automodel.components.datasets.llm.column_mapped_text_instruction_dataset import (
+    ColumnMappedTextInstructionDataset,
+)
+
+
+def _write_jsonl(tmp_path: Path) -> Path:
+    """Create a small JSONL dataset for testing."""
+    rows = [
+        {
+            "context": "Architecturally, the school has a Catholic character. Atop the Main Building's gold dome is a golden statue of the Virgin Mary.",
+            "question": "To whom did the Virgin Mary allegedly appear in 1858 in Lourdes France?",
+            "answers": "Saint Bernadette Soubirous",
+        },
+        {
+            "context": "Immediately in front of the Main Building and facing it, is a copper statue of Christ with arms upraised.",
+            "question": "What is in front of the Notre Dame Main Building?",
+            "answers": "a copper statue of Christ",
+        },
+        {
+            "context": "Next to the Main Building is the Basilica of the Sacred Heart.",
+            "question": "The Basilica of the Sacred heart at Notre Dame is beside to which structure?",
+            "answers": "the Main Building",
+        },
+        {
+            "context": "Immediately behind the basilica is the Grotto, a Marian place of prayer and reflection.",
+            "question": "What is the Grotto at Notre Dame?",
+            "answers": "a Marian place of prayer and reflection",
+        },
+        {
+            "context": "Atop the Main Building's gold dome is a golden statue of the Virgin Mary.",
+            "question": "What sits on top of the Main Building at Notre Dame?",
+            "answers": "a golden statue of the Virgin Mary",
+        },
+    ]
+    p = tmp_path / "sample.jsonl"
+    with p.open("w") as f:
+        for r in rows:
+            f.write(json.dumps(r) + "\n")
+    return p
+
+
+def _maybe_tokenizer_dir_candidates() -> list[Path]:
+    """Return likely tokenizer directories present in CI test data mounts."""
+    candidates: list[Path] = []
+    # Known bundle with no chat template used elsewhere in the repo
+    test_data_dir = os.environ.get("TEST_DATA_DIR")
+    if test_data_dir:
+        candidates.append(Path(test_data_dir) / "hf_mixtral_2l")
+    # Explicit tokenizers used by existing unit tests
+    base = Path("/home/TestData/akoumparouli/tokenizers/")
+    names = [
+        "gpt-oss-20b",
+        "llama_3.2_1b",
+        "qwen3_30b_a3b_instruct_2507",
+    ]
+    for n in names:
+        candidates.append(base / n)
+    return [p for p in candidates if p.exists()]
+
+
+def _load_tokenizer(path: Path):
+    os.environ.setdefault("TRANSFORMERS_OFFLINE", "1")
+    os.environ.setdefault("HF_HUB_OFFLINE", "1")
+    return AutoTokenizer.from_pretrained(str(path))
+
+
+def _first_sample(ds: ColumnMappedTextInstructionDataset):
+    it = iter(ds)
+    return next(it)
+
+
+@pytest.mark.parametrize(
+    "seq_length,padding,truncation",
+    [
+        (None, "do_not_pad", None),
+        (16, "max_length", True),
+        (16, "do_not_pad", True),
+        (16, True, None),  # padding=True -> longest; with single example behaves like no-op pre-packaging
+    ],
+)
+def test_dataset_non_chat_padding_truncation_options(tmp_path: Path, seq_length, padding, truncation):
+    """Validate shapes and masking for non-chat tokenizers across padding/truncation options."""
+    data_file = _write_jsonl(tmp_path)
+
+    # Find a tokenizer without chat template
+    for d in _maybe_tokenizer_dir_candidates():
+        tok = _load_tokenizer(d)
+        if getattr(tok, "chat_template", None) is None:
+            break
+    else:
+        pytest.skip("No non-chat tokenizer available in test data mounts")
+
+    column_mapping = {"context": "context", "question": "question", "answer": "answers"}
+
+    ds = ColumnMappedTextInstructionDataset(
+        path_or_dataset_id=str(data_file),
+        column_mapping=column_mapping,
+        tokenizer=tok,
+        seq_length=seq_length,
+        padding=padding,
+        truncation=truncation,
+        # answer_only_loss_mask default True
+    )
+
+    sample = _first_sample(ds)
+    assert set(["input_ids", "labels", "attention_mask"]).issubset(sample.keys())
+    assert len(sample["input_ids"]) == len(sample["labels"]) == len(sample["attention_mask"]) > 0
+
+    if isinstance(seq_length, int):
+        if truncation is True:
+            assert len(sample["input_ids"]) == seq_length
+            assert len(sample["labels"]) == seq_length
+            # Trailing padding in labels must be masked
+            assert sample["labels"][-1] == -100
+            assert sample["attention_mask"][-1] in (0, 1)  # depending on pack length, end can be 0
+        elif truncation is not True:
+            assert len(sample["input_ids"]) != seq_length
+            assert len(sample["labels"]) != seq_length
+
+@pytest.mark.parametrize(
+    "seq_length,padding,truncation",
+    [
+        (None, "do_not_pad", None),
+        (128, "max_length", True),
+        (16, "do_not_pad", True),
+        (16, True, None),
+    ],
+)
+def test_dataset_chat_padding_truncation_options(tmp_path: Path, seq_length, padding, truncation):
+    """Validate shapes and masking for chat-template tokenizers across padding/truncation options."""
+    data_file = _write_jsonl(tmp_path)
+
+    # Find a tokenizer with chat template
+    chat_tok = None
+    for d in _maybe_tokenizer_dir_candidates():
+        tok = _load_tokenizer(d)
+        if getattr(tok, "chat_template", None) is not None and callable(getattr(tok, "apply_chat_template", None)):
+            chat_tok = tok
+            break
+    if chat_tok is None:
+        pytest.skip("No chat-template tokenizer available in test data mounts")
+
+    # 3-column mapping
+    column_mapping = {"context": "context", "question": "question", "answer": "answers"}
+
+    ds = ColumnMappedTextInstructionDataset(
+        path_or_dataset_id=str(data_file),
+        column_mapping=column_mapping,
+        tokenizer=chat_tok,
+        seq_length=seq_length,
+        padding=padding,
+        truncation=truncation,
+        start_of_turn_token="<|assistant|>",  # required when answer_only_loss_mask=True and chat template present
+    )
+
+    sample = _first_sample(ds)
+    assert set(["input_ids", "labels", "attention_mask"]).issubset(sample.keys())
+    assert len(sample["input_ids"]) == len(sample["labels"]) == len(sample["attention_mask"]) > 0
+
+    if isinstance(seq_length, int):
+        if truncation is True or padding == "max_length":
+            assert len(sample["input_ids"]) == seq_length
+            assert len(sample["labels"]) == seq_length
+        elif truncation is not True:
+            assert sample["labels"][-1] != -100
+
+
+def test_dataset_two_column_mapping_non_chat(tmp_path: Path):
+    """Ensure 2-column mapping (context+answer) works with non-chat tokenizer."""
+    data_file = _write_jsonl(tmp_path)
+
+    # Choose a non-chat tokenizer
+    for d in _maybe_tokenizer_dir_candidates():
+        tok = _load_tokenizer(d)
+        if getattr(tok, "chat_template", None) is None:
+            break
+    else:
+        pytest.skip("No non-chat tokenizer available in test data mounts")
+
+    # Use only context and answers columns
+    column_mapping = {"context": "context", "answer": "answers"}
+
+    ds = ColumnMappedTextInstructionDataset(
+        path_or_dataset_id=str(data_file),
+        column_mapping=column_mapping,
+        tokenizer=tok,
+        seq_length=32,
+        padding="max_length",
+        truncation=True,
+    )
+
+    sample = _first_sample(ds)
+    assert len(sample["input_ids"]) == 32
+    assert len(sample["labels"]) == 32
+    assert len(sample["attention_mask"]) == 32
+
diff --git a/tests/functional_tests/hf_transformer/test_formatting_utils_options.py b/tests/functional_tests/hf_transformer/test_formatting_utils_options.py
new file mode 100644
index 000000000..f9229b150
--- /dev/null
+++ b/tests/functional_tests/hf_transformer/test_formatting_utils_options.py
@@ -0,0 +1,183 @@
+# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Iterable, List, Tuple
+
+import pytest
+from transformers import AutoTokenizer
+
+from nemo_automodel.components.datasets.llm.formatting_utils import (
+    _add_pad_token,
+    format_chat_template,
+    format_prompt_completion,
+)
+
+
+@pytest.mark.parametrize(
+    "seq_length,padding,truncation",
+    [
+        (None, "do_not_pad", None),
+        (4, "max_length", True),
+    ],
+)
+def test_format_prompt_completion_options(seq_length, padding, truncation):
+    os.environ["TRANSFORMERS_OFFLINE"] = "1"
+    os.environ["HF_HUB_OFFLINE"] = "1"
+    TOKENIZER_DIR = f"{os.environ['TEST_DATA_DIR']}/hf_mixtral_2l"
+    assert os.path.exists(TOKENIZER_DIR), "Tokenizer directory does not exist"
+    tok = AutoTokenizer.from_pretrained(TOKENIZER_DIR)
+    # Only applicable when tokenizer lacks chat template
+    assert getattr(tok, "chat_template", None) is None
+
+    eos_token_id = getattr(tok, "eos_token_id", 0)
+    pad_token_id = _add_pad_token(tok) or eos_token_id
+    if padding != "do_not_pad":
+        tok.pad_token = tok.eos_token
+
+    # If using padding="max_length", seq_length must be an int
+    if padding == "max_length" and not isinstance(seq_length, int):
+        pytest.skip("padding='max_length' requires seq_length to be set.")
+
+    context = "France is a country in Europe."
+    question = "What is the capital of France?"
+    answer = "Paris."
+    prompt = f"{context} {question} "
+
+    out = format_prompt_completion(
+        tokenizer=tok,
+        prompt=prompt,
+        answer=answer,
+        eos_token_id=eos_token_id,
+        pad_token_id=pad_token_id,
+        seq_length=seq_length,
+        padding=padding,
+        truncation=truncation,
+        answer_only_loss_mask=True,
+    )
+
+    # Basic structure
+    assert set(["input_ids", "labels", "attention_mask"]).issubset(out.keys())
+    assert len(out["input_ids"]) == len(out["labels"]) == len(out["attention_mask"]) > 0
+
+    # seq_length enforcement (either by HF padding or our packager)
+    if isinstance(seq_length, int) and padding != "do_not_pad":
+        assert len(out["input_ids"]) == seq_length
+        assert len(out["labels"]) == seq_length
+        # Trailing padding label must be masked
+        assert out["labels"][-1] == -100, (out, pad_token_id)
+
+    # EOS should be present in labels (supervised area) but not as last input_id
+    if getattr(tok, "eos_token_id", None) is not None and truncation is not True:
+        assert tok.eos_token_id in out["labels"], "EOS must appear in labels"
+        # find last non-pad input position and ensure it's not EOS
+        last_non_pad = len(out["input_ids"]) - 1
+        while last_non_pad >= 0 and out["input_ids"][last_non_pad] == pad_token_id:
+            last_non_pad -= 1
+        assert last_non_pad >= 0
+        assert out["input_ids"][last_non_pad] != tok.eos_token_id
+
+    # There should be masked (prompt) and supervised (answer) tokens
+    assert any(l == -100 for l in out["labels"])  # masked prompt
+    if truncation is not True:
+        assert any(l != -100 for l in out["labels"])  # supervised answer
+
+    # Attention mask should have zeros only in padded tail (if any)
+    if isinstance(seq_length, int):
+        # From the end, once we see a 0, the rest must be 0
+        seen_zero = False
+        for v in reversed(out["attention_mask"]):
+            if v == 0:
+                seen_zero = True
+            else:
+                if seen_zero:
+                    pytest.fail("Non-zero attention_mask value after padded zeros.")
+
+
+@pytest.mark.parametrize(
+    "seq_length,padding,truncation",
+    [
+        (None, "do_not_pad", None),
+        (4, "max_length", True),
+    ],
+)
+def test_format_chat_template_options(seq_length, padding, truncation):
+
+    os.environ["TRANSFORMERS_OFFLINE"] = "1"
+    os.environ["HF_HUB_OFFLINE"] = "1"
+    TOKENIZER_DIR = f"{os.environ['TEST_DATA_DIR']}/qwen3_4b_instruct_2407"
+    assert os.path.exists(TOKENIZER_DIR), "Tokenizer directory does not exist"
+    tok = AutoTokenizer.from_pretrained(TOKENIZER_DIR)
+    # Only applicable when tokenizer DOES define a chat template
+    if not getattr(tok, "chat_template", None):
+        pytest.skip("Tokenizer qwen3_4b_instruct_2407 has no chat_template; skipping chat-template tests.")
+
+    eos_token_id = getattr(tok, "eos_token_id", 0)
+    pad_token_id = _add_pad_token(tok) or eos_token_id
+
+    if padding == "max_length" and not isinstance(seq_length, int):
+        pytest.skip("padding='max_length' requires seq_length to be set.")
+
+    messages = [
+        {"role": "system", "content": "You are a helpful assistant."},
+        {"role": "user", "content": "What is the capital of France?"},
+        {"role": "assistant", "content": "Paris."},
+    ]
+
+    out = format_chat_template(
+        tokenizer=tok,
+        formatted_text=messages,
+        eos_token_id=eos_token_id,
+        pad_token_id=pad_token_id,
+        seq_length=seq_length,
+        padding=padding,
+        truncation=truncation,
+    )
+
+    # Basic structure
+    assert set(["input_ids", "labels", "attention_mask"]).issubset(out.keys())
+    assert len(out["input_ids"]) == len(out["labels"]) == len(out["attention_mask"]) > 0
+
+    # seq_length enforcement
+    if isinstance(seq_length, int):
+        assert len(out["input_ids"]) == seq_length
+        assert len(out["labels"]) == seq_length
+        if truncation is False:
+            assert out["labels"][-1] == -100
+
+    # For chat templates, EOS should not be the last input id (unless it's all pad)
+    if getattr(tok, "eos_token_id", None) is not None:
+        last_non_pad = len(out["input_ids"]) - 1
+        while last_non_pad >= 0 and out["input_ids"][last_non_pad] == pad_token_id:
+            last_non_pad -= 1
+        if last_non_pad >= 0:
+            assert out["input_ids"][last_non_pad] != tok.eos_token_id
+
+    # There must be at least some supervised tokens in labels
+    assert any(l != -100 for l in out["labels"])  # assistant tokens
+
+    # Attention mask padded tail zeros, if padded
+    if isinstance(seq_length, int) and truncation is False:
+        seen_zero = False
+        for v in reversed(out["attention_mask"]):
+            if v == 0:
+                seen_zero = True
+            else:
+                if seen_zero:
+                    pytest.fail("Non-zero attention_mask value after padded zeros.")
+
diff --git a/tests/unit_tests/datasets/llm/test_column_mapped_text_instruction.py b/tests/unit_tests/datasets/llm/test_column_mapped_text_instruction.py
index f35c13392..8ce6fa772 100644
--- a/tests/unit_tests/datasets/llm/test_column_mapped_text_instruction.py
+++ b/tests/unit_tests/datasets/llm/test_column_mapped_text_instruction.py
@@ -67,7 +67,7 @@ def __init__(self):
         self.bos_token_id = 2
         self._counter = 3  # Start token IDs from 3 to avoid conflicts
 
-    def __call__(self, text: str, add_special_tokens: bool = True):  # noqa: D401
+    def __call__(self, text: str, add_special_tokens: bool = True, padding=None, truncation=None, max_length=None):  # noqa: D401
         """Mimic the Hugging Face tokenizer ``__call__`` API.
 
         The real tokenizer would convert *text* into a list of integer token IDs.
diff --git a/tests/unit_tests/datasets/llm/test_tokenizer_apply_functions.py b/tests/unit_tests/datasets/llm/test_tokenizer_apply_functions.py
index b17d27d41..80ce69325 100644
--- a/tests/unit_tests/datasets/llm/test_tokenizer_apply_functions.py
+++ b/tests/unit_tests/datasets/llm/test_tokenizer_apply_functions.py
@@ -58,7 +58,7 @@ def _id_for_token(self, tok: str) -> int:
             self._cursor += 1
         return self._vocab[tok]
 
-    def __call__(self, text: str, *, add_special_tokens: bool = True):  # type: ignore[override]
+    def __call__(self, text: str, *, add_special_tokens: bool = True, padding=None, truncation=None, max_length=None):  # type: ignore[override]
         ids: List[int] = []
         if add_special_tokens:
             ids.append(self.bos_token_id)
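A minimal usage sketch of the padding/truncation options introduced above, passed through ColumnMappedTextInstructionDataset. The tokenizer path and JSONL file below are placeholder assumptions; the keyword arguments mirror the signature added in this diff.

from transformers import AutoTokenizer

from nemo_automodel.components.datasets.llm.column_mapped_text_instruction_dataset import (
    ColumnMappedTextInstructionDataset,
)

# Placeholder tokenizer and data path -- substitute whatever is available locally.
tokenizer = AutoTokenizer.from_pretrained("/path/to/tokenizer")

ds = ColumnMappedTextInstructionDataset(
    path_or_dataset_id="/path/to/data.jsonl",
    column_mapping={"context": "context", "question": "question", "answer": "answers"},
    tokenizer=tokenizer,
    seq_length=128,
    padding="max_length",   # pad every sample up to seq_length
    truncation=True,        # truncate samples longer than seq_length
)

sample = ds[0]  # dict with input_ids, labels, attention_mask, each of length 128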