Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
113 commits
Select commit Hold shift + click to select a range
7681d71
initial commit for end-of-utterance detection
weiqingw4ng Mar 10, 2025
68e6b6f
change targets to long() type
weiqingw4ng Mar 10, 2025
0069fac
change output_types()
weiqingw4ng Mar 10, 2025
1550b56
add random padding and refactor for multiple utterances per sample
stevehuang52 Mar 17, 2025
867c799
add handling multiple text groundtruth
stevehuang52 Mar 17, 2025
532494a
Merge remote-tracking branch 'origin/main' into end_of_utterance
stevehuang52 Mar 17, 2025
c9c8a0d
update and add eval scripts
stevehuang52 Apr 4, 2025
201b706
drop sou label and add eob label
stevehuang52 Apr 7, 2025
af380f6
update hybrid-rnnt-ctc and rnnt models to use eou dataset
stevehuang52 Apr 8, 2025
82cdb60
set default return eou frame label to false
stevehuang52 Apr 8, 2025
9b6f95d
handle empty utterance
stevehuang52 Apr 8, 2025
4228641
add script for injecting special eou tokens into SPE tokenizer
stevehuang52 Apr 9, 2025
ca5dd35
refactor eou eval utils
stevehuang52 Apr 9, 2025
df3151d
add eou rnnt training
stevehuang52 Apr 11, 2025
514b6d2
update doc
stevehuang52 Apr 11, 2025
a63bef3
update data augmentation
stevehuang52 Apr 15, 2025
bc44d9a
update data related functions
stevehuang52 Apr 16, 2025
b6081cf
fix tokenizer with eou tokens
stevehuang52 Apr 21, 2025
442dfec
adding eou force aligner
weiqingw4ng Apr 23, 2025
13bdc04
update for eou
stevehuang52 Apr 23, 2025
cae3171
Merge branch 'end_of_utterance' of https://github.com/NVIDIA/NeMo int…
stevehuang52 Apr 23, 2025
813be94
fix the case when 'segments_level_ctm_filepath' is not produced
weiqingw4ng Apr 23, 2025
e9cf11a
fix force aligner
stevehuang52 Apr 24, 2025
d7da23e
Merge branch 'end_of_utterance' of https://github.com/NVIDIA/NeMo int…
stevehuang52 Apr 24, 2025
fb4a815
fix aligner
stevehuang52 Apr 24, 2025
e8a49cd
update for asr-eou
stevehuang52 Apr 29, 2025
5667d71
clean up and update infer
stevehuang52 Apr 29, 2025
c9502b4
update
stevehuang52 May 7, 2025
016e5cc
update
stevehuang52 May 12, 2025
78dbb45
fix rnnt_decoding for empty string
stevehuang52 May 12, 2025
cd6da40
update cfg
stevehuang52 May 12, 2025
f8cf80a
update cfg
stevehuang52 May 12, 2025
9362c21
update padding augment
stevehuang52 May 13, 2025
9513e42
update
stevehuang52 May 13, 2025
516d9f4
update
stevehuang52 May 13, 2025
a68531f
update cfg
stevehuang52 May 13, 2025
1915e02
fix eob metric logging
stevehuang52 May 14, 2025
3114f2c
refactor and add hybrid model
stevehuang52 May 15, 2025
f83dc6f
update cfg
stevehuang52 May 15, 2025
c4783b1
update EOU models
stevehuang52 May 17, 2025
b5fd67f
update cfg
stevehuang52 May 17, 2025
27a26a9
update
stevehuang52 May 17, 2025
6f1e59b
refactor percentile calculation
stevehuang52 May 19, 2025
5c8af18
update augmentation
stevehuang52 May 21, 2025
3b1f354
update cfg
stevehuang52 May 22, 2025
dec9694
update model and cfg
stevehuang52 May 27, 2025
33ca51c
update frame eou
stevehuang52 May 27, 2025
d726602
update cfg
stevehuang52 May 27, 2025
5c194a4
add adapter to eou
stevehuang52 May 27, 2025
a1a5cbd
remove pdb
stevehuang52 May 28, 2025
3114b90
update cfg
stevehuang52 May 28, 2025
c270686
update cfg
stevehuang52 May 28, 2025
7f8f760
update cfg
stevehuang52 May 28, 2025
b6d4995
update
stevehuang52 May 30, 2025
39d5e25
add cfg
stevehuang52 May 30, 2025
c799139
fix eou metric
stevehuang52 May 30, 2025
923950b
update adapter
stevehuang52 Jun 1, 2025
604b86d
add scripts
stevehuang52 Jun 5, 2025
5533de7
update docstring
stevehuang52 Jun 5, 2025
68aa1ca
update
stevehuang52 Jun 5, 2025
dd99cf4
update
stevehuang52 Jun 7, 2025
e40459a
update generate eval data
stevehuang52 Jun 7, 2025
97c17f1
update eou val
stevehuang52 Jun 8, 2025
152f1b5
update
stevehuang52 Jun 27, 2025
6a59934
add drop_pnc=true as default for dataloading
stevehuang52 Jun 27, 2025
580156d
update
stevehuang52 Jun 27, 2025
2d1dce5
update cfg
stevehuang52 Jun 28, 2025
3d9ae66
update
stevehuang52 Jun 28, 2025
ace403b
update
stevehuang52 Jun 30, 2025
98749b9
update
stevehuang52 Jul 8, 2025
9fb4395
fix miss rate
stevehuang52 Jul 9, 2025
53e8417
update
stevehuang52 Jul 22, 2025
0faa56b
add ignore_eob_label
stevehuang52 Jul 29, 2025
59d986e
fix and update
stevehuang52 Jul 31, 2025
bf45b35
improve lhotse augmentation
stevehuang52 Aug 2, 2025
6b5a9b3
update cfg
stevehuang52 Aug 2, 2025
af9756e
update cfg
stevehuang52 Aug 2, 2025
d1c9b8d
update
stevehuang52 Aug 3, 2025
69d79c2
update
stevehuang52 Aug 3, 2025
f68e8ed
add debug info
stevehuang52 Aug 4, 2025
01a6f7d
improve data augmentation
stevehuang52 Aug 5, 2025
096b855
update utils
stevehuang52 Aug 6, 2025
a96eede
update
stevehuang52 Aug 7, 2025
632f515
update
stevehuang52 Aug 7, 2025
c706f75
update dataloader
stevehuang52 Aug 8, 2025
3bdf00c
update oomptimizer
stevehuang52 Aug 8, 2025
da0ab3e
update oomptimizer
stevehuang52 Aug 8, 2025
9741780
update eou model
stevehuang52 Aug 8, 2025
f246fd2
update eou model
stevehuang52 Aug 8, 2025
9cf662e
update eou model
stevehuang52 Aug 8, 2025
109aeb2
update augmentation
stevehuang52 Aug 9, 2025
ddc4b55
update aug
stevehuang52 Aug 9, 2025
9762900
update augment
stevehuang52 Aug 9, 2025
33d1e9e
update
stevehuang52 Aug 11, 2025
218b88a
update drop pnc func
stevehuang52 Aug 11, 2025
922dfdd
update eou finetune
stevehuang52 Aug 20, 2025
edc0cbf
Merge remote-tracking branch 'origin/main' into end_of_utterance
stevehuang52 Aug 20, 2025
ca4055a
update transcribe
stevehuang52 Sep 3, 2025
6c3aff2
update cfg
stevehuang52 Sep 11, 2025
f0018e9
fix cfg
stevehuang52 Sep 11, 2025
b9ab277
clean up for PR
stevehuang52 Sep 16, 2025
5ef8ceb
clean up
stevehuang52 Sep 16, 2025
551ac68
Potential fix for code scanning alert no. 16191: Explicit returns mix…
stevehuang52 Sep 16, 2025
e504d6c
Potential fix for code scanning alert no. 16190: Explicit returns mix…
stevehuang52 Sep 16, 2025
f11ef31
Apply isort and black reformatting
stevehuang52 Sep 16, 2025
84e8baa
Potential fix for code scanning alert no. 16185: File is not always c…
stevehuang52 Sep 16, 2025
3890606
clean up
stevehuang52 Sep 16, 2025
26270ce
clean up
stevehuang52 Oct 8, 2025
3799bbd
Merge branch 'main' into heh/eou_pr
stevehuang52 Oct 8, 2025
e884470
fix pylint&flake8
stevehuang52 Oct 8, 2025
8e94839
Merge branch 'heh/eou_pr' of https://github.com/NVIDIA/NeMo into heh/…
stevehuang52 Oct 8, 2025
38f5b93
fix pylint
stevehuang52 Oct 8, 2025
ab77c22
refactor
stevehuang52 Oct 8, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 112 additions & 0 deletions examples/asr/asr_eou/speech_to_text_eou_eval.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Evaluate an ASR end-of-utterance (EOU) model on one or more test manifests.

Example usage:

```bash
TEST_MANIFEST="[/path/to/your/test_manifest.json,/path/to/your/test_manifest2.json,...]"
TEST_NAME="[test_name1,test_name2,...]"
TEST_BATCH=32
NUM_WORKERS=8

PRETRAINED_NEMO=/path/to/EOU/model.nemo
CONFIG_NAME=fastconformer_transducer_bpe_streaming

python speech_to_text_eou_eval.py \
--config-name $CONFIG_NAME \
++init_from_nemo_model=$PRETRAINED_NEMO \
~model.train_ds \
~model.validation_ds \
++model.test_ds.defer_setup=true \
++model.test_ds.sample_rate=16000 \
++model.test_ds.manifest_filepath=$TEST_MANIFEST \
++model.test_ds.name=$TEST_NAME \
++model.test_ds.batch_size=$TEST_BATCH \
++model.test_ds.num_workers=$NUM_WORKERS \
++model.test_ds.drop_last=false \
++model.test_ds.force_finite=true \
++model.test_ds.shuffle=false \
++model.test_ds.pin_memory=true \
exp_manager.create_wandb_logger=false
```

"""


import lightning.pytorch as pl
import torch

# "highest" makes float32 matmuls use full float32 precision internally.
torch.set_float32_matmul_precision("highest")
from omegaconf import DictConfig, OmegaConf, open_dict

from nemo.collections.asr.models import ASRModel
from nemo.core.classes import typecheck
from nemo.core.config import hydra_runner
from nemo.utils import logging
from nemo.utils.exp_manager import exp_manager
from nemo.utils.trainer_utils import resolve_trainer_cfg

# Globally disable the `typecheck` checks from nemo.core.classes for this script.
typecheck.set_typecheck_enabled(False)


def load_model(cfg: DictConfig, trainer: pl.Trainer) -> ASRModel:
    """
    Load the ASR model to evaluate from the sources named in ``cfg``.

    The base model is restored from ``cfg.init_from_nemo_model`` (via
    ``ASRModel.restore_from``) when that key holds a value; otherwise it is
    loaded from ``cfg.init_from_pretrained_model`` (via
    ``ASRModel.from_pretrained``). A key that is present but null/empty is
    treated as not provided, so a ``null`` placeholder for the first option
    does not shadow the second one. If ``cfg.init_from_ptl_ckpt`` is set, the
    ``state_dict`` stored in that checkpoint is then loaded into the model
    with ``strict=True``.

    Args:
        cfg: Config holding the ``init_from_*`` keys.
        trainer: Trainer forwarded to the model loading call.

    Returns:
        The loaded model.

    Raises:
        ValueError: If neither ``init_from_nemo_model`` nor
            ``init_from_pretrained_model`` has a value.
    """
    # Use value truthiness (same idiom as `init_from_ptl_ckpt` below) instead of
    # key presence, so that `key: null` in the YAML counts as "not provided".
    nemo_path = cfg.get("init_from_nemo_model", None)
    pretrained_name = cfg.get("init_from_pretrained_model", None)

    if nemo_path:
        logging.info(f"Loading model from local file: {nemo_path}")
        model = ASRModel.restore_from(nemo_path, trainer=trainer)
    elif pretrained_name:
        logging.info(f"Loading model from remote: {pretrained_name}")
        model = ASRModel.from_pretrained(pretrained_name, trainer=trainer)
    else:
        raise ValueError(
            "Please provide either 'init_from_nemo_model' or 'init_from_pretrained_model' in the config file."
        )

    ptl_ckpt = cfg.get("init_from_ptl_ckpt", None)
    if ptl_ckpt:
        logging.info(f"Loading weights from checkpoint: {ptl_ckpt}")
        # NOTE(review): weights_only=False lets torch.load unpickle arbitrary
        # objects; only point `init_from_ptl_ckpt` at trusted checkpoints.
        state_dict = torch.load(ptl_ckpt, map_location='cpu', weights_only=False)['state_dict']
        model.load_state_dict(state_dict, strict=True)
    return model


@hydra_runner(config_path="../conf/asr_eou", config_name="fastconformer_transducer_bpe_streaming")
def main(cfg):
    """
    Load an ASR model and run ``trainer.test`` on the data in ``cfg.model.test_ds``.

    Steps:
        1. Build the trainer from ``cfg.trainer`` and call ``exp_manager``.
        2. Load the model with ``load_model``, switch it to eval mode and, if it
           has a ``wer`` attribute, turn off its ``log_prediction`` flag.
        3. Copy ``save_pred_to_file`` and ``calclate_eou_metrics`` from ``cfg``
           into the model config when they are present.
        4. Set ``pad_eou_label_secs`` on the test config from the model config
           (default ``0.0``), set up the test data and run the test loop.

    Args:
        cfg: Config object passed to this function.

    Raises:
        ValueError: If ``cfg.model`` has no ``test_ds`` or its
            ``manifest_filepath`` is ``None``.
    """
    logging.info(f'Hydra config: {OmegaConf.to_yaml(cfg)}')

    trainer_kwargs = resolve_trainer_cfg(cfg.trainer)
    trainer = pl.Trainer(**trainer_kwargs)
    exp_manager(trainer, cfg.get("exp_manager", None))

    # Set the model to evaluation mode
    model = load_model(cfg, trainer).eval()
    if hasattr(model, 'wer'):
        model.wer.log_prediction = False

    # Forward the optional top-level overrides into the model's own config.
    with open_dict(model.cfg):
        for override_key in ("save_pred_to_file", "calclate_eou_metrics"):
            if override_key in cfg:
                model.cfg[override_key] = cfg[override_key]

    if not hasattr(cfg.model, 'test_ds') or cfg.model.test_ds.manifest_filepath is None:
        raise ValueError(
            "No test dataset provided. Please provide a test dataset in the config file under model.test_ds."
        )

    test_cfg = cfg.model.test_ds
    with open_dict(test_cfg):
        test_cfg.pad_eou_label_secs = model.cfg.get('pad_eou_label_secs', 0.0)
    model.setup_multiple_test_data(test_data_config=test_cfg)
    trainer.test(model)
    logging.info("Test completed.")

# Script entry point: main() is called without arguments here, hence the lint suppression.
if __name__ == "__main__":
    main()  # noqa pylint: disable=no-value-for-parameter
Loading
Loading