diff --git a/tools/preprocess_data.py b/tools/preprocess_data.py index 13e5b64a47..4616810c26 100644 --- a/tools/preprocess_data.py +++ b/tools/preprocess_data.py @@ -73,6 +73,9 @@ def initializer(self): else: Encoder.splitter = IdentitySplitter() + + if Encoder.tokenizer.eod is None: + raise ValueError("EOD token is not set.") def split(self, json_line): data = json.loads(json_line)