Skip to content

Commit c8e20ea

Browse files
committed
address comments
1 parent 903488e commit c8e20ea

File tree

1 file changed

+3
-1
lines changed

1 file changed

+3
-1
lines changed

fast_llm/data/preparator/gpt_memmap/prepare.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -229,6 +229,7 @@ def run(self) -> None:
229229
},
230230
batched=False,
231231
desc="Combining fields",
232+
num_proc=self._config.loading_workers,
232233
)
233234
logger.info(f"Sample after combining fields:\n{dataset[0]}")
234235
self._data_column = new_combined_column
@@ -237,11 +238,12 @@ def run(self) -> None:
237238
dataset = dataset.map(
238239
lambda example: {
239240
loss_masking_column: [
240-
(0, len(str(example[source_schema.prompt_column])) - 1)
241+
(0, len(example[source_schema.prompt_column]) - 1)
241242
]# spans are inclusive
242243
},
243244
batched=False,
244245
desc="Setting loss masking spans",
246+
num_proc=self._config.loading_workers,
245247
)
246248
logger.info(f"Sample after setting loss masking spans:\n{dataset[0]}")
247249
self._loss_masking_spans_column = loss_masking_column

0 commit comments

Comments
 (0)