PaddlePaddle · luotao1 · Sep 22, 2025 · Sep 2, 2025
diff --git a/llm/tools/preprocess/create_pretraining_data.py b/llm/tools/preprocess/create_pretraining_data.py
@@ -176,7 +176,7 @@ def get_whole_word_mask_tokens(tokens, words, max_word_length=6):
             i += 1
             continue
 
-        # add "##" mark on the middel tokens of Chinese words
+        # add "##" mark on the middle tokens of Chinese words
         # such as ["通过", "利用"] -> ["通", "##过"， "利", "##用"]
         has_add = False
         for length in range(max_word_length, 0, -1):

diff --git a/llm/utils/fused_layers.py b/llm/utils/fused_layers.py
@@ -106,11 +106,11 @@ def sp_async_reducesctter(x_grad):
 def sync_mp_allreduce(task, dist_tensor):
     mp_placement_index = dist_tensor.process_mesh.dim_names.index("mp")
     new_placments = list()
-    for idx, placment in enumerate(dist_tensor.placements):
+    for idx, placement in enumerate(dist_tensor.placements):
         if idx == mp_placement_index:
             new_placments.append(dist.Replicate())
         else:
-            new_placments.append(placment)
+            new_placments.append(placement)
     place = paddle.framework._current_expected_place()
     place = paddle.framework._get_paddle_place(place)
 

diff --git a/llm/utils/sp_async_reduce_scatter.py b/llm/utils/sp_async_reduce_scatter.py
@@ -172,7 +172,7 @@ def forward_pre_hook(layer, input):
     ipp = id2ipp[id(layer)]
 
 
-def forward_post_hook(layer, input, ouput):
+def forward_post_hook(layer, input, output):
     paddle.nn.functional.linear = paddle_nn_functional_linear
     if is_fused_matmul_bias_supported():
         paddle.incubate.nn.functional.fused_linear = paddle_incubate_nn_functional_fused_linear

diff --git a/paddlenlp/transformers/clipseg/modeling.py b/paddlenlp/transformers/clipseg/modeling.py
@@ -340,7 +340,7 @@ def forward(
         attn_weights = nn.functional.softmax(attn_weights, axis=-1)
 
         if output_attentions:
-            # this operation is a bit akward, but it's required to
+            # this operation is a bit awkward, but it's required to
             # make sure that attn_weights keeps its gradient.
             # In order to do so, attn_weights have to reshaped
             # twice and have to be reused in the following

diff --git a/paddlenlp/transformers/llama/modeling_auto.py b/paddlenlp/transformers/llama/modeling_auto.py
@@ -1146,7 +1146,7 @@ def forward(
             inputs_embeds = paddle.transpose(inputs_embeds, [1, 0, 2])
 
         if self.config.context_parallel_degree > 1 and (attention_mask is not None or self.config.alibi):
-            raise NotImplementedError("Ring FlashAttention dosen't support attention_mask or alibi")
+            raise NotImplementedError("Ring FlashAttention doesn't support attention_mask or alibi")
 
         global_mesh = global_mesh_starts_with_pp()
         if position_ids is None and self.config.sep_parallel_degree > 1:

diff --git a/paddlenlp/transformers/model_utils.py b/paddlenlp/transformers/model_utils.py
@@ -2267,7 +2267,7 @@ def _fuse_or_split_keys(
                             post_quantize=post_quantize,
                         )
                     if post_quantize:
-                        # Split -> quantize(Not support mdoel save)
+                        # Split -> quantize (model save is not supported)
                         state_dict = load_state_dict(
                             shard_file,
                             tp_actions if pre_tensor_parallel_split else None,
@@ -2280,7 +2280,7 @@ def _fuse_or_split_keys(
                             dtype,
                         )
                     else:
-                        # quantize -> split(Support mdoel save)
+                        # quantize -> split (model save is supported)
                         state_dict = load_state_dict(
                             shard_file,
                             tp_actions if pre_tensor_parallel_split else None,

diff --git a/paddlenlp/utils/downloader.py b/paddlenlp/utils/downloader.py
@@ -257,7 +257,7 @@ def _decompress(fname):
 
     # For protecting decompressing interrupted,
     # decompress to fpath_tmp directory firstly, if decompress
-    # successed, move decompress files to fpath and delete
+    # succeeded, move decompress files to fpath and delete
     # fpath_tmp and remove download compress file.
 
     if tarfile.is_tarfile(fname):

diff --git a/paddlenlp/utils/pdc_sdk.py b/paddlenlp/utils/pdc_sdk.py
@@ -534,7 +534,7 @@ def pdc_backup_to_flash_device(self, persistent_path: str, flash_device_path: st
         # step 2: copy persistent data to flash device
         try:
             copy_tree(persistent_path, flash_device_path)
-            logger.info(f"backup {persistent_path} to {flash_device_path} successed.")
+            logger.info(f"backup {persistent_path} to {flash_device_path} succeeded.")
         except Exception as e:
             logger.error(f"[Error] [pdc_sdk] copy tree {persistent_path} to {flash_device_path} failed, error: {e}")
             self._pdc_backup_failed_directory(flash_device_path)

diff --git a/slm/applications/question_answering/unsupervised_qa/tools/dev_qq_pair_creation.py b/slm/applications/question_answering/unsupervised_qa/tools/dev_qq_pair_creation.py
@@ -36,7 +36,7 @@ def extract_q_from_json_file(json_file, out_file=None, test_sample_num=None, que
         if out_file:
             wf = open(os.path.join(out_file), "w", encoding="utf-8")
         if query_answer_path:
-            qeury_answer_wf = open(query_answer_path, "w", encoding="utf-8")
+            query_answer_wf = open(query_answer_path, "w", encoding="utf-8")
         q_list = []
         for i, json_line in enumerate(rf.readlines()):
             line_dict = json.loads(json_line)
@@ -47,7 +47,7 @@ def extract_q_from_json_file(json_file, out_file=None, test_sample_num=None, que
             answer = line_dict["answer"]
             if not test_sample_num or i < test_sample_num:
                 if query_answer_path:
-                    qeury_answer_wf.write(
+                    query_answer_wf.write(
                         question.replace("\n", " ").replace("\t", " ").strip()
                         + "\t"
                         + answer.replace("\n", " ").replace("\t", " ").strip()
@@ -59,9 +59,9 @@ def extract_q_from_json_file(json_file, out_file=None, test_sample_num=None, que
             else:
                 break
         if query_answer_path:
-            qeury_answer_wf.close()
+            query_answer_wf.close()
         if out_file:
-            wf.colse()
+            wf.close()
         return q_list
 
 

diff --git a/slm/applications/text_classification/hierarchical/analysis/word_interpret.ipynb b/slm/applications/text_classification/hierarchical/analysis/word_interpret.ipynb
@@ -197,7 +197,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Start token level interpretion, it will take some time...\n",
+      "Start token level interpretation, it will take some time...\n",
       "Building prefix dict from the default dictionary ...\n",
       "Loading model from cache /tmp/jieba.cache\n",
       "Loading model cost 0.746 seconds.\n",
@@ -219,7 +219,7 @@
     "    interpreter = GradShapInterpreter(model)\n",
     "\n",
     "# Use interpreter to get the importance scores for all data\n",
-    "print(\"Start token level interpretion, it will take some time...\")\n",
+    "print(\"Start token level interpretation, it will take some time...\")\n",
     "analysis_result = []\n",
     "for batch in interpret_data_loader:\n",
     "    analysis_result += interpreter(tuple(batch))\n",

diff --git a/slm/applications/text_classification/hierarchical/few-shot/utils.py b/slm/applications/text_classification/hierarchical/few-shot/utils.py
@@ -30,7 +30,7 @@ def load_local_dataset(data_path, splits, label_list):
         splits (list):
             Which file(s) to load, such as ['train', 'dev', 'test'].
         label_list (dict):
-            The dictionary that maps labels to indeces.
+            The dictionary that maps labels to indices.
     """
 
     def _reader(data_file, label_list):

diff --git a/slm/applications/text_classification/multi_class/analysis/word_interpret.ipynb b/slm/applications/text_classification/multi_class/analysis/word_interpret.ipynb
@@ -196,7 +196,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Start token level interpretion, it will take some time...\n",
+      "Start token level interpretation, it will take some time...\n",
       "Building prefix dict from the default dictionary ...\n",
       "Loading model from cache /tmp/jieba.cache\n",
       "Loading model cost 1.005 seconds.\n",
@@ -218,7 +218,7 @@
     "    interpreter = GradShapInterpreter(model)\n",
     "\n",
     "# Use interpreter to get the importance scores for all data\n",
-    "print(\"Start token level interpretion, it will take some time...\")\n",
+    "print(\"Start token level interpretation, it will take some time...\")\n",
     "analysis_result = []\n",
     "for batch in interpret_data_loader:\n",
     "    analysis_result += interpreter(tuple(batch))\n",

diff --git a/slm/applications/text_classification/multi_label/analysis/word_interpret.ipynb b/slm/applications/text_classification/multi_label/analysis/word_interpret.ipynb
@@ -214,7 +214,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Start token level interpretion, it will take some time...\n",
+      "Start token level interpretation, it will take some time...\n",
       "Building prefix dict from the default dictionary ...\n",
       "Loading model from cache /tmp/jieba.cache\n",
       "Loading model cost 0.751 seconds.\n",
@@ -236,7 +236,7 @@
     "    interpreter = GradShapInterpreter(model)\n",
     "\n",
     "# Use interpreter to get the importance scores for all data\n",
-    "print(\"Start token level interpretion, it will take some time...\")\n",
+    "print(\"Start token level interpretation, it will take some time...\")\n",
     "analysis_result = []\n",
     "for batch in interpret_data_loader:\n",
     "    analysis_result += interpreter(tuple(batch))\n",

diff --git a/slm/applications/text_classification/multi_label/few-shot/utils.py b/slm/applications/text_classification/multi_label/few-shot/utils.py
@@ -30,7 +30,7 @@ def load_local_dataset(data_path, splits, label_list):
         splits (list):
             Which file(s) to load, such as ['train', 'dev', 'test'].
         label_list (dict):
-            The dictionary that maps labels to indeces.
+            The dictionary that maps labels to indices.
     """
 
     def _reader(data_file, label_list):

diff --git a/slm/examples/benchmark/clue/classification/run_clue_classifier_trainer.py b/slm/examples/benchmark/clue/classification/run_clue_classifier_trainer.py
@@ -89,7 +89,7 @@ class ModelArguments:
     )
 
 
-# Data pre-process function for clue benchmark datatset
+# Data pre-process function for clue benchmark dataset
 def convert_clue(example, label_list, tokenizer=None, max_seq_length=512, **kwargs):
     """convert a glue example into necessary features"""
     is_test = False

diff --git a/slm/examples/benchmark/wiki_lambada/eval.py b/slm/examples/benchmark/wiki_lambada/eval.py
@@ -123,7 +123,7 @@ def _construct_sample(self, tokens):
         labels = tokens[1:]
         tokens = tokens[:-1]
         seq_length = len(tokens)
-        # attention mask for the attention calulate
+        # attention mask for the attention calculation
         attention_mask = np.tri(seq_length, seq_length).reshape((1, seq_length, seq_length))
 
         # the pad and eos tokens do not contribute the loss
@@ -168,7 +168,7 @@ def _construct_sample(self, tokens):
         tokens = tokens[:-1]
 
         seq_length = len(tokens)
-        # attention mask for the attention calulate
+        # attention mask for the attention calculation
         attention_mask = np.tri(seq_length, seq_length).reshape((1, seq_length, seq_length))
 
         # the pad and eos tokens do not contribute the loss

diff --git a/slm/examples/information_extraction/DuIE/README.md b/slm/examples/information_extraction/DuIE/README.md
@@ -115,7 +115,7 @@ sh predict.sh
 之后可以使用官方评估脚本评估训练模型在 dev_data.json 上的效果。如：
 
 ```shell
-python re_official_evaluation.py --golden_file=dev_data.json  --predict_file=predicitons.json.zip [--alias_file alias_dict]
+python re_official_evaluation.py --golden_file=dev_data.json  --predict_file=predictions.json.zip [--alias_file alias_dict]
 ```
 输出指标为 Precision, Recall 和 F1，Alias file 包含了合法的实体别名，最终评测的时候会使用，这里不予提供。
 

diff --git a/slm/examples/information_extraction/DuUIE/uie/seq2struct/data_collator.py b/slm/examples/information_extraction/DuUIE/uie/seq2struct/data_collator.py
@@ -119,13 +119,13 @@ def get_ordered_dict(schema_name_list, tokenizer):
         return schema_ordered_dict
 
     @staticmethod
-    def sample_negative(postive, candidates, k=5):
+    def sample_negative(positive, candidates, k=5):
         if k < 0:
             k = len(candidates)
         negative_set = set()
         for index in np.random.permutation(len(candidates))[:k].tolist():
             negative = candidates[index]
-            if negative not in postive:
+            if negative not in positive:
                 negative_set.add(negative)
 
         return list(negative_set)
@@ -143,7 +143,7 @@ def sample_spot(self, positive, candidates=None):
         """
         neg_cands = candidates if candidates is not None else self.spot_list
 
-        negative_spot = self.sample_negative(postive=positive, candidates=neg_cands, k=self.negative)
+        negative_spot = self.sample_negative(positive=positive, candidates=neg_cands, k=self.negative)
         positive_spot = random.sample(positive, math.floor(len(positive) * self.positive_rate))
 
         converted_spot_prefix = self.convert_prefix(
@@ -166,7 +166,7 @@ def sample_asoc(self, positive, candidates=None):
             List[str]: Sampled Negative Asoc List
         """
         neg_cands = candidates if candidates is not None else self.asoc_list
-        negative_asoc = self.sample_negative(postive=positive, candidates=neg_cands, k=self.negative)
+        negative_asoc = self.sample_negative(positive=positive, candidates=neg_cands, k=self.negative)
         converted_asoc_prefix = self.convert_prefix(
             candidates=positive + negative_asoc,
             prompt=self.asoc_prompt_id,

diff --git a/slm/examples/model_compression/distill_lstm/bert_distill.py b/slm/examples/model_compression/distill_lstm/bert_distill.py
@@ -65,7 +65,7 @@ def evaluate(task_name, model, metric, data_loader):
     model.train()
 
 
-def do_train(agrs):
+def do_train(args):
     paddle.set_device(args.device)
     train_data_loader, dev_data_loader = create_distill_loader(
         args.task_name,

diff --git a/slm/examples/model_interpretation/data/similarity_en b/slm/examples/model_interpretation/data/similarity_en
@@ -13,7 +13,7 @@
 {"id": 13, "sentence1": "How do smart and successful people control their emotions ?", "sentence2": "How can I control my emotions ?", "text_q_seg": ["How", "do", "smart", "and", "successful", "people", "control", "their", "emotions", "?"], "text_t_seg": ["How", "can", "I", "control", "my", "emotions", "?"], "sample_type": "ori", "rel_ids": [1672]}
 {"id": 14, "sentence1": "What are the best tips for outlining / planning a novel ?", "sentence2": "How do I best outline my novel ?", "text_q_seg": ["What", "are", "the", "best", "tips", "for", "outlining", "/", "planning", "a", "novel", "?"], "text_t_seg": ["How", "do", "I", "best", "outline", "my", "novel", "?"], "sample_type": "ori", "rel_ids": [1673]}
 {"id": 15, "sentence1": "What will happen if Donald Trump became the president of America ?", "sentence2": "What will happen now that President - elect Donald Trump has won the election ?", "text_q_seg": ["What", "will", "happen", "if", "Donald", "Trump", "became", "the", "president", "of", "America", "?"], "text_t_seg": ["What", "will", "happen", "now", "that", "President", "-", "elect", "Donald", "Trump", "has", "won", "the", "election", "?"], "sample_type": "ori", "rel_ids": [1674]}
-{"id": 16, "sentence1": "Why did n't Ned Stark bring more men to the Tower of Joy ?", "sentence2": "Why did Ned Stark go to the Tower of Joy with so few men ? Why not bring a small guard ( say 20 more men ) of loyal and discreet northerners ?", "text_q_seg": ["Why", "did", "n't", "Ned", "Stark", "bring", "more", "men", "to", "the", "Tower", "of", "Joy", "?"], "text_t_seg": ["Why", "did", "Ned", "Stark", "go", "to", "the", "Tower", "of", "Joy", "with", "so", "few", "men", "?", "Why", "not", "bring", "a", "small", "guard", "(", "say", "20", "more", "men", ")", "of", "loyal", "and", "discreet", "northerners", "?"], "sample_type": "ori", "rel_ids": [1675]}
+{"id": 16, "sentence1": "Why did n't Ned Stark bring more men to the Tower of Joy ?", "sentence2": "Why did Ned Stark go to the Tower of Joy with so few men ? Why not bring a small guard ( say 20 more men ) of loyal and discreet northerners ?", "text_q_seg": ["Why", "did", "n't", "Ned", "Stark", "bring", "more", "men", "to", "the", "Tower", "of", "Joy", "?"], "text_t_seg": ["Why", "did", "Ned", "Stark", "go", "to", "the", "Tower", "of", "Joy", "with", "so", "few", "men", "?", "Why", "not", "bring", "a", "small", "guard", "(", "say", "20", "more", "men", ")", "of", "loyal", "and", "discreet", "northerners", "?"], "sample_type": "ori", "rel_ids": [1675]}
 {"id": 17, "sentence1": "How do you get better grades ?", "sentence2": "How can I dramatically improve my grades ?", "text_q_seg": ["How", "do", "you", "get", "better", "grades", "?"], "text_t_seg": ["How", "can", "I", "dramatically", "improve", "my", "grades", "?"], "sample_type": "ori", "rel_ids": [1676]}
 {"id": 18, "sentence1": "What is your new year resolution , short term and long term goal for 2017 ?", "sentence2": "What will be your New Year 's resolution for 2017 ?", "text_q_seg": ["What", "is", "your", "new", "year", "resolution", ",", "short", "term", "and", "long", "term", "goal", "for", "2017", "?"], "text_t_seg": ["What", "will", "be", "your", "New", "Year", "'s", "resolution", "for", "2017", "?"], "sample_type": "ori", "rel_ids": [1677]}
 {"id": 19, "sentence1": "What will happen to the next Star Wars movies after Carrie Fisher 's death ?", "sentence2": "What will Carrie Fisher 's death mean for the next Star Wars movies ?", "text_q_seg": ["What", "will", "happen", "to", "the", "next", "Star", "Wars", "movies", "after", "Carrie", "Fisher", "'s", "death", "?"], "text_t_seg": ["What", "will", "Carrie", "Fisher", "'s", "death", "mean", "for", "the", "next", "Star", "Wars", "movies", "?"], "sample_type": "ori", "rel_ids": [1678]}
@@ -52,7 +52,7 @@
 {"id": 1661, "sentence1": "I am 25 year old guy and never had a girlfriend . Is this odd ?", "sentence2": "I am 25 years old . I have never had a girlfriend . Is something wrong with me ?", "text_q_seg": ["I", "am", "25", "year", "old", "guy", "and", "never", "had", "a", "girlfriend", ".", "Is", "this", "odd", "?"], "text_t_seg": ["I", "am", "25", "years", "old", ".", "I", "have", "never", "had", "a", "girlfriend", ".", "Is", "something", "wrong", "with", "me", "?"], "sample_type": "disturb"}
 {"id": 1662, "sentence1": "what is a good answer on Quora that is helpful ？", "sentence2": "How do you write a good answer on Quora ?", "text_q_seg": ["what", "is", "a", "good", "answer", "on", "Quora", "that", "is", "helpful", "？"], "text_t_seg": ["How", "do", "you", "write", "a", "good", "answer", "on", "Quora", "?"], "sample_type": "disturb"}
 {"id": 1663, "sentence1": "What was the most fatal battle in history ?", "sentence2": "What was the bloodiest battle in history ?", "text_q_seg": ["What", "was", "the", "most", "fatal", "battle", "in", "history", "?"], "text_t_seg": ["What", "was", "the", "bloodiest", "battle", "in", "history", "?"], "sample_type": "disturb"}
-{"id": 1664, "sentence1": "What are your opions on demonetisation in India ?", "sentence2": "What do you think about the ban on 500 and 1000 denomination notes in India ?", "text_q_seg": ["What", "are", "your", "opions", "on", "demonetisation", "in", "India", "?"], "text_t_seg": ["What", "do", "you", "think", "about", "the", "ban", "on", "500", "and", "1000", "denomination", "notes", "in", "India", "?"], "sample_type": "disturb"}
+{"id": 1664, "sentence1": "What are your opinions on demonetisation in India ?", "sentence2": "What do you think about the ban on 500 and 1000 denomination notes in India ?", "text_q_seg": ["What", "are", "your", "opinions", "on", "demonetisation", "in", "India", "?"], "text_t_seg": ["What", "do", "you", "think", "about", "the", "ban", "on", "500", "and", "1000", "denomination", "notes", "in", "India", "?"], "sample_type": "disturb"}
 {"id": 1665, "sentence1": "Is it a bad time to buy a condo or a house in the Bay Area in 2017 ?", "sentence2": "Is 2017 a good time to buy a house in Bay Area ?", "text_q_seg": ["Is", "it", "a", "bad", "time", "to", "buy", "a", "condo", "or", "a", "house", "in", "the", "Bay", "Area", "in", "2017", "?"], "text_t_seg": ["Is", "2017", "a", "good", "time", "to", "buy", "a", "house", "in", "Bay", "Area", "?"], "sample_type": "disturb"}
 {"id": 1666, "sentence1": "What books should an aspiring entrepreneur read ?", "sentence2": "What are the top books an aspiring teen entrepreneur should read ?", "text_q_seg": ["What", "books", "should", "an", "aspiring", "entrepreneur", "read", "?"], "text_t_seg": ["What", "are", "the", "top", "books", "an", "aspiring", "teen", "entrepreneur", "should", "read", "?"], "sample_type": "disturb"}
 {"id": 1667, "sentence1": "If universe is expanding infinitely and dark and vacuum energy are created as it expands … ?", "sentence2": "If universe can expand without limit and it creates dark / vacuum / gravitational energy with it , then is the potential energy infinite ?", "text_q_seg": ["If", "universe", "is", "expanding", "infinitely", "and", "dark", "and", "vacuum", "energy", "are", "created", "as", "it", "expands", "…", "?"], "text_t_seg": ["If", "universe", "can", "expand", "without", "limit", "and", "it", "creates", "dark", "/", "vacuum", "/", "gravitational", "energy", "with", "it", ",", "then", "is", "the", "potential", "energy", "infinite", "?"], "sample_type": "disturb"}