brain-score · yarikoptic · Aug 21, 2025 · Aug 21, 2025 · Aug 21, 2025 · Aug 21, 2025
diff --git a/.github/workflows/codespell.yml b/.github/workflows/codespell.yml
@@ -0,0 +1,25 @@
+# Codespell configuration lives in pyproject.toml, under [tool.codespell]
+---
+name: Codespell
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+permissions:
+  contents: read
+
+jobs:
+  codespell:
+    name: Check for spelling errors
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v4
+      - name: Annotate locations with typos
+        uses: codespell-project/codespell-problem-matcher@v1
+      - name: Codespell
+        uses: codespell-project/actions-codespell@v2
diff --git a/brainscore_language/metrics/cka/metric.py b/brainscore_language/metrics/cka/metric.py
@@ -19,7 +19,7 @@ def centering(K):
 
     return np.dot(np.dot(H, K), H)
     # HKH are the same with KH, KH is the first centering, H(KH) do the second time,
-    # results are the sme with one time centering
+    # results are the same as with one-time centering
     # return np.dot(H, K)  # KH
 
 

diff --git a/brainscore_language/model_helpers/container.py b/brainscore_language/model_helpers/container.py
@@ -24,7 +24,7 @@
 
 class ContainerSubject(ArtificialSubject):
     """
-    Evaluation interface for arbitary containerized models.
+    Evaluation interface for arbitrary containerized models.
     User must install either 'Singularity' or 'Docker' to evaluate container models.
 
     To add new model, build a container with an entry point that supports the following interface:
@@ -73,7 +73,7 @@ def __init__(
         """
         :param container: Container name, e.g., "USERNAME/CONTAINER:TAG"
         :param entrypoint: Entrypoint to run inside container, e.g., "python /path/to/entrypoint.py"
-        :param identifier: Model identifer passed to entrypoint, e.g., "model_name"
+        :param identifier: Model identifier passed to entrypoint, e.g., "model_name"
         :param region_layer_mapping: Mapping from brain region to requested measure, e.g., {"language_system": "model_layer_name"}
         :param task_heads: Mapping from task to callable that takes the output of the container and returns a score, e.g., {ArtificialSubject.Task.next_word: predict_next_word_function}
         """

diff --git a/brainscore_language/model_helpers/modeling_suma.py b/brainscore_language/model_helpers/modeling_suma.py
@@ -1124,7 +1124,7 @@ def prepare_inputs_for_generation(
 
             # Keep only the unprocessed tokens:
             # 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where
-            # some of the inputs are exclusivelly passed as part of the cache (e.g. when passing input_embeds as
+            # some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as
             # input)
             if attention_mask is not None and attention_mask.shape[1] > input_ids.shape[1]:
                 input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :]

diff --git a/brainscore_language/models/earley_parser/parser.py b/brainscore_language/models/earley_parser/parser.py
@@ -221,7 +221,7 @@ def create_grammar(
         :param treebank_path: a path to a treebank corpus
         :param grammar_string: one or more file names to be parsed in the grammar. If None, all files will be parsed
         :param unk_low_frequency: if True, replaces all words that appear less than k times by <unk>
-        :param k: the <unk> replacement threshold (min number of occurances for a word to NOT be replaced by <unk>)
+        :param k: the <unk> replacement threshold (min number of occurrences for a word to NOT be replaced by <unk>)
         """
 
         # Load PTB annotations
@@ -230,7 +230,7 @@ def create_grammar(
             r".*",
         )
 
-        # First, get all productions and count the occurances of each lexical in all productions
+        # First, get all productions and count the occurrences of each lexical in all productions
         productions = []
         lexical_counts = {}
         for tree in treebank.parsed_sents(fileids):

diff --git a/brainscore_language/models/earley_parser/utils.py b/brainscore_language/models/earley_parser/utils.py
@@ -1,6 +1,6 @@
 """
 Modified rule definitions for the NLTK abstract chart rules to work with a probabilistic context-free grammar.
-Added a probabilstic Earley chart parser by applying incremental chart parsing with the probabilistic rules.
+Added a probabilistic Earley chart parser by applying incremental chart parsing with the probabilistic rules.
 Adapted from: https://www.nltk.org/api/nltk.parse.chart.html
 """
 

diff --git a/pyproject.toml b/pyproject.toml
@@ -75,3 +75,10 @@ markers = [
 "brainscore_language.data" = ["**"]
 "brainscore_language.metrics" = ["**"]
 "brainscore_language.models" = ["**"]
+
+[tool.codespell]
+# Ref: https://github.com/codespell-project/codespell#using-a-config-file
+skip = '.git*,*.csv,*.json,data'
+check-hidden = true
+ignore-regex = '^\s*"image/\S+": ".*'
+# ignore-words-list = ''