Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions .github/workflows/codespell.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# Codespell configuration is within pyproject.toml
---
name: Codespell

on:
  push:
    branches: [main]
  pull_request:
    branches: [main]

permissions:
  contents: read

jobs:
  codespell:
    name: Check for spelling errors
    runs-on: ubuntu-latest

    steps:
      - name: Checkout
        uses: actions/checkout@v4
      - name: Annotate locations with typos
        uses: codespell-project/codespell-problem-matcher@v1
      - name: Codespell
        uses: codespell-project/actions-codespell@v2
2 changes: 1 addition & 1 deletion brainscore_language/metrics/cka/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def centering(K):

return np.dot(np.dot(H, K), H)
# HKH are the same with KH, KH is the first centering, H(KH) do the second time,
# results are the sme with one time centering
# results are the same with one time centering
# return np.dot(H, K) # KH


Expand Down
4 changes: 2 additions & 2 deletions brainscore_language/model_helpers/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

class ContainerSubject(ArtificialSubject):
"""
Evaluation interface for arbitary containerized models.
Evaluation interface for arbitrary containerized models.
User must install either 'Singularity' or 'Docker' to evaluate container models.

To add new model, build a container with an entry point that supports the following interface:
Expand Down Expand Up @@ -73,7 +73,7 @@ def __init__(
"""
:param container: Container name, e.g., "USERNAME/CONTAINER:TAG"
:param entrypoint: Entrypoint to run inside container, e.g., "python /path/to/entrypoint.py"
:param identifier: Model identifer passed to entrypoint, e.g., "model_name"
:param identifier: Model identifier passed to entrypoint, e.g., "model_name"
:param region_layer_mapping: Mapping from brain region to requested measure, e.g., {"language_system": "model_layer_name"}
:param task_heads: Mapping from task to callable that takes the output of the container and returns a score, e.g., {ArtificialSubject.Task.next_word: predict_next_word_function}
"""
Expand Down
2 changes: 1 addition & 1 deletion brainscore_language/model_helpers/modeling_suma.py
Original file line number Diff line number Diff line change
Expand Up @@ -1124,7 +1124,7 @@ def prepare_inputs_for_generation(

# Keep only the unprocessed tokens:
# 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where
# some of the inputs are exclusivelly passed as part of the cache (e.g. when passing input_embeds as
# some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as
# input)
if attention_mask is not None and attention_mask.shape[1] > input_ids.shape[1]:
input_ids = input_ids[:, -(attention_mask.shape[1] - past_length) :]
Expand Down
4 changes: 2 additions & 2 deletions brainscore_language/models/earley_parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,7 @@ def create_grammar(
:param treebank_path: a path to a treebank corpus
:param grammar_string: one or more file names to be parsed in the grammar. If None, all files will be parsed
:param unk_low_frequency: if True, replaces all words that appear less than k times by <unk>
:param k: the <unk> replacement threshold (min number of occurances for a word to NOT be replaced by <unk>)
:param k: the <unk> replacement threshold (min number of occurrences for a word to NOT be replaced by <unk>)
"""

# Load PTB annotations
Expand All @@ -230,7 +230,7 @@ def create_grammar(
r".*",
)

# First, get all productions and count the occurances of each lexical in all productions
# First, get all productions and count the occurrences of each lexical in all productions
productions = []
lexical_counts = {}
for tree in treebank.parsed_sents(fileids):
Expand Down
2 changes: 1 addition & 1 deletion brainscore_language/models/earley_parser/utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""
Modified rule definitions for the NLTK abstract chart rules to work with a probabilistic context-free grammar.
Added a probabilstic Earley chart parser by applying incremental chart parsing with the probabilistic rules.
Added a probabilistic Earley chart parser by applying incremental chart parsing with the probabilistic rules.
Adapted from: https://www.nltk.org/api/nltk.parse.chart.html
"""

Expand Down
7 changes: 7 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,3 +75,10 @@ markers = [
"brainscore_language.data" = ["**"]
"brainscore_language.metrics" = ["**"]
"brainscore_language.models" = ["**"]

[tool.codespell]
# Ref: https://github.com/codespell-project/codespell#using-a-config-file
skip = '.git*,*.csv,*.json,data'
check-hidden = true
ignore-regex = '^\s*"image/\S+": ".*'
# ignore-words-list = ''