diff --git a/harper-comments/tests/language_support.rs b/harper-comments/tests/language_support.rs index 6cdfccbf1..6540c82c9 100644 --- a/harper-comments/tests/language_support.rs +++ b/harper-comments/tests/language_support.rs @@ -49,7 +49,7 @@ create_test!(merged_lines.ts, 1); create_test!(javadoc_clean_simple.java, 0); create_test!(javadoc_complex.java, 5); create_test!(issue_132.rs, 1); -create_test!(laravel_app.php, 2); +create_test!(laravel_app.php, 3); create_test!(ignore_shebang_1.sh, 0); create_test!(ignore_shebang_2.sh, 0); create_test!(ignore_shebang_3.sh, 0); @@ -58,7 +58,8 @@ create_test!(common.mill, 1); // Checks that some comments are masked out create_test!(ignore_comments.rs, 1); -create_test!(ignore_comments.c, 1); +// Both spell_check and split_words linters flag this now +create_test!(ignore_comments.c, 2); // These are to make sure nothing crashes. create_test!(empty.js, 0); diff --git a/harper-comments/tests/language_support_sources/javadoc_complex.java b/harper-comments/tests/language_support_sources/javadoc_complex.java index 054f4c464..2b79c0ec5 100644 --- a/harper-comments/tests/language_support_sources/javadoc_complex.java +++ b/harper-comments/tests/language_support_sources/javadoc_complex.java @@ -17,7 +17,7 @@ public static void main(String[] args) { } /** - * This doc has a link in it: {@link this sould b ignor} but not tis + * This doc has a link in it: {@link this sould b ignor} but not thsi * * @param name this is anoher test. */ diff --git a/harper-comments/tests/language_support_sources/jsdoc.ts b/harper-comments/tests/language_support_sources/jsdoc.ts index f46178f33..d9aac8587 100644 --- a/harper-comments/tests/language_support_sources/jsdoc.ts +++ b/harper-comments/tests/language_support_sources/jsdoc.ts @@ -1,14 +1,14 @@ /** This is a doc comment. - * Since there are no keywords it _sould_ be checked. */ + * Since there are no keywords it _shuld_ be checked. */ function test(){} /** This is also a doc comment. 
* @class this sould be unchecked. */ class Clazz { } -/** Here is another example: {@link this sould also b unchecked}. But this _sould_ be.*/ +/** Here is another example: {@link this sould also b unchecked}. But this _shuold_ be.*/ -/** However, tis should be checked, while {@link tis should not} */ +/** However, thsi should be checked, while {@link tis should not} */ /** * The following examples should be ignored by Harper. diff --git a/harper-comments/tests/language_support_sources/multiline_comments.cpp b/harper-comments/tests/language_support_sources/multiline_comments.cpp index 5eeeffcb0..65dcde19a 100644 --- a/harper-comments/tests/language_support_sources/multiline_comments.cpp +++ b/harper-comments/tests/language_support_sources/multiline_comments.cpp @@ -9,4 +9,4 @@ int test() {} */ int arbitrary() {} -/// Let's aadd a cuple spelling errors for good measure. +/// Let's putin a cuple spelling errors for good measure. diff --git a/harper-comments/tests/language_support_sources/multiline_comments.ts b/harper-comments/tests/language_support_sources/multiline_comments.ts index 07f3a8cfa..e5c4939c0 100644 --- a/harper-comments/tests/language_support_sources/multiline_comments.ts +++ b/harper-comments/tests/language_support_sources/multiline_comments.ts @@ -9,5 +9,5 @@ function test() {} */ function arbitrary() {} -// Let's aadd a cuple spelling errors for good measure. +// Let's putin a cuple spelling errors for good measure. 
diff --git a/harper-core/src/linting/lint_group.rs b/harper-core/src/linting/lint_group.rs
index e4086b8d9..8c813a552 100644
--- a/harper-core/src/linting/lint_group.rs
+++ b/harper-core/src/linting/lint_group.rs
@@ -60,6 +60,7 @@ use super::somewhat_something::SomewhatSomething;
 use super::spaces::Spaces;
 use super::spell_check::SpellCheck;
 use super::spelled_numbers::SpelledNumbers;
+use super::split_words::SplitWords;
 use super::that_which::ThatWhich;
 use super::the_how_why::TheHowWhy;
 use super::the_my::TheMy;
@@ -350,6 +351,7 @@ impl LintGroup {
         insert_pattern_rule!(SomewhatSomething, true);
         insert_struct_rule!(Spaces, true);
         insert_struct_rule!(SpelledNumbers, false);
+        insert_struct_rule!(SplitWords, true);
         insert_pattern_rule!(ThatWhich, true);
         insert_pattern_rule!(TheHowWhy, true);
         insert_struct_rule!(TheHowWhy, true);
diff --git a/harper-core/src/linting/mod.rs b/harper-core/src/linting/mod.rs
index 2e60f5d96..9f0928fc6 100644
--- a/harper-core/src/linting/mod.rs
+++ b/harper-core/src/linting/mod.rs
@@ -64,6 +64,7 @@ mod somewhat_something;
 mod spaces;
 mod spell_check;
 mod spelled_numbers;
+mod split_words;
 mod suggestion;
 mod that_which;
 mod the_how_why;
@@ -130,6 +131,7 @@ pub use somewhat_something::SomewhatSomething;
 pub use spaces::Spaces;
 pub use spell_check::SpellCheck;
 pub use spelled_numbers::SpelledNumbers;
+pub use split_words::SplitWords;
 pub use suggestion::Suggestion;
 pub use that_which::ThatWhich;
 pub use the_how_why::TheHowWhy;
diff --git a/harper-core/src/linting/split_words.rs b/harper-core/src/linting/split_words.rs
new file mode 100644
index 000000000..b629432c4
--- /dev/null
+++ b/harper-core/src/linting/split_words.rs
@@ -0,0 +1,138 @@
+use std::sync::Arc;
+
+use crate::{CharString, Dictionary, Document, FstDictionary};
+
+use super::{Lint, LintKind, Linter, Suggestion};
+
+/// Flags unknown words that are really two dictionary words written without
+/// the space between them, such as "moreso" for "more so".
+pub struct SplitWords {
+    dict: Arc<FstDictionary>,
+}
+
+impl SplitWords {
+    /// Creates the linter on top of the curated dictionary.
+    pub fn new() -> Self {
+        Self {
+            dict: FstDictionary::curated(),
+        }
+    }
+
+    /// Returns the replacements offered by the first split of `word` whose
+    /// pieces are found in the dictionary, or an empty list when none is.
+    ///
+    /// Splits are tried from the middle of the word outwards. At the winning
+    /// split the open form is listed before the hyphenated form.
+    fn split_suggestions(&self, word: &[char]) -> Vec<Suggestion> {
+        let mut suggestions = Vec::new();
+        let mut hyphenated = CharString::new();
+
+        for index in split_points(word.len()) {
+            let (first, second) = word.split_at(index);
+
+            if self.dict.contains_exact_word(first) && self.dict.contains_exact_word(second) {
+                let open = [first, &[' '][..], second].concat();
+                suggestions.push(Suggestion::ReplaceWith(open));
+            }
+
+            // This check cannot match until hyphenated words are added to the dictionary.
+            hyphenated.clear();
+            hyphenated.extend_from_slice(first);
+            hyphenated.push('-');
+            hyphenated.extend_from_slice(second);
+
+            if self.dict.contains_exact_word(&hyphenated) {
+                suggestions.push(Suggestion::ReplaceWith(hyphenated.to_vec()));
+            }
+
+            if !suggestions.is_empty() {
+                break;
+            }
+        }
+
+        suggestions
+    }
+}
+
+impl Default for SplitWords {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+/// Yields every interior split index of a word with `len` characters, starting
+/// at the middle and alternating outwards (`len == 6` gives 3, 4, 2, 5, 1).
+fn split_points(len: usize) -> impl Iterator<Item = usize> {
+    let middle = len / 2;
+
+    (1..len).map(move |step| {
+        if step % 2 == 0 {
+            middle + step / 2
+        } else {
+            // Never underflows: the largest odd `step` still leaves index 1.
+            middle - step / 2
+        }
+    })
+}
+
+impl Linter for SplitWords {
+    fn lint(&mut self, document: &Document) -> Vec<Lint> {
+        let mut lints = Vec::new();
+
+        for token in document.tokens() {
+            // A split needs at least one character on each side.
+            if !token.kind.is_word() || token.span.len() < 2 {
+                continue;
+            }
+
+            let chars = document.get_span_content(&token.span);
+
+            // Words the dictionary already knows are left alone.
+            if self.dict.contains_word(chars) {
+                continue;
+            }
+
+            let suggestions = self.split_suggestions(chars);
+
+            if suggestions.is_empty() {
+                continue;
+            }
+
+            // Both spellings go into one lint so the span is only reported once.
+            lints.push(Lint {
+                span: token.span,
+                lint_kind: LintKind::WordChoice,
+                suggestions,
+                message: "It seems this is actually two words joined together.".to_owned(),
+                priority: 63,
+            });
+        }
+
+        lints
+    }
+
+    fn description(&self) -> &str {
+        "Accidentally forgetting a space between words is common. This rule looks for valid words that are joined together without whitespace."
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::linting::tests::{assert_lint_count, assert_suggestion_result};
+
+    use super::SplitWords;
+
+    #[test]
+    fn flags_every_joined_pair() {
+        assert_lint_count(
+            "onetwo threefour fivesix seveneight nineten.",
+            SplitWords::default(),
+            5,
+        );
+    }
+
+    #[test]
+    fn suggests_open_form() {
+        assert_suggestion_result("moreso", SplitWords::default(), "more so");
+    }
+}
diff --git a/harper-core/tests/test_sources/chinese_lorem_ipsum.md b/harper-core/tests/test_sources/chinese_lorem_ipsum.md
index 75c60f701..371e4eac2 100644
--- a/harper-core/tests/test_sources/chinese_lorem_ipsum.md
+++ b/harper-core/tests/test_sources/chinese_lorem_ipsum.md
@@ -1,4 +1,4 @@
-The following text was generated using [a Chinese lorem ipsum generator](https://pinkylam.me/generator/chinese-lorem-ipsum/).
+The following text was generated using [a Chinese lorrm ipsum generator](https://pinkylam.me/generator/chinese-lorem-ipsum/).
 
 食棵支每躲種。奶象打星爪子二細喜才記行在發像原斤!頁固點子衣點豆看身蝴看苗急午公何足,筆娘經色蝶行元香也要。麻了綠尼固世,色北書目登功;因告黑。
 
diff --git a/harper-core/tests/test_sources/pr_504.md b/harper-core/tests/test_sources/pr_504.md
index 3ff95566b..915c9e9d1 100644
--- a/harper-core/tests/test_sources/pr_504.md
+++ b/harper-core/tests/test_sources/pr_504.md
@@ -4,4 +4,4 @@ These say "This is in Greek/Georgian/Thai" in those languages:
 ეს ქართულად.
 นี่มันภาษาไทย
 
-This is English with misstakes.
+This is English with erors.
diff --git a/harper-typst/tests/test_sources/complex_document_with_spelling_mistakes.typ b/harper-typst/tests/test_sources/complex_document_with_spelling_mistakes.typ index 47d0bbf16..4738c71c0 100644 --- a/harper-typst/tests/test_sources/complex_document_with_spelling_mistakes.typ +++ b/harper-typst/tests/test_sources/complex_document_with_spelling_mistakes.typ @@ -55,7 +55,7 @@ #titleblock( title: "A fluid dynamic model for glaier flow", authors: ("Grant Lemons", "John Doe", "Jane Doe"), - abstract: lorem(80), + abstract: lorrm(80), doc, ) ] @@ -63,5 +63,5 @@ = Introduction #lorem(300) -= Related ork += Related wrk #lorem(200)