diff --git a/README.md b/README.md index ff23efc..7a0e609 100644 --- a/README.md +++ b/README.md @@ -123,11 +123,14 @@ Here's how you interact with the file: The section can also be specified via the ``-s`` / ``--section`` option: ```shell - $ blurb add -s Library - # or - $ blurb add -s library + $ blurb add -s 'Library' + # or equivalently + $ blurb add -s lib ``` + The match is performed case insensitively and partial matching is + supported as long as the match is unique. + * Finally, go to the end of the file, and enter your `NEWS` entry. This should be a single paragraph of English text using simple reST markup. diff --git a/src/blurb/_add.py b/src/blurb/_add.py index c70fd42..09c4d5d 100644 --- a/src/blurb/_add.py +++ b/src/blurb/_add.py @@ -2,6 +2,7 @@ import atexit import os +import re import shlex import shutil import subprocess @@ -11,7 +12,12 @@ from blurb._blurb_file import BlurbError, Blurbs from blurb._cli import error, prompt from blurb._git import flush_git_add_files, git_add_files -from blurb._template import sections, template +from blurb._template import ( + _section_names_lower_nosep, + _section_special_patterns, + sections, + template, +) TYPE_CHECKING = False if TYPE_CHECKING: @@ -35,8 +41,8 @@ def add(*, issue: str | None = None, section: str | None = None): Use -s/--section to specify the section name (case-insensitive), e.g.: blurb add -s Library - # or - blurb add -s library + # or, using a partial match + blurb add -s lib The known sections names are defined as follows and spaces in names can be substituted for underscores: @@ -176,6 +182,12 @@ def _extract_section_name(section: str | None, /) -> str | None: for section_name in sections: if section in {section_name, section_name.lower()}: matches.append(section_name) + if section_name.lower().startswith(section.lower()): + matches.append(section_name) + + if not matches: + # Try a more complex algorithm if we are unlucky + matches = _find_smart_matches(section) if not matches: 
section_list = '\n'.join(f'* {s}' for s in sections) @@ -190,6 +202,40 @@ def _extract_section_name(section: str | None, /) -> str | None: return matches[0] +def _find_smart_matches(section): + # '_', '-' and ' ' are the allowed (user) whitespace separators + sanitized = re.sub(r'[_\- ]', ' ', section).strip() + if not sanitized: + return [] + + matches = [] + section_words = re.split(r'\s+', sanitized) + # ' ' and '/' are the separators used by known sections + section_pattern = r'[ /]'.join(map(re.escape, section_words)) + section_pattern = re.compile(section_pattern, re.I) + + for section_name in sections: + # try to use the input as the pattern to match against known names + if section_pattern.match(section_name): + matches.append(section_name) + + if not matches: + for section_name, special_patterns in _section_special_patterns.items(): + for special_pattern in special_patterns: + if special_pattern.match(sanitized): + matches.append(section_name) + break + + if not matches: + # try to use the input as the prefix of a flattened section name + normalized_prefix = ''.join(section_words).lower() + for section_name, normalized in _section_names_lower_nosep.items(): + if normalized.startswith(normalized_prefix): + matches.append(section_name) + + return matches + + def _add_blurb_from_template(args: Sequence[str], tmp_path: str) -> Blurbs | None: subprocess.run(args) diff --git a/src/blurb/_template.py b/src/blurb/_template.py index 36429d7..8df695f 100644 --- a/src/blurb/_template.py +++ b/src/blurb/_template.py @@ -1,5 +1,7 @@ from __future__ import annotations +import re + # # This template is the canonical list of acceptable section names! # It's parsed internally into the "sections" set. 
@@ -82,3 +84,39 @@ def next_filename_unsanitize_sections(filename: str, /) -> str: value = f'{separator}{value}{separator}' filename = filename.replace(key, value) return filename + + +# Mapping from section names to additional allowed patterns +# which ignore whitespaces for composed section names. +# +# For instance, 'Core and Builtins' is represented by the +# pattern 'Core{SEP}?and{SEP}?Builtins' where {SEP} is any of the +# allowed user separators '_', '-', ' ' and '/'. +_section_special_patterns = {__: set() for __ in sections} + +# Mapping from section names to sanitized names (no separators, lowercase). +# +# For instance, 'Core and Builtins' is mapped to 'coreandbuiltins', and +# passing a prefix of that would match to 'Core and Builtins'. Note that +# this is only used as a last resort. +_section_names_lower_nosep = {} + +for _section in sections: + # ' ' and '/' are the separators used by known sections + _sanitized = re.sub(r'[ /]', ' ', _section) + _section_words = re.split(r'\s+', _sanitized) + _section_names_lower_nosep[_section] = ''.join(_section_words).lower() + del _sanitized + # '_', '-', ' ' and '/' are the allowed (user) separators + _section_pattern = r'[_\- /]?'.join(map(re.escape, _section_words)) + # add '$' to avoid matching after the pattern + _section_pattern = f'{_section_pattern}$' + del _section_words + _section_pattern = re.compile(_section_pattern, re.I) + _section_special_patterns[_section].add(_section_pattern) + del _section_pattern, _section + +# the following statements will raise KeyError if the names are invalid +_section_special_patterns['C API'].add(re.compile(r'^((?<=c)[_\- /])?api$', re.I)) +_section_special_patterns['Core and Builtins'].add(re.compile('^builtins?$', re.I)) +_section_special_patterns['Tools/Demos'].add(re.compile('^dem(?:o|os)?$', re.I)) diff --git a/tests/test_add.py b/tests/test_add.py index 23eb404..bab79f5 100644 --- a/tests/test_add.py +++ b/tests/test_add.py @@ -133,6 +133,96 @@ def 
test_exact_names_lowercase(section_name, expected): _check_section_name(section_name, expected) +@pytest.mark.parametrize( + ('section', 'expect'), + [ + ('Sec', 'Security'), + ('sec', 'Security'), + ('security', 'Security'), + ('Core And', 'Core and Builtins'), + ('Core And Built', 'Core and Builtins'), + ('Core And Builtins', 'Core and Builtins'), + ('Lib', 'Library'), + ('doc', 'Documentation'), + ('document', 'Documentation'), + ('Tes', 'Tests'), + ('tes', 'Tests'), + ('Test', 'Tests'), + ('Tests', 'Tests'), + ('Buil', 'Build'), + ('buil', 'Build'), + ('build', 'Build'), + ('Tool', 'Tools/Demos'), + ('Tools', 'Tools/Demos'), + ('Tools/', 'Tools/Demos'), + ('core', 'Core and Builtins'), + ], +) +def test_partial_words(section, expect): + _check_section_name(section, expect) + + +@pytest.mark.parametrize( + ('section', 'expect'), + [ + ('builtin', 'Core and Builtins'), + ('builtins', 'Core and Builtins'), + ('api', 'C API'), + ('c-api', 'C API'), + ('c/api', 'C API'), + ('c api', 'C API'), + ('dem', 'Tools/Demos'), + ('demo', 'Tools/Demos'), + ('demos', 'Tools/Demos'), + ], +) +def test_partial_special_names(section, expect): + _check_section_name(section, expect) + + +@pytest.mark.parametrize( + ('section', 'expect'), + [ + ('Core-and-Builtins', 'Core and Builtins'), + ('Core_and_Builtins', 'Core and Builtins'), + ('Core_and-Builtins', 'Core and Builtins'), + ('Core and', 'Core and Builtins'), + ('Core_and', 'Core and Builtins'), + ('core_and', 'Core and Builtins'), + ('core-and', 'Core and Builtins'), + ('Core and Builtins', 'Core and Builtins'), + ('cOre _ and - bUILtins', 'Core and Builtins'), + ('Tools/demo', 'Tools/Demos'), + ('Tools-demo', 'Tools/Demos'), + ('Tools demo', 'Tools/Demos'), + ], +) +def test_partial_separators(section, expect): + # normalize the separators '_', '-', ' ' and '/' + _check_section_name(section, expect) + + +@pytest.mark.parametrize( + ('prefix', 'expect'), + [ + ('corean', 'Core and Builtins'), + ('coreand', 'Core and Builtins'), 
+ ('coreandbuilt', 'Core and Builtins'), + ('coreand Builtins', 'Core and Builtins'), + ('coreand Builtins', 'Core and Builtins'), + ('coreAnd Builtins', 'Core and Builtins'), + ('CoreAnd Builtins', 'Core and Builtins'), + ('Coreand', 'Core and Builtins'), + ('Coreand Builtins', 'Core and Builtins'), + ('Coreand builtin', 'Core and Builtins'), + ('Coreand buil', 'Core and Builtins'), + ], +) +def test_partial_prefix_words(prefix, expect): + # try to find a match using prefixes (without separators and lowercase) + _check_section_name(prefix, expect) + + @pytest.mark.parametrize( 'section', ( @@ -183,3 +273,26 @@ def test_invalid_section_name(section): with pytest.raises(SystemExit, match=error_message): _blurb_template_text(issue=None, section=section) + + +@pytest.mark.parametrize( + ('section', 'matches'), + [ + # 'matches' must be a sorted sequence of matching section names + ('c', ['C API', 'Core and Builtins']), + ('C', ['C API', 'Core and Builtins']), + ('t', ['Tests', 'Tools/Demos']), + ('T', ['Tests', 'Tools/Demos']), + ], +) +def test_ambiguous_section_name(section, matches): + matching_list = ', '.join(map(repr, matches)) + error_message = re.escape( + f'More than one match for: {section!r}\nMatches: {matching_list}' + ) + error_message = re.compile(rf'{error_message}', re.MULTILINE) + with pytest.raises(SystemExit, match=error_message): + _extract_section_name(section) + + with pytest.raises(SystemExit, match=error_message): + _blurb_template_text(issue=None, section=section)