Skip to content

Add partial and 'smart' matching for --section #65

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -123,11 +123,14 @@ Here's how you interact with the file:
The section can also be specified via the ``-s`` / ``--section`` option:

```shell
$ blurb add -s Library
# or
$ blurb add -s library
$ blurb add -s 'Library'
# or equivalently
$ blurb add -s lib
```

The match is case-insensitive, and partial matching is
supported as long as the match is unique.

* Finally, go to the end of the file, and enter your `NEWS` entry.
This should be a single paragraph of English text using
simple reST markup.
Expand Down
52 changes: 49 additions & 3 deletions src/blurb/_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import atexit
import os
import re
import shlex
import shutil
import subprocess
Expand All @@ -11,7 +12,12 @@
from blurb._blurb_file import BlurbError, Blurbs
from blurb._cli import error, prompt
from blurb._git import flush_git_add_files, git_add_files
from blurb._template import sections, template
from blurb._template import (
_section_names_lower_nosep,
_section_special_patterns,
sections,
template,
)

TYPE_CHECKING = False
if TYPE_CHECKING:
Expand All @@ -35,8 +41,8 @@ def add(*, issue: str | None = None, section: str | None = None):
Use -s/--section to specify the section name (case-insensitive), e.g.:

blurb add -s Library
# or
blurb add -s library
# or, using a partial match
blurb add -s lib

The known sections names are defined as follows and
spaces in names can be substituted for underscores:
Expand Down Expand Up @@ -176,6 +182,12 @@ def _extract_section_name(section: str | None, /) -> str | None:
for section_name in sections:
if section in {section_name, section_name.lower()}:
matches.append(section_name)
if section_name.lower().startswith(section.lower()):
matches.append(section_name)

if not matches:
# Try a more complex algorithm if we are unlucky
matches = _find_smart_matches(section)

if not matches:
section_list = '\n'.join(f'* {s}' for s in sections)
Expand All @@ -190,6 +202,40 @@ def _extract_section_name(section: str | None, /) -> str | None:
return matches[0]


def _find_smart_matches(section):
    """Return the known section names that loosely match *section*.

    Three strategies are tried in order; the first one that yields at
    least one name decides the result:

    1. the input words, joined by a section separator (' ' or '/'),
       are matched case-insensitively at the start of each known name;
    2. the sanitized input is matched against the per-section special
       patterns;
    3. the input words, concatenated and lowercased, are used as a
       prefix of the flattened (no separator, lowercase) section names.

    An empty list is returned when the input only contains separators
    or when no strategy finds anything.
    """
    # '_', '-' and ' ' are the allowed (user) whitespace separators
    cleaned = re.sub(r'[_\- ]', ' ', section).strip()
    if not cleaned:
        return []

    words = re.split(r'\s+', cleaned)

    # ' ' and '/' are the separators used by known sections
    name_regex = re.compile(
        r'[ /]'.join(re.escape(word) for word in words), re.I
    )
    # use the input as the pattern to match against known names
    found = [name for name in sections if name_regex.match(name)]
    if found:
        return found

    # a section is reported once, however many of its patterns match
    found = [
        name
        for name, patterns in _section_special_patterns.items()
        if any(pattern.match(cleaned) for pattern in patterns)
    ]
    if found:
        return found

    # last resort: the input as the prefix of a flattened section name
    flat_prefix = ''.join(words).lower()
    return [
        name
        for name, flat_name in _section_names_lower_nosep.items()
        if flat_name.startswith(flat_prefix)
    ]


def _add_blurb_from_template(args: Sequence[str], tmp_path: str) -> Blurbs | None:
subprocess.run(args)

Expand Down
38 changes: 38 additions & 0 deletions src/blurb/_template.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from __future__ import annotations

import re

#
# This template is the canonical list of acceptable section names!
# It's parsed internally into the "sections" set.
Expand Down Expand Up @@ -82,3 +84,39 @@ def next_filename_unsanitize_sections(filename: str, /) -> str:
value = f'{separator}{value}{separator}'
filename = filename.replace(key, value)
return filename


# Mapping from section names to additional allowed patterns in which
# the separators between the words of a composed section name are
# optional.
#
# For instance, 'Core and Builtins' is represented by the
# pattern 'Core<SEP>?and<SEP>?Builtins' where <SEP> is one of the
# allowed user separators '_', '-', ' ' and '/'.
_section_special_patterns = {_name: set() for _name in sections}

# Mapping from section names to sanitized names (no separators, lowercase).
#
# For instance, 'Core and Builtins' is mapped to 'coreandbuiltins', and
# passing a prefix of that would match to 'Core and Builtins'. Note that
# this is only used as a last resort.
_section_names_lower_nosep = {}

for _section in sections:
    # ' ' and '/' are the separators used by known sections
    _section_words = re.split(r'\s+', re.sub(r'[ /]', ' ', _section))
    _section_names_lower_nosep[_section] = ''.join(_section_words).lower()
    # '_', '-', ' ' and '/' are the allowed (user) separators; the
    # trailing '$' avoids matching input that continues after the name
    _section_pattern = r'[_\- /]?'.join(map(re.escape, _section_words)) + '$'
    _section_special_patterns[_section].add(re.compile(_section_pattern, re.I))
    # do not leave loop temporaries behind in the module namespace
    del _section_words, _section_pattern, _section

# the following statements will raise KeyError if the names are invalid
#
# 'api' alone is an alias of 'C API'.  The spellings 'c-api', 'c_api',
# 'c/api', 'c api' and 'capi' are already covered by the generic pattern
# built in the loop above, so no optional separator is needed here (a
# lookbehind placed right after '^' could never succeed anyway).
_section_special_patterns['C API'].add(re.compile(r'^api$', re.I))
_section_special_patterns['Core and Builtins'].add(re.compile(r'^builtins?$', re.I))
_section_special_patterns['Tools/Demos'].add(re.compile(r'^dem(?:o|os)?$', re.I))
113 changes: 113 additions & 0 deletions tests/test_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,96 @@ def test_exact_names_lowercase(section_name, expected):
_check_section_name(section_name, expected)


@pytest.mark.parametrize(
    'section, expect',
    (
        ('Sec', 'Security'),
        ('sec', 'Security'),
        ('security', 'Security'),
        ('Core And', 'Core and Builtins'),
        ('Core And Built', 'Core and Builtins'),
        ('Core And Builtins', 'Core and Builtins'),
        ('Lib', 'Library'),
        ('doc', 'Documentation'),
        ('document', 'Documentation'),
        ('Tes', 'Tests'),
        ('tes', 'Tests'),
        ('Test', 'Tests'),
        ('Tests', 'Tests'),
        ('Buil', 'Build'),
        ('buil', 'Build'),
        ('build', 'Build'),
        ('Tool', 'Tools/Demos'),
        ('Tools', 'Tools/Demos'),
        ('Tools/', 'Tools/Demos'),
        ('core', 'Core and Builtins'),
    ),
)
def test_partial_words(section, expect):
    """Check inputs that are case-insensitive prefixes of a section name."""
    _check_section_name(section, expect)


@pytest.mark.parametrize(
    'section, expect',
    (
        ('builtin', 'Core and Builtins'),
        ('builtins', 'Core and Builtins'),
        ('api', 'C API'),
        ('c-api', 'C API'),
        ('c/api', 'C API'),
        ('c api', 'C API'),
        ('dem', 'Tools/Demos'),
        ('demo', 'Tools/Demos'),
        ('demos', 'Tools/Demos'),
    ),
)
def test_partial_special_names(section, expect):
    """Check aliases ('builtin', 'api', 'demo', ...) and 'C API' spellings."""
    _check_section_name(section, expect)


@pytest.mark.parametrize(
    'section, expect',
    (
        ('Core-and-Builtins', 'Core and Builtins'),
        ('Core_and_Builtins', 'Core and Builtins'),
        ('Core_and-Builtins', 'Core and Builtins'),
        ('Core and', 'Core and Builtins'),
        ('Core_and', 'Core and Builtins'),
        ('core_and', 'Core and Builtins'),
        ('core-and', 'Core and Builtins'),
        ('Core and Builtins', 'Core and Builtins'),
        ('cOre _ and - bUILtins', 'Core and Builtins'),
        ('Tools/demo', 'Tools/Demos'),
        ('Tools-demo', 'Tools/Demos'),
        ('Tools demo', 'Tools/Demos'),
    ),
)
def test_partial_separators(section, expect):
    """Check inputs whose words are joined by '_', '-', ' ' or '/'."""
    # the separators '_', '-', ' ' and '/' are expected to be normalized
    _check_section_name(section, expect)


@pytest.mark.parametrize(
    'prefix, expect',
    (
        ('corean', 'Core and Builtins'),
        ('coreand', 'Core and Builtins'),
        ('coreandbuilt', 'Core and Builtins'),
        ('coreand Builtins', 'Core and Builtins'),
        # NOTE(review): identical to the previous case as written here;
        # confirm whether a different spacing was intended.
        ('coreand Builtins', 'Core and Builtins'),
        ('coreAnd Builtins', 'Core and Builtins'),
        ('CoreAnd Builtins', 'Core and Builtins'),
        ('Coreand', 'Core and Builtins'),
        ('Coreand Builtins', 'Core and Builtins'),
        ('Coreand builtin', 'Core and Builtins'),
        ('Coreand buil', 'Core and Builtins'),
    ),
)
def test_partial_prefix_words(prefix, expect):
    """Check inputs that glue the leading words of a section name together."""
    # a match is expected through prefixes (without separators and lowercase)
    _check_section_name(prefix, expect)


@pytest.mark.parametrize(
'section',
(
Expand Down Expand Up @@ -183,3 +273,26 @@ def test_invalid_section_name(section):

with pytest.raises(SystemExit, match=error_message):
_blurb_template_text(issue=None, section=section)


@pytest.mark.parametrize(
    'section, matches',
    (
        # 'matches' must be a sorted sequence of matching section names
        ('c', ['C API', 'Core and Builtins']),
        ('C', ['C API', 'Core and Builtins']),
        ('t', ['Tests', 'Tools/Demos']),
        ('T', ['Tests', 'Tools/Demos']),
    ),
)
def test_ambiguous_section_name(section, matches):
    """Check that an input matching several sections exits with an error.

    The expected message lists the input and every matching section name.
    """
    matching_list = ', '.join(repr(name) for name in matches)
    # escape the literal message so that it can be used as a regex
    escaped = re.escape(
        f'More than one match for: {section!r}\nMatches: {matching_list}'
    )
    expected_error = re.compile(escaped, re.MULTILINE)

    with pytest.raises(SystemExit, match=expected_error):
        _extract_section_name(section)

    with pytest.raises(SystemExit, match=expected_error):
        _blurb_template_text(issue=None, section=section)
Loading