Skip to content

Commit 58463f0

Browse files
committed
Use word boundaries for alnum seps and no boundaries for symbols
1 parent 66b3442 commit 58463f0

File tree

1 file changed

+16
-8
lines changed

1 file changed

+16
-8
lines changed

beetsplug/lastgenre/__init__.py

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -44,15 +44,15 @@
4444
)
4545

4646
DEFAULT_ARTIST_SEPARATORS = [
47-
"feat\\.",
47+
"feat.",
4848
"featuring",
4949
"&",
50-
"vs\\.",
51-
"\\bx\\b", # Match "x" only as whole word
50+
"vs.",
51+
"x", # Match "x" only as whole word
5252
"/",
5353
"+",
5454
"and",
55-
"\\|",
55+
"|",
5656
]
5757

5858

@@ -100,10 +100,18 @@ def split_on_separators(text, separators):
100100
if not seps:
101101
return [text]
102102

103-
# One regex: separators at token boundaries (no letters/digits touching)
104-
# (?<!\S) == start or whitespace; (?!\S) == end or whitespace
105-
alt = "|".join(re.escape(s) for s in seps) # escape special chars
106-
pattern = rf"(?<!\S)(?:{alt})(?!\S)"
103+
# Build patterns: word boundaries for pure alphanumeric, no boundaries for others
104+
patterns = []
105+
for s in seps:
106+
escaped = re.escape(s)
107+
if s.replace(" ", "").isalnum(): # ignore spaces so " and " is classified like "and"
108+
# Alphanumeric needs word boundaries (like "x", "and")
109+
patterns.append(rf"\b{escaped}\b")
110+
else:
111+
# Symbols like "/", " / " need no boundaries
112+
patterns.append(escaped)
113+
114+
pattern = "|".join(patterns)
107115

108116
if not re.search(pattern, text, flags=re.IGNORECASE):
109117
return [text]

0 commit comments

Comments
 (0)