GrandMoff100 · Alan-Chen99 · May 2, 2024 · May 2, 2024 · May 3, 2024 · May 3, 2024
@@ -0,0 +1,2 @@
+PATH_add $(nix build --impure --expr "(import <df>).default.python.withPackages (ps: [ps.pip ps.pyparsing ps.hypothesis ps.pytest ps.pytest-cov ps.setuptools ps.ipython ps.pylint])" --no-link --print-out-paths)/bin
+path_add PYTHONPATH $(pwd)/
@@ -9,20 +9,18 @@ Dynamically construct python regex patterns.
 Say you want a regex pattern to match the initials of someone's name.
 
 ```python
-import re
-from regexfactory import Amount, Range
-
+from regexfactory import *
 
-pattern = Amount(Range("A", "Z"), 2, 3)
+pattern = amount(Range("A", "Z"), 2, 3)
 
-matches = pattern.findall(
-    "My initials are BDP. Valorie's are VO"
-)
+matches = pattern.findall("My initials are BDP. Valorie's are VO")
 
+print(pattern.regex)
 print(matches)
 ```
 
-```bash
+```
+[A-Z]{2,3}
 ['BDP', 'VO']
 ```
 
@@ -31,36 +29,24 @@ print(matches)
 Or how about matching both uppercase and lowercase hex strings in a sentence?
 
 ```python
-import re
 from regexfactory import *
 
-pattern = Optional("#") + Or(
-    Amount(
-        Set(
-            Range("0", "9"),
-            Range("a", "f")
-        ),
-        6
-    ),
-    Amount(
-        Set(
-            Range("0", "9"),
-            Range("A", "F")
-        ),
-        6
-    ),
-
+pattern = optional("#") + or_(
+    (Range("0", "9") | Range("a", "f")) * 6,
+    (Range("0", "9") | Range("A", "F")) * 6,
 )
 
 sentence = """
 My favorite color is #000000. I also like 5fb8a0. My second favorite color is #FF21FF.
 """
 
+print(pattern.regex)
 matches = pattern.findall(sentence)
 print(matches)
 ```
 
-```bash
+```
+(?:#)?(?:[0-9a-f]{6}|[0-9A-F]{6})
 ['#000000', '5fb8a0', '#FF21FF']
 ```
 
@@ -71,30 +57,25 @@ Or what if you want to match urls in html content?
 ```python
 from regexfactory import *
 
-
-protocol = Amount(Range("a", "z"), 1, or_more=True)
-host = Amount(Set(WORD, DIGIT, '.'), 1, or_more=True)
-port = Optional(IfBehind(":") + Multi(DIGIT))
-path = Multi(
-    RegexPattern('/') + Multi(
-        NotSet('/', '#', '?', '&', WHITESPACE),
-        match_zero=True
-    ),
-    match_zero=True
+protocol = amount(Range("a", "z"), 1, or_more=True)
+host = amount(WORD | DIGIT | r"\.", 1, or_more=True)
+port = optional(":" + multi(DIGIT))
+path = multi(
+    "/" + multi(NotSet("/", "#", "?", "&", WHITESPACE), match_zero=True),
+    match_zero=True,
 )
-patt = protocol + RegexPattern("://") + host + port + path
-
+patt = protocol + "://" + host + port + path
 
 
 sentence = "This is a cool url, https://github.com/GrandMoff100/RegexFactory/ "
-print(patt)
+print(patt.regex)
 
 print(patt.search(sentence))
 ```
 
-```bash
-[a-z]{1,}://[\w\d.]{1,}(?:\d{1,})?(/([^/#?&\s]{0,})){0,}
-<re.Match object; span=(15, 51), match='https://github.com/GrandMoff100/RegexFactory/'>
+```
+[a-z]+://[\w\d\.]+(?::\d+)?(?:/[^/\#\?\&\s]*)*
+<re.Match object; span=(20, 65), match='https://github.com/GrandMoff100/RegexFactory/'>
 ```
 
 ## The Pitch

@@ -23,10 +23,21 @@
     WHITESPACE,
     WORD,
 )
-from .pattern import ESCAPED_CHARACTERS, RegexPattern, ValidPatternType, escape, join
+from .pattern import (
+    ESCAPED_CHARACTERS,
+    RegexPattern,
+    ValidPatternType,
+    amount,
+    escape,
+    join,
+    multi,
+    optional,
+    or_,
+)
 from .patterns import (
     Amount,
     Comment,
+    Concat,
     Extension,
     Group,
     IfAhead,
@@ -37,12 +48,10 @@
     Multi,
     NamedGroup,
     NamedReference,
-    NotSet,
     NumberedReference,
     Optional,
     Or,
-    Range,
-    Set,
 )
+from .sets import EMPTY, NEVER, NotSet, Range, Set
 
 __version__ = "1.0.1"
@@ -8,30 +8,38 @@
 """
 
 from .pattern import RegexPattern
+from .sets import CharClass
 
 #: (Dot.) In the default mode, this matches any character except a newline. If the :data:`re.DOTALL` flag has been specified, this matches any character including a newline.
-ANY = RegexPattern(r".")
+ANY = RegexPattern(r".", _precedence=10)
+ANY._desc = "ANY"
 
 #: (Caret.) Matches the start of the string, and in  :data:`re.MULTILINE` mode also matches immediately after each newline.
-ANCHOR_START = RegexPattern(r"^", _precedence=2)
+ANCHOR_START = RegexPattern(r"^", _precedence=0)
 
 #: Matches the end of the string or just before the newline at the end of the string, and in :data:`re.MULTILINE` mode also matches before a newline. The pattern :code:`foo` matches both :code:`foo` and :code:`foobar`, while the regular expression :code:`foo$` matches only :code:`foo`. More interestingly, searching for :code:`foo.$` in :code:`foo1\nfoo2\n` matches :code:`foo2` normally, but :code:`foo1` in :data:`re.MULTILINE` mode; searching for a single :code:`$` in :code:`foo\n` will find two (empty) matches: one just before the newline, and one at the end of the string.
-ANCHOR_END = RegexPattern(r"$", _precedence=2)
+ANCHOR_END = RegexPattern(r"$", _precedence=0)
 
 #: Matches Unicode whitespace characters (which includes :code:`[ \t\n\r\f\v]`, and also many other characters, for example the non-breaking spaces mandated by typography rules in many languages). If the :data:`re.ASCII` flag is used, only :code:`[ \t\n\r\f\v]` is matched.
-WHITESPACE = RegexPattern(r"\s")
+WHITESPACE = CharClass(r"\s")
+WHITESPACE._desc = "WHITESPACE"
 
 #: Matches any character which is not a whitespace character. This is the opposite of \s. If the :data:`re.ASCII` flag is used this becomes the equivalent of :code:`[^ \t\n\r\f\v]`.
-NOTWHITESPACE = RegexPattern(r"\S")
+NOTWHITESPACE = CharClass(r"\S")
+NOTWHITESPACE._desc = "NOTWHITESPACE"
 
 #: Matches Unicode word characters; this includes most characters that can be part of a word in any language, as well as numbers and the underscore. If the :data:`re.ASCII` flag is used, only :code:`[a-zA-Z0-9_]` is matched.
-WORD = RegexPattern(r"\w")
+WORD = CharClass(r"\w")
+WORD._desc = "WORD"
 
 #: Matches any character which is not a word character. This is the opposite of \w. If the :data:`re.ASCII` flag is used this becomes the equivalent of :code:`[^a-zA-Z0-9_]`. If the  :data:`re.LOCALE` flag is used, matches characters which are neither alphanumeric in the current locale nor the underscore.
-NOTWORD = RegexPattern(r"\W")
+NOTWORD = CharClass(r"\W")
+NOTWORD._desc = "NOTWORD"
 
 #: Matches any Unicode decimal digit (that is, any character in Unicode character category [Nd]). This includes :code:`[0-9]`, and also many other digit characters. If the :data:`re.ASCII` flag is used only :code:`[0-9]` is matched.
-DIGIT = RegexPattern(r"\d")
+DIGIT = CharClass(r"\d")
+DIGIT._desc = "DIGIT"
 
 #: Matches any character which is not a decimal digit. This is the opposite of \d. If the :data:`re.ASCII` flag is used this becomes the equivalent of :code:`[^0-9]`.
-NOTDIGIT = RegexPattern(r"\D")
+NOTDIGIT = CharClass(r"\D")
+NOTDIGIT._desc = "NOTDIGIT"
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		PATH_add $(nix build --impure --expr "(import <df>).default.python.withPackages (ps: [ps.pip ps.pyparsing ps.hypothesis ps.pytest ps.pytest-cov ps.setuptools ps.ipython ps.pylint])" --no-link --print-out-paths)/bin
		path_add PYTHONPATH $(pwd)/