summaryrefslogtreecommitdiff
path: root/lib/python2.7/site-packages/django/utils/regex_helper.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/python2.7/site-packages/django/utils/regex_helper.py')
-rw-r--r--lib/python2.7/site-packages/django/utils/regex_helper.py342
1 files changed, 342 insertions, 0 deletions
diff --git a/lib/python2.7/site-packages/django/utils/regex_helper.py b/lib/python2.7/site-packages/django/utils/regex_helper.py
new file mode 100644
index 0000000..7b40d14
--- /dev/null
+++ b/lib/python2.7/site-packages/django/utils/regex_helper.py
@@ -0,0 +1,342 @@
+"""
+Functions for reversing a regular expression (used in reverse URL resolving).
+Used internally by Django and not intended for external use.
+
+This is not, and is not intended to be, a complete reg-exp decompiler. It
+should be good enough for a large class of URLS, however.
+"""
+from __future__ import unicode_literals
+
+from django.utils import six
+from django.utils.six.moves import zip
+
+# Mapping of an escape character to a representative of that class. So, e.g.,
+# "\w" is replaced by "x" in a reverse URL. A value of None means to ignore
+# this sequence. Any missing key is mapped to itself.
+ESCAPE_MAPPINGS = {
+ "A": None,
+ "b": None,
+ "B": None,
+ "d": "0",
+ "D": "x",
+ "s": " ",
+ "S": "x",
+ "w": "x",
+ "W": "!",
+ "Z": None,
+}
+
+class Choice(list):
+ """
+ Used to represent multiple possibilities at this point in a pattern string.
+ We use a distinguished type, rather than a list, so that the usage in the
+ code is clear.
+ """
+
+class Group(list):
+ """
+ Used to represent a capturing group in the pattern string.
+ """
+
+class NonCapture(list):
+ """
+ Used to represent a non-capturing group in the pattern string.
+ """
+
+def normalize(pattern):
+ """
+ Given a reg-exp pattern, normalizes it to an iterable of forms that
+ suffice for reverse matching. This does the following:
+
+ (1) For any repeating sections, keeps the minimum number of occurrences
+ permitted (this means zero for optional groups).
+ (2) If an optional group includes parameters, include one occurrence of
+ that group (along with the zero occurrence case from step (1)).
+ (3) Select the first (essentially an arbitrary) element from any character
+ class. Select an arbitrary character for any unordered class (e.g. '.'
+ or '\w') in the pattern.
+ (5) Ignore comments and any of the reg-exp flags that won't change
+ what we construct ("iLmsu"). "(?x)" is an error, however.
+ (6) Raise an error on all other non-capturing (?...) forms (e.g.
+ look-ahead and look-behind matches) and any disjunctive ('|')
+ constructs.
+
+ Django's URLs for forward resolving are either all positional arguments or
+ all keyword arguments. That is assumed here, as well. Although reverse
+ resolving can be done using positional args when keyword args are
+ specified, the two cannot be mixed in the same reverse() call.
+ """
+ # Do a linear scan to work out the special features of this pattern. The
+ # idea is that we scan once here and collect all the information we need to
+ # make future decisions.
+ result = []
+ non_capturing_groups = []
+ consume_next = True
+ pattern_iter = next_char(iter(pattern))
+ num_args = 0
+
+ # A "while" loop is used here because later on we need to be able to peek
+ # at the next character and possibly go around without consuming another
+ # one at the top of the loop.
+ try:
+ ch, escaped = next(pattern_iter)
+ except StopIteration:
+ return [('', [])]
+
+ try:
+ while True:
+ if escaped:
+ result.append(ch)
+ elif ch == '.':
+ # Replace "any character" with an arbitrary representative.
+ result.append(".")
+ elif ch == '|':
+ # FIXME: One day we'll should do this, but not in 1.0.
+ raise NotImplementedError
+ elif ch == "^":
+ pass
+ elif ch == '$':
+ break
+ elif ch == ')':
+ # This can only be the end of a non-capturing group, since all
+ # other unescaped parentheses are handled by the grouping
+ # section later (and the full group is handled there).
+ #
+ # We regroup everything inside the capturing group so that it
+ # can be quantified, if necessary.
+ start = non_capturing_groups.pop()
+ inner = NonCapture(result[start:])
+ result = result[:start] + [inner]
+ elif ch == '[':
+ # Replace ranges with the first character in the range.
+ ch, escaped = next(pattern_iter)
+ result.append(ch)
+ ch, escaped = next(pattern_iter)
+ while escaped or ch != ']':
+ ch, escaped = next(pattern_iter)
+ elif ch == '(':
+ # Some kind of group.
+ ch, escaped = next(pattern_iter)
+ if ch != '?' or escaped:
+ # A positional group
+ name = "_%d" % num_args
+ num_args += 1
+ result.append(Group((("%%(%s)s" % name), name)))
+ walk_to_end(ch, pattern_iter)
+ else:
+ ch, escaped = next(pattern_iter)
+ if ch in "iLmsu#":
+ # All of these are ignorable. Walk to the end of the
+ # group.
+ walk_to_end(ch, pattern_iter)
+ elif ch == ':':
+ # Non-capturing group
+ non_capturing_groups.append(len(result))
+ elif ch != 'P':
+ # Anything else, other than a named group, is something
+ # we cannot reverse.
+ raise ValueError("Non-reversible reg-exp portion: '(?%s'" % ch)
+ else:
+ ch, escaped = next(pattern_iter)
+ if ch not in ('<', '='):
+ raise ValueError("Non-reversible reg-exp portion: '(?P%s'" % ch)
+ # We are in a named capturing group. Extra the name and
+ # then skip to the end.
+ if ch == '<':
+ terminal_char = '>'
+ # We are in a named backreference.
+ else:
+ terminal_char = ')'
+ name = []
+ ch, escaped = next(pattern_iter)
+ while ch != terminal_char:
+ name.append(ch)
+ ch, escaped = next(pattern_iter)
+ param = ''.join(name)
+ # Named backreferences have already consumed the
+ # parenthesis.
+ if terminal_char != ')':
+ result.append(Group((("%%(%s)s" % param), param)))
+ walk_to_end(ch, pattern_iter)
+ else:
+ result.append(Group((("%%(%s)s" % param), None)))
+ elif ch in "*?+{":
+ # Quanitifers affect the previous item in the result list.
+ count, ch = get_quantifier(ch, pattern_iter)
+ if ch:
+ # We had to look ahead, but it wasn't need to compute the
+ # quanitifer, so use this character next time around the
+ # main loop.
+ consume_next = False
+
+ if count == 0:
+ if contains(result[-1], Group):
+ # If we are quantifying a capturing group (or
+ # something containing such a group) and the minimum is
+ # zero, we must also handle the case of one occurrence
+ # being present. All the quantifiers (except {0,0},
+ # which we conveniently ignore) that have a 0 minimum
+ # also allow a single occurrence.
+ result[-1] = Choice([None, result[-1]])
+ else:
+ result.pop()
+ elif count > 1:
+ result.extend([result[-1]] * (count - 1))
+ else:
+ # Anything else is a literal.
+ result.append(ch)
+
+ if consume_next:
+ ch, escaped = next(pattern_iter)
+ else:
+ consume_next = True
+ except StopIteration:
+ pass
+ except NotImplementedError:
+ # A case of using the disjunctive form. No results for you!
+ return [('', [])]
+
+ return list(zip(*flatten_result(result)))
+
+def next_char(input_iter):
+ """
+ An iterator that yields the next character from "pattern_iter", respecting
+ escape sequences. An escaped character is replaced by a representative of
+ its class (e.g. \w -> "x"). If the escaped character is one that is
+ skipped, it is not returned (the next character is returned instead).
+
+ Yields the next character, along with a boolean indicating whether it is a
+ raw (unescaped) character or not.
+ """
+ for ch in input_iter:
+ if ch != '\\':
+ yield ch, False
+ continue
+ ch = next(input_iter)
+ representative = ESCAPE_MAPPINGS.get(ch, ch)
+ if representative is None:
+ continue
+ yield representative, True
+
+def walk_to_end(ch, input_iter):
+ """
+ The iterator is currently inside a capturing group. We want to walk to the
+ close of this group, skipping over any nested groups and handling escaped
+ parentheses correctly.
+ """
+ if ch == '(':
+ nesting = 1
+ else:
+ nesting = 0
+ for ch, escaped in input_iter:
+ if escaped:
+ continue
+ elif ch == '(':
+ nesting += 1
+ elif ch == ')':
+ if not nesting:
+ return
+ nesting -= 1
+
+def get_quantifier(ch, input_iter):
+ """
+ Parse a quantifier from the input, where "ch" is the first character in the
+ quantifier.
+
+ Returns the minimum number of occurences permitted by the quantifier and
+ either None or the next character from the input_iter if the next character
+ is not part of the quantifier.
+ """
+ if ch in '*?+':
+ try:
+ ch2, escaped = next(input_iter)
+ except StopIteration:
+ ch2 = None
+ if ch2 == '?':
+ ch2 = None
+ if ch == '+':
+ return 1, ch2
+ return 0, ch2
+
+ quant = []
+ while ch != '}':
+ ch, escaped = next(input_iter)
+ quant.append(ch)
+ quant = quant[:-1]
+ values = ''.join(quant).split(',')
+
+ # Consume the trailing '?', if necessary.
+ try:
+ ch, escaped = next(input_iter)
+ except StopIteration:
+ ch = None
+ if ch == '?':
+ ch = None
+ return int(values[0]), ch
+
+def contains(source, inst):
+ """
+ Returns True if the "source" contains an instance of "inst". False,
+ otherwise.
+ """
+ if isinstance(source, inst):
+ return True
+ if isinstance(source, NonCapture):
+ for elt in source:
+ if contains(elt, inst):
+ return True
+ return False
+
+def flatten_result(source):
+ """
+ Turns the given source sequence into a list of reg-exp possibilities and
+ their arguments. Returns a list of strings and a list of argument lists.
+ Each of the two lists will be of the same length.
+ """
+ if source is None:
+ return [''], [[]]
+ if isinstance(source, Group):
+ if source[1] is None:
+ params = []
+ else:
+ params = [source[1]]
+ return [source[0]], [params]
+ result = ['']
+ result_args = [[]]
+ pos = last = 0
+ for pos, elt in enumerate(source):
+ if isinstance(elt, six.string_types):
+ continue
+ piece = ''.join(source[last:pos])
+ if isinstance(elt, Group):
+ piece += elt[0]
+ param = elt[1]
+ else:
+ param = None
+ last = pos + 1
+ for i in range(len(result)):
+ result[i] += piece
+ if param:
+ result_args[i].append(param)
+ if isinstance(elt, (Choice, NonCapture)):
+ if isinstance(elt, NonCapture):
+ elt = [elt]
+ inner_result, inner_args = [], []
+ for item in elt:
+ res, args = flatten_result(item)
+ inner_result.extend(res)
+ inner_args.extend(args)
+ new_result = []
+ new_args = []
+ for item, args in zip(result, result_args):
+ for i_item, i_args in zip(inner_result, inner_args):
+ new_result.append(item + i_item)
+ new_args.append(args[:] + i_args)
+ result = new_result
+ result_args = new_args
+ if pos >= last:
+ piece = ''.join(source[last:])
+ for i in range(len(result)):
+ result[i] += piece
+ return result, result_args
+