diff options
Diffstat (limited to 'lib/python2.7/site-packages/django/test/html.py')
-rw-r--r-- | lib/python2.7/site-packages/django/test/html.py | 238 |
1 files changed, 0 insertions, 238 deletions
diff --git a/lib/python2.7/site-packages/django/test/html.py b/lib/python2.7/site-packages/django/test/html.py deleted file mode 100644 index 0d30bd2..0000000 --- a/lib/python2.7/site-packages/django/test/html.py +++ /dev/null @@ -1,238 +0,0 @@ -""" -Comparing two html documents. -""" - -from __future__ import unicode_literals - -import re -from django.utils.encoding import force_text -from django.utils.html_parser import HTMLParser, HTMLParseError -from django.utils import six -from django.utils.encoding import python_2_unicode_compatible - - -WHITESPACE = re.compile('\s+') - - -def normalize_whitespace(string): - return WHITESPACE.sub(' ', string) - - -@python_2_unicode_compatible -class Element(object): - def __init__(self, name, attributes): - self.name = name - self.attributes = sorted(attributes) - self.children = [] - - def append(self, element): - if isinstance(element, six.string_types): - element = force_text(element) - element = normalize_whitespace(element) - if self.children: - if isinstance(self.children[-1], six.string_types): - self.children[-1] += element - self.children[-1] = normalize_whitespace(self.children[-1]) - return - elif self.children: - # removing last children if it is only whitespace - # this can result in incorrect dom representations since - # whitespace between inline tags like <span> is significant - if isinstance(self.children[-1], six.string_types): - if self.children[-1].isspace(): - self.children.pop() - if element: - self.children.append(element) - - def finalize(self): - def rstrip_last_element(children): - if children: - if isinstance(children[-1], six.string_types): - children[-1] = children[-1].rstrip() - if not children[-1]: - children.pop() - children = rstrip_last_element(children) - return children - - rstrip_last_element(self.children) - for i, child in enumerate(self.children): - if isinstance(child, six.string_types): - self.children[i] = child.strip() - elif hasattr(child, 'finalize'): - child.finalize() - - def __eq__(self, element): - if not hasattr(element, 'name'): - return False - if hasattr(element, 'name') and self.name != element.name: - return False - if len(self.attributes) != len(element.attributes): - return False - if self.attributes != element.attributes: - # attributes without a value is same as attribute with value that - # equals the attributes name: - # <input checked> == <input checked="checked"> - for i in range(len(self.attributes)): - attr, value = self.attributes[i] - other_attr, other_value = element.attributes[i] - if value is None: - value = attr - if other_value is None: - other_value = other_attr - if attr != other_attr or value != other_value: - return False - if self.children != element.children: - return False - return True - - def __hash__(self): - return hash((self.name,) + tuple(a for a in self.attributes)) - - def __ne__(self, element): - return not self.__eq__(element) - - def _count(self, element, count=True): - if not isinstance(element, six.string_types): - if self == element: - return 1 - i = 0 - for child in self.children: - # child is text content and element is also text content, then - # make a simple "text" in "text" - if isinstance(child, six.string_types): - if isinstance(element, six.string_types): - if count: - i += child.count(element) - elif element in child: - return 1 - else: - i += child._count(element, count=count) - if not count and i: - return i - return i - - def __contains__(self, element): - return self._count(element, count=False) > 0 - - def count(self, element): - return self._count(element, count=True) - - def __getitem__(self, key): - return self.children[key] - - def __str__(self): - output = '<%s' % self.name - for key, value in self.attributes: - if value: - output += ' %s="%s"' % (key, value) - else: - output += ' %s' % key - if self.children: - output += '>\n' - output += ''.join(six.text_type(c) for c in self.children) - output += '\n</%s>' % self.name - else: - output += ' />' - return output - - def __repr__(self): - return six.text_type(self) - - -@python_2_unicode_compatible -class RootElement(Element): - def __init__(self): - super(RootElement, self).__init__(None, ()) - - def __str__(self): - return ''.join(six.text_type(c) for c in self.children) - - -class Parser(HTMLParser): - SELF_CLOSING_TAGS = ('br' , 'hr', 'input', 'img', 'meta', 'spacer', - 'link', 'frame', 'base', 'col') - - def __init__(self): - HTMLParser.__init__(self) - self.root = RootElement() - self.open_tags = [] - self.element_positions = {} - - def error(self, msg): - raise HTMLParseError(msg, self.getpos()) - - def format_position(self, position=None, element=None): - if not position and element: - position = self.element_positions[element] - if position is None: - position = self.getpos() - if hasattr(position, 'lineno'): - position = position.lineno, position.offset - return 'Line %d, Column %d' % position - - @property - def current(self): - if self.open_tags: - return self.open_tags[-1] - else: - return self.root - - def handle_startendtag(self, tag, attrs): - self.handle_starttag(tag, attrs) - if tag not in self.SELF_CLOSING_TAGS: - self.handle_endtag(tag) - - def handle_starttag(self, tag, attrs): - # Special case handling of 'class' attribute, so that comparisons of DOM - # instances are not sensitive to ordering of classes. - attrs = [ - (name, " ".join(sorted(value.split(" ")))) - if name == "class" - else (name, value) - for name, value in attrs - ] - element = Element(tag, attrs) - self.current.append(element) - if tag not in self.SELF_CLOSING_TAGS: - self.open_tags.append(element) - self.element_positions[element] = self.getpos() - - def handle_endtag(self, tag): - if not self.open_tags: - self.error("Unexpected end tag `%s` (%s)" % ( - tag, self.format_position())) - element = self.open_tags.pop() - while element.name != tag: - if not self.open_tags: - self.error("Unexpected end tag `%s` (%s)" % ( - tag, self.format_position())) - element = self.open_tags.pop() - - def handle_data(self, data): - self.current.append(data) - - def handle_charref(self, name): - self.current.append('&%s;' % name) - - def handle_entityref(self, name): - self.current.append('&%s;' % name) - - -def parse_html(html): - """ - Takes a string that contains *valid* HTML and turns it into a Python object - structure that can be easily compared against other HTML on semantic - equivalence. Syntactical differences like which quotation is used on - arguments will be ignored. - - """ - parser = Parser() - parser.feed(html) - parser.close() - document = parser.root - document.finalize() - # Removing ROOT element if it's not necessary - if len(document.children) == 1: - if not isinstance(document.children[0], six.string_types): - document = document.children[0] - return document |