summaryrefslogtreecommitdiff
path: root/lib/python2.7/sets.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/python2.7/sets.py')
-rw-r--r--lib/python2.7/sets.py557
1 files changed, 557 insertions, 0 deletions
diff --git a/lib/python2.7/sets.py b/lib/python2.7/sets.py
new file mode 100644
index 0000000..fe31a0b
--- /dev/null
+++ b/lib/python2.7/sets.py
@@ -0,0 +1,557 @@
+"""Classes to represent arbitrary sets (including sets of sets).
+
+This module implements sets using dictionaries whose values are
+ignored. The usual operations (union, intersection, deletion, etc.)
+are provided as both methods and operators.
+
+Important: sets are not sequences! While they support 'x in s',
+'len(s)', and 'for x in s', none of those operations are unique for
+sequences; for example, mappings support all three as well. The
+characteristic operation for sequences is subscripting with small
+integers: s[i], for i in range(len(s)). Sets don't support
+subscripting at all. Also, sequences allow multiple occurrences and
+their elements have a definite order; sets on the other hand don't
+record multiple occurrences and don't remember the order of element
+insertion (which is why they don't support s[i]).
+
+The following classes are provided:
+
+BaseSet -- All the operations common to both mutable and immutable
+ sets. This is an abstract class, not meant to be directly
+ instantiated.
+
+Set -- Mutable sets, subclass of BaseSet; not hashable.
+
+ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
+ An iterable argument is mandatory to create an ImmutableSet.
+
+_TemporarilyImmutableSet -- A wrapper around a Set, hashable,
+ giving the same hash value as the immutable set equivalent
+ would have. Do not use this class directly.
+
+Only hashable objects can be added to a Set. In particular, you cannot
+really add a Set as an element to another Set; if you try, what is
+actually added is an ImmutableSet built from it (it compares equal to
+the one you tried adding).
+
+When you ask if `x in y' where x is a Set and y is a Set or
+ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
+what's tested is actually `z in y'.
+
+"""
+
+# Code history:
+#
+# - Greg V. Wilson wrote the first version, using a different approach
+# to the mutable/immutable problem, and inheriting from dict.
+#
+# - Alex Martelli modified Greg's version to implement the current
+# Set/ImmutableSet approach, and make the data an attribute.
+#
+# - Guido van Rossum rewrote much of the code, made some API changes,
+# and cleaned up the docstrings.
+#
+# - Raymond Hettinger added a number of speedups and other
+# improvements.
+
+from itertools import ifilter, ifilterfalse
+
+__all__ = ['BaseSet', 'Set', 'ImmutableSet']
+
+import warnings
+warnings.warn("the sets module is deprecated", DeprecationWarning,
+ stacklevel=2)
+
+class BaseSet(object):
+ """Common base class for mutable and immutable sets."""
+
+ __slots__ = ['_data']
+
+ # Constructor
+
+ def __init__(self):
+ """This is an abstract class."""
+ # Don't call this from a concrete subclass!
+ if self.__class__ is BaseSet:
+ raise TypeError, ("BaseSet is an abstract class. "
+ "Use Set or ImmutableSet.")
+
+ # Standard protocols: __len__, __repr__, __str__, __iter__
+
+ def __len__(self):
+ """Return the number of elements of a set."""
+ return len(self._data)
+
+ def __repr__(self):
+ """Return string representation of a set.
+
+ This looks like 'Set([<list of elements>])'.
+ """
+ return self._repr()
+
+ # __str__ is the same as __repr__
+ __str__ = __repr__
+
+ def _repr(self, sorted=False):
+ elements = self._data.keys()
+ if sorted:
+ elements.sort()
+ return '%s(%r)' % (self.__class__.__name__, elements)
+
+ def __iter__(self):
+ """Return an iterator over the elements or a set.
+
+ This is the keys iterator for the underlying dict.
+ """
+ return self._data.iterkeys()
+
+ # Three-way comparison is not supported. However, because __eq__ is
+ # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and
+ # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this
+ # case).
+
+ def __cmp__(self, other):
+ raise TypeError, "can't compare sets using cmp()"
+
+ # Equality comparisons using the underlying dicts. Mixed-type comparisons
+ # are allowed here, where Set == z for non-Set z always returns False,
+ # and Set != z always True. This allows expressions like "x in y" to
+ # give the expected result when y is a sequence of mixed types, not
+ # raising a pointless TypeError just because y contains a Set, or x is
+ # a Set and y contain's a non-set ("in" invokes only __eq__).
+ # Subtle: it would be nicer if __eq__ and __ne__ could return
+ # NotImplemented instead of True or False. Then the other comparand
+ # would get a chance to determine the result, and if the other comparand
+ # also returned NotImplemented then it would fall back to object address
+ # comparison (which would always return False for __eq__ and always
+ # True for __ne__). However, that doesn't work, because this type
+ # *also* implements __cmp__: if, e.g., __eq__ returns NotImplemented,
+ # Python tries __cmp__ next, and the __cmp__ here then raises TypeError.
+
+ def __eq__(self, other):
+ if isinstance(other, BaseSet):
+ return self._data == other._data
+ else:
+ return False
+
+ def __ne__(self, other):
+ if isinstance(other, BaseSet):
+ return self._data != other._data
+ else:
+ return True
+
+ # Copying operations
+
+ def copy(self):
+ """Return a shallow copy of a set."""
+ result = self.__class__()
+ result._data.update(self._data)
+ return result
+
+ __copy__ = copy # For the copy module
+
+ def __deepcopy__(self, memo):
+ """Return a deep copy of a set; used by copy module."""
+ # This pre-creates the result and inserts it in the memo
+ # early, in case the deep copy recurses into another reference
+ # to this same set. A set can't be an element of itself, but
+ # it can certainly contain an object that has a reference to
+ # itself.
+ from copy import deepcopy
+ result = self.__class__()
+ memo[id(self)] = result
+ data = result._data
+ value = True
+ for elt in self:
+ data[deepcopy(elt, memo)] = value
+ return result
+
+ # Standard set operations: union, intersection, both differences.
+ # Each has an operator version (e.g. __or__, invoked with |) and a
+ # method version (e.g. union).
+ # Subtle: Each pair requires distinct code so that the outcome is
+ # correct when the type of other isn't suitable. For example, if
+ # we did "union = __or__" instead, then Set().union(3) would return
+ # NotImplemented instead of raising TypeError (albeit that *why* it
+ # raises TypeError as-is is also a bit subtle).
+
+ def __or__(self, other):
+ """Return the union of two sets as a new set.
+
+ (I.e. all elements that are in either set.)
+ """
+ if not isinstance(other, BaseSet):
+ return NotImplemented
+ return self.union(other)
+
+ def union(self, other):
+ """Return the union of two sets as a new set.
+
+ (I.e. all elements that are in either set.)
+ """
+ result = self.__class__(self)
+ result._update(other)
+ return result
+
+ def __and__(self, other):
+ """Return the intersection of two sets as a new set.
+
+ (I.e. all elements that are in both sets.)
+ """
+ if not isinstance(other, BaseSet):
+ return NotImplemented
+ return self.intersection(other)
+
+ def intersection(self, other):
+ """Return the intersection of two sets as a new set.
+
+ (I.e. all elements that are in both sets.)
+ """
+ if not isinstance(other, BaseSet):
+ other = Set(other)
+ if len(self) <= len(other):
+ little, big = self, other
+ else:
+ little, big = other, self
+ common = ifilter(big._data.__contains__, little)
+ return self.__class__(common)
+
+ def __xor__(self, other):
+ """Return the symmetric difference of two sets as a new set.
+
+ (I.e. all elements that are in exactly one of the sets.)
+ """
+ if not isinstance(other, BaseSet):
+ return NotImplemented
+ return self.symmetric_difference(other)
+
+ def symmetric_difference(self, other):
+ """Return the symmetric difference of two sets as a new set.
+
+ (I.e. all elements that are in exactly one of the sets.)
+ """
+ result = self.__class__()
+ data = result._data
+ value = True
+ selfdata = self._data
+ try:
+ otherdata = other._data
+ except AttributeError:
+ otherdata = Set(other)._data
+ for elt in ifilterfalse(otherdata.__contains__, selfdata):
+ data[elt] = value
+ for elt in ifilterfalse(selfdata.__contains__, otherdata):
+ data[elt] = value
+ return result
+
+ def __sub__(self, other):
+ """Return the difference of two sets as a new Set.
+
+ (I.e. all elements that are in this set and not in the other.)
+ """
+ if not isinstance(other, BaseSet):
+ return NotImplemented
+ return self.difference(other)
+
+ def difference(self, other):
+ """Return the difference of two sets as a new Set.
+
+ (I.e. all elements that are in this set and not in the other.)
+ """
+ result = self.__class__()
+ data = result._data
+ try:
+ otherdata = other._data
+ except AttributeError:
+ otherdata = Set(other)._data
+ value = True
+ for elt in ifilterfalse(otherdata.__contains__, self):
+ data[elt] = value
+ return result
+
+ # Membership test
+
+ def __contains__(self, element):
+ """Report whether an element is a member of a set.
+
+ (Called in response to the expression `element in self'.)
+ """
+ try:
+ return element in self._data
+ except TypeError:
+ transform = getattr(element, "__as_temporarily_immutable__", None)
+ if transform is None:
+ raise # re-raise the TypeError exception we caught
+ return transform() in self._data
+
+ # Subset and superset test
+
+ def issubset(self, other):
+ """Report whether another set contains this set."""
+ self._binary_sanity_check(other)
+ if len(self) > len(other): # Fast check for obvious cases
+ return False
+ for elt in ifilterfalse(other._data.__contains__, self):
+ return False
+ return True
+
+ def issuperset(self, other):
+ """Report whether this set contains another set."""
+ self._binary_sanity_check(other)
+ if len(self) < len(other): # Fast check for obvious cases
+ return False
+ for elt in ifilterfalse(self._data.__contains__, other):
+ return False
+ return True
+
+ # Inequality comparisons using the is-subset relation.
+ __le__ = issubset
+ __ge__ = issuperset
+
+ def __lt__(self, other):
+ self._binary_sanity_check(other)
+ return len(self) < len(other) and self.issubset(other)
+
+ def __gt__(self, other):
+ self._binary_sanity_check(other)
+ return len(self) > len(other) and self.issuperset(other)
+
+ # We inherit object.__hash__, so we must deny this explicitly
+ __hash__ = None
+
+ # Assorted helpers
+
+ def _binary_sanity_check(self, other):
+ # Check that the other argument to a binary operation is also
+ # a set, raising a TypeError otherwise.
+ if not isinstance(other, BaseSet):
+ raise TypeError, "Binary operation only permitted between sets"
+
+ def _compute_hash(self):
+ # Calculate hash code for a set by xor'ing the hash codes of
+ # the elements. This ensures that the hash code does not depend
+ # on the order in which elements are added to the set. This is
+ # not called __hash__ because a BaseSet should not be hashable;
+ # only an ImmutableSet is hashable.
+ result = 0
+ for elt in self:
+ result ^= hash(elt)
+ return result
+
+ def _update(self, iterable):
+ # The main loop for update() and the subclass __init__() methods.
+ data = self._data
+
+ # Use the fast update() method when a dictionary is available.
+ if isinstance(iterable, BaseSet):
+ data.update(iterable._data)
+ return
+
+ value = True
+
+ if type(iterable) in (list, tuple, xrange):
+ # Optimized: we know that __iter__() and next() can't
+ # raise TypeError, so we can move 'try:' out of the loop.
+ it = iter(iterable)
+ while True:
+ try:
+ for element in it:
+ data[element] = value
+ return
+ except TypeError:
+ transform = getattr(element, "__as_immutable__", None)
+ if transform is None:
+ raise # re-raise the TypeError exception we caught
+ data[transform()] = value
+ else:
+ # Safe: only catch TypeError where intended
+ for element in iterable:
+ try:
+ data[element] = value
+ except TypeError:
+ transform = getattr(element, "__as_immutable__", None)
+ if transform is None:
+ raise # re-raise the TypeError exception we caught
+ data[transform()] = value
+
+
+class ImmutableSet(BaseSet):
+ """Immutable set class."""
+
+ __slots__ = ['_hashcode']
+
+ # BaseSet + hashing
+
+ def __init__(self, iterable=None):
+ """Construct an immutable set from an optional iterable."""
+ self._hashcode = None
+ self._data = {}
+ if iterable is not None:
+ self._update(iterable)
+
+ def __hash__(self):
+ if self._hashcode is None:
+ self._hashcode = self._compute_hash()
+ return self._hashcode
+
+ def __getstate__(self):
+ return self._data, self._hashcode
+
+ def __setstate__(self, state):
+ self._data, self._hashcode = state
+
+class Set(BaseSet):
+ """ Mutable set class."""
+
+ __slots__ = []
+
+ # BaseSet + operations requiring mutability; no hashing
+
+ def __init__(self, iterable=None):
+ """Construct a set from an optional iterable."""
+ self._data = {}
+ if iterable is not None:
+ self._update(iterable)
+
+ def __getstate__(self):
+ # getstate's results are ignored if it is not
+ return self._data,
+
+ def __setstate__(self, data):
+ self._data, = data
+
+ # In-place union, intersection, differences.
+ # Subtle: The xyz_update() functions deliberately return None,
+ # as do all mutating operations on built-in container types.
+ # The __xyz__ spellings have to return self, though.
+
+ def __ior__(self, other):
+ """Update a set with the union of itself and another."""
+ self._binary_sanity_check(other)
+ self._data.update(other._data)
+ return self
+
+ def union_update(self, other):
+ """Update a set with the union of itself and another."""
+ self._update(other)
+
+ def __iand__(self, other):
+ """Update a set with the intersection of itself and another."""
+ self._binary_sanity_check(other)
+ self._data = (self & other)._data
+ return self
+
+ def intersection_update(self, other):
+ """Update a set with the intersection of itself and another."""
+ if isinstance(other, BaseSet):
+ self &= other
+ else:
+ self._data = (self.intersection(other))._data
+
+ def __ixor__(self, other):
+ """Update a set with the symmetric difference of itself and another."""
+ self._binary_sanity_check(other)
+ self.symmetric_difference_update(other)
+ return self
+
+ def symmetric_difference_update(self, other):
+ """Update a set with the symmetric difference of itself and another."""
+ data = self._data
+ value = True
+ if not isinstance(other, BaseSet):
+ other = Set(other)
+ if self is other:
+ self.clear()
+ for elt in other:
+ if elt in data:
+ del data[elt]
+ else:
+ data[elt] = value
+
+ def __isub__(self, other):
+ """Remove all elements of another set from this set."""
+ self._binary_sanity_check(other)
+ self.difference_update(other)
+ return self
+
+ def difference_update(self, other):
+ """Remove all elements of another set from this set."""
+ data = self._data
+ if not isinstance(other, BaseSet):
+ other = Set(other)
+ if self is other:
+ self.clear()
+ for elt in ifilter(data.__contains__, other):
+ del data[elt]
+
+ # Python dict-like mass mutations: update, clear
+
+ def update(self, iterable):
+ """Add all values from an iterable (such as a list or file)."""
+ self._update(iterable)
+
+ def clear(self):
+ """Remove all elements from this set."""
+ self._data.clear()
+
+ # Single-element mutations: add, remove, discard
+
+ def add(self, element):
+ """Add an element to a set.
+
+ This has no effect if the element is already present.
+ """
+ try:
+ self._data[element] = True
+ except TypeError:
+ transform = getattr(element, "__as_immutable__", None)
+ if transform is None:
+ raise # re-raise the TypeError exception we caught
+ self._data[transform()] = True
+
+ def remove(self, element):
+ """Remove an element from a set; it must be a member.
+
+ If the element is not a member, raise a KeyError.
+ """
+ try:
+ del self._data[element]
+ except TypeError:
+ transform = getattr(element, "__as_temporarily_immutable__", None)
+ if transform is None:
+ raise # re-raise the TypeError exception we caught
+ del self._data[transform()]
+
+ def discard(self, element):
+ """Remove an element from a set if it is a member.
+
+ If the element is not a member, do nothing.
+ """
+ try:
+ self.remove(element)
+ except KeyError:
+ pass
+
+ def pop(self):
+ """Remove and return an arbitrary set element."""
+ return self._data.popitem()[0]
+
+ def __as_immutable__(self):
+ # Return a copy of self as an immutable set
+ return ImmutableSet(self)
+
+ def __as_temporarily_immutable__(self):
+ # Return self wrapped in a temporarily immutable set
+ return _TemporarilyImmutableSet(self)
+
+
+class _TemporarilyImmutableSet(BaseSet):
+ # Wrap a mutable set as if it was temporarily immutable.
+ # This only supplies hashing and equality comparisons.
+
+ def __init__(self, set):
+ self._set = set
+ self._data = set._data # Needed by ImmutableSet.__eq__()
+
+ def __hash__(self):
+ return self._set._compute_hash()