diff options
Diffstat (limited to 'lib/python2.7/sets.py')
-rw-r--r-- | lib/python2.7/sets.py | 557 |
1 files changed, 557 insertions, 0 deletions
diff --git a/lib/python2.7/sets.py b/lib/python2.7/sets.py new file mode 100644 index 0000000..fe31a0b --- /dev/null +++ b/lib/python2.7/sets.py @@ -0,0 +1,557 @@ +"""Classes to represent arbitrary sets (including sets of sets). + +This module implements sets using dictionaries whose values are +ignored. The usual operations (union, intersection, deletion, etc.) +are provided as both methods and operators. + +Important: sets are not sequences! While they support 'x in s', +'len(s)', and 'for x in s', none of those operations are unique for +sequences; for example, mappings support all three as well. The +characteristic operation for sequences is subscripting with small +integers: s[i], for i in range(len(s)). Sets don't support +subscripting at all. Also, sequences allow multiple occurrences and +their elements have a definite order; sets on the other hand don't +record multiple occurrences and don't remember the order of element +insertion (which is why they don't support s[i]). + +The following classes are provided: + +BaseSet -- All the operations common to both mutable and immutable + sets. This is an abstract class, not meant to be directly + instantiated. + +Set -- Mutable sets, subclass of BaseSet; not hashable. + +ImmutableSet -- Immutable sets, subclass of BaseSet; hashable. + An iterable argument is mandatory to create an ImmutableSet. + +_TemporarilyImmutableSet -- A wrapper around a Set, hashable, + giving the same hash value as the immutable set equivalent + would have. Do not use this class directly. + +Only hashable objects can be added to a Set. In particular, you cannot +really add a Set as an element to another Set; if you try, what is +actually added is an ImmutableSet built from it (it compares equal to +the one you tried adding). + +When you ask if `x in y' where x is a Set and y is a Set or +ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and +what's tested is actually `z in y'. + +""" + +# Code history: +# +# - Greg V. Wilson wrote the first version, using a different approach +# to the mutable/immutable problem, and inheriting from dict. +# +# - Alex Martelli modified Greg's version to implement the current +# Set/ImmutableSet approach, and make the data an attribute. +# +# - Guido van Rossum rewrote much of the code, made some API changes, +# and cleaned up the docstrings. +# +# - Raymond Hettinger added a number of speedups and other +# improvements. + +from itertools import ifilter, ifilterfalse + +__all__ = ['BaseSet', 'Set', 'ImmutableSet'] + +import warnings +warnings.warn("the sets module is deprecated", DeprecationWarning, + stacklevel=2) + +class BaseSet(object): + """Common base class for mutable and immutable sets.""" + + __slots__ = ['_data'] + + # Constructor + + def __init__(self): + """This is an abstract class.""" + # Don't call this from a concrete subclass! + if self.__class__ is BaseSet: + raise TypeError, ("BaseSet is an abstract class. " + "Use Set or ImmutableSet.") + + # Standard protocols: __len__, __repr__, __str__, __iter__ + + def __len__(self): + """Return the number of elements of a set.""" + return len(self._data) + + def __repr__(self): + """Return string representation of a set. + + This looks like 'Set([<list of elements>])'. + """ + return self._repr() + + # __str__ is the same as __repr__ + __str__ = __repr__ + + def _repr(self, sorted=False): + elements = self._data.keys() + if sorted: + elements.sort() + return '%s(%r)' % (self.__class__.__name__, elements) + + def __iter__(self): + """Return an iterator over the elements or a set. + + This is the keys iterator for the underlying dict. + """ + return self._data.iterkeys() + + # Three-way comparison is not supported. However, because __eq__ is + # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and + # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this + # case). + + def __cmp__(self, other): + raise TypeError, "can't compare sets using cmp()" + + # Equality comparisons using the underlying dicts. Mixed-type comparisons + # are allowed here, where Set == z for non-Set z always returns False, + # and Set != z always True. This allows expressions like "x in y" to + # give the expected result when y is a sequence of mixed types, not + # raising a pointless TypeError just because y contains a Set, or x is + # a Set and y contain's a non-set ("in" invokes only __eq__). + # Subtle: it would be nicer if __eq__ and __ne__ could return + # NotImplemented instead of True or False. Then the other comparand + # would get a chance to determine the result, and if the other comparand + # also returned NotImplemented then it would fall back to object address + # comparison (which would always return False for __eq__ and always + # True for __ne__). However, that doesn't work, because this type + # *also* implements __cmp__: if, e.g., __eq__ returns NotImplemented, + # Python tries __cmp__ next, and the __cmp__ here then raises TypeError. + + def __eq__(self, other): + if isinstance(other, BaseSet): + return self._data == other._data + else: + return False + + def __ne__(self, other): + if isinstance(other, BaseSet): + return self._data != other._data + else: + return True + + # Copying operations + + def copy(self): + """Return a shallow copy of a set.""" + result = self.__class__() + result._data.update(self._data) + return result + + __copy__ = copy # For the copy module + + def __deepcopy__(self, memo): + """Return a deep copy of a set; used by copy module.""" + # This pre-creates the result and inserts it in the memo + # early, in case the deep copy recurses into another reference + # to this same set. A set can't be an element of itself, but + # it can certainly contain an object that has a reference to + # itself. + from copy import deepcopy + result = self.__class__() + memo[id(self)] = result + data = result._data + value = True + for elt in self: + data[deepcopy(elt, memo)] = value + return result + + # Standard set operations: union, intersection, both differences. + # Each has an operator version (e.g. __or__, invoked with |) and a + # method version (e.g. union). + # Subtle: Each pair requires distinct code so that the outcome is + # correct when the type of other isn't suitable. For example, if + # we did "union = __or__" instead, then Set().union(3) would return + # NotImplemented instead of raising TypeError (albeit that *why* it + # raises TypeError as-is is also a bit subtle). + + def __or__(self, other): + """Return the union of two sets as a new set. + + (I.e. all elements that are in either set.) + """ + if not isinstance(other, BaseSet): + return NotImplemented + return self.union(other) + + def union(self, other): + """Return the union of two sets as a new set. + + (I.e. all elements that are in either set.) + """ + result = self.__class__(self) + result._update(other) + return result + + def __and__(self, other): + """Return the intersection of two sets as a new set. + + (I.e. all elements that are in both sets.) + """ + if not isinstance(other, BaseSet): + return NotImplemented + return self.intersection(other) + + def intersection(self, other): + """Return the intersection of two sets as a new set. + + (I.e. all elements that are in both sets.) + """ + if not isinstance(other, BaseSet): + other = Set(other) + if len(self) <= len(other): + little, big = self, other + else: + little, big = other, self + common = ifilter(big._data.__contains__, little) + return self.__class__(common) + + def __xor__(self, other): + """Return the symmetric difference of two sets as a new set. + + (I.e. all elements that are in exactly one of the sets.) + """ + if not isinstance(other, BaseSet): + return NotImplemented + return self.symmetric_difference(other) + + def symmetric_difference(self, other): + """Return the symmetric difference of two sets as a new set. + + (I.e. all elements that are in exactly one of the sets.) + """ + result = self.__class__() + data = result._data + value = True + selfdata = self._data + try: + otherdata = other._data + except AttributeError: + otherdata = Set(other)._data + for elt in ifilterfalse(otherdata.__contains__, selfdata): + data[elt] = value + for elt in ifilterfalse(selfdata.__contains__, otherdata): + data[elt] = value + return result + + def __sub__(self, other): + """Return the difference of two sets as a new Set. + + (I.e. all elements that are in this set and not in the other.) + """ + if not isinstance(other, BaseSet): + return NotImplemented + return self.difference(other) + + def difference(self, other): + """Return the difference of two sets as a new Set. + + (I.e. all elements that are in this set and not in the other.) + """ + result = self.__class__() + data = result._data + try: + otherdata = other._data + except AttributeError: + otherdata = Set(other)._data + value = True + for elt in ifilterfalse(otherdata.__contains__, self): + data[elt] = value + return result + + # Membership test + + def __contains__(self, element): + """Report whether an element is a member of a set. + + (Called in response to the expression `element in self'.) + """ + try: + return element in self._data + except TypeError: + transform = getattr(element, "__as_temporarily_immutable__", None) + if transform is None: + raise # re-raise the TypeError exception we caught + return transform() in self._data + + # Subset and superset test + + def issubset(self, other): + """Report whether another set contains this set.""" + self._binary_sanity_check(other) + if len(self) > len(other): # Fast check for obvious cases + return False + for elt in ifilterfalse(other._data.__contains__, self): + return False + return True + + def issuperset(self, other): + """Report whether this set contains another set.""" + self._binary_sanity_check(other) + if len(self) < len(other): # Fast check for obvious cases + return False + for elt in ifilterfalse(self._data.__contains__, other): + return False + return True + + # Inequality comparisons using the is-subset relation. + __le__ = issubset + __ge__ = issuperset + + def __lt__(self, other): + self._binary_sanity_check(other) + return len(self) < len(other) and self.issubset(other) + + def __gt__(self, other): + self._binary_sanity_check(other) + return len(self) > len(other) and self.issuperset(other) + + # We inherit object.__hash__, so we must deny this explicitly + __hash__ = None + + # Assorted helpers + + def _binary_sanity_check(self, other): + # Check that the other argument to a binary operation is also + # a set, raising a TypeError otherwise. + if not isinstance(other, BaseSet): + raise TypeError, "Binary operation only permitted between sets" + + def _compute_hash(self): + # Calculate hash code for a set by xor'ing the hash codes of + # the elements. This ensures that the hash code does not depend + # on the order in which elements are added to the set. This is + # not called __hash__ because a BaseSet should not be hashable; + # only an ImmutableSet is hashable. + result = 0 + for elt in self: + result ^= hash(elt) + return result + + def _update(self, iterable): + # The main loop for update() and the subclass __init__() methods. + data = self._data + + # Use the fast update() method when a dictionary is available. + if isinstance(iterable, BaseSet): + data.update(iterable._data) + return + + value = True + + if type(iterable) in (list, tuple, xrange): + # Optimized: we know that __iter__() and next() can't + # raise TypeError, so we can move 'try:' out of the loop. + it = iter(iterable) + while True: + try: + for element in it: + data[element] = value + return + except TypeError: + transform = getattr(element, "__as_immutable__", None) + if transform is None: + raise # re-raise the TypeError exception we caught + data[transform()] = value + else: + # Safe: only catch TypeError where intended + for element in iterable: + try: + data[element] = value + except TypeError: + transform = getattr(element, "__as_immutable__", None) + if transform is None: + raise # re-raise the TypeError exception we caught + data[transform()] = value + + +class ImmutableSet(BaseSet): + """Immutable set class.""" + + __slots__ = ['_hashcode'] + + # BaseSet + hashing + + def __init__(self, iterable=None): + """Construct an immutable set from an optional iterable.""" + self._hashcode = None + self._data = {} + if iterable is not None: + self._update(iterable) + + def __hash__(self): + if self._hashcode is None: + self._hashcode = self._compute_hash() + return self._hashcode + + def __getstate__(self): + return self._data, self._hashcode + + def __setstate__(self, state): + self._data, self._hashcode = state + +class Set(BaseSet): + """ Mutable set class.""" + + __slots__ = [] + + # BaseSet + operations requiring mutability; no hashing + + def __init__(self, iterable=None): + """Construct a set from an optional iterable.""" + self._data = {} + if iterable is not None: + self._update(iterable) + + def __getstate__(self): + # getstate's results are ignored if it is not + return self._data, + + def __setstate__(self, data): + self._data, = data + + # In-place union, intersection, differences. + # Subtle: The xyz_update() functions deliberately return None, + # as do all mutating operations on built-in container types. + # The __xyz__ spellings have to return self, though. + + def __ior__(self, other): + """Update a set with the union of itself and another.""" + self._binary_sanity_check(other) + self._data.update(other._data) + return self + + def union_update(self, other): + """Update a set with the union of itself and another.""" + self._update(other) + + def __iand__(self, other): + """Update a set with the intersection of itself and another.""" + self._binary_sanity_check(other) + self._data = (self & other)._data + return self + + def intersection_update(self, other): + """Update a set with the intersection of itself and another.""" + if isinstance(other, BaseSet): + self &= other + else: + self._data = (self.intersection(other))._data + + def __ixor__(self, other): + """Update a set with the symmetric difference of itself and another.""" + self._binary_sanity_check(other) + self.symmetric_difference_update(other) + return self + + def symmetric_difference_update(self, other): + """Update a set with the symmetric difference of itself and another.""" + data = self._data + value = True + if not isinstance(other, BaseSet): + other = Set(other) + if self is other: + self.clear() + for elt in other: + if elt in data: + del data[elt] + else: + data[elt] = value + + def __isub__(self, other): + """Remove all elements of another set from this set.""" + self._binary_sanity_check(other) + self.difference_update(other) + return self + + def difference_update(self, other): + """Remove all elements of another set from this set.""" + data = self._data + if not isinstance(other, BaseSet): + other = Set(other) + if self is other: + self.clear() + for elt in ifilter(data.__contains__, other): + del data[elt] + + # Python dict-like mass mutations: update, clear + + def update(self, iterable): + """Add all values from an iterable (such as a list or file).""" + self._update(iterable) + + def clear(self): + """Remove all elements from this set.""" + self._data.clear() + + # Single-element mutations: add, remove, discard + + def add(self, element): + """Add an element to a set. + + This has no effect if the element is already present. + """ + try: + self._data[element] = True + except TypeError: + transform = getattr(element, "__as_immutable__", None) + if transform is None: + raise # re-raise the TypeError exception we caught + self._data[transform()] = True + + def remove(self, element): + """Remove an element from a set; it must be a member. + + If the element is not a member, raise a KeyError. + """ + try: + del self._data[element] + except TypeError: + transform = getattr(element, "__as_temporarily_immutable__", None) + if transform is None: + raise # re-raise the TypeError exception we caught + del self._data[transform()] + + def discard(self, element): + """Remove an element from a set if it is a member. + + If the element is not a member, do nothing. + """ + try: + self.remove(element) + except KeyError: + pass + + def pop(self): + """Remove and return an arbitrary set element.""" + return self._data.popitem()[0] + + def __as_immutable__(self): + # Return a copy of self as an immutable set + return ImmutableSet(self) + + def __as_temporarily_immutable__(self): + # Return self wrapped in a temporarily immutable set + return _TemporarilyImmutableSet(self) + + +class _TemporarilyImmutableSet(BaseSet): + # Wrap a mutable set as if it was temporarily immutable. + # This only supplies hashing and equality comparisons. + + def __init__(self, set): + self._set = set + self._data = set._data # Needed by ImmutableSet.__eq__() + + def __hash__(self): + return self._set._compute_hash() |