sparse_set.py 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259
  1. # SparseSet is meant to act just like a set object, but without actually
  2. # storing discrete values for every item in the set
  3. #
  4. # The contents of this file are subject to the Python Software Foundation
  5. # License Version 2.3 (the License). You may not copy or use this file, in
  6. # either source code or executable form, except in compliance with the License.
  7. # You may obtain a copy of the License at http://www.python.org/license.
  8. #
  9. # Software distributed under the License is distributed on an AS IS basis,
  10. # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  11. # for the specific language governing rights and limitations under the
  12. # License.
  13. #
  14. # by Greg Hazel
  15. from __future__ import generators
  16. from bisect import bisect_left
  17. from itertools import izip
  18. try:
  19. from blist import blist
  20. except ImportError:
  21. list_base = list
  22. else:
  23. list_base = blist
  24. class SparseSet(object):
  25. def __init__(self, s = None):
  26. self._begins = list_base()
  27. # ends are non-inclusive
  28. self._ends = list_base()
  29. if s is not None:
  30. if isinstance(s, SparseSet):
  31. self._begins = list_base(s._begins)
  32. self._ends = list_base(s._ends)
  33. else:
  34. self.add_range(s)
  35. def _collapse_range(self, l):
  36. last = None
  37. begins = list_base()
  38. ends = list_base()
  39. if len(l) == 0:
  40. return begins, ends
  41. begins.append(l[0])
  42. for i in l:
  43. if last and i > (last + 1):
  44. ends.append(last + 1)
  45. begins.append(i)
  46. last = i
  47. if last is not None:
  48. ends.append(last + 1)
  49. return begins, ends
  50. def subtract_range(self, l):
  51. begins, ends = self._collapse_range(l)
  52. for b, e in izip(begins, ends):
  53. self.subtract(b, e)
  54. def add_range(self, l):
  55. begins, ends = self._collapse_range(l)
  56. for b, e in izip(begins, ends):
  57. self.add(b, e)
  58. def add(self, begin, end=None):
  59. if end is None:
  60. end = begin + 1
  61. elif begin >= end:
  62. raise ValueError("begin(%d) >= end(%d)" % (begin, end))
  63. if len(self._begins) == 0:
  64. self._begins.append(begin)
  65. self._ends.append(end)
  66. return
  67. b_i = bisect_left(self._begins, begin)
  68. if b_i == 0:
  69. if begin >= self._begins[b_i]:
  70. begin = self._begins[b_i]
  71. elif begin <= self._ends[b_i - 1]:
  72. b_i -= 1
  73. begin = self._begins[b_i]
  74. e_i = bisect_left(self._ends, end, b_i)
  75. if e_i < len(self._ends):
  76. if end >= self._begins[e_i]:
  77. end = self._ends[e_i]
  78. else:
  79. e_i -= 1
  80. # small optimization
  81. if b_i == e_i:
  82. if b_i == len(self._begins):
  83. self._begins.append(begin)
  84. self._ends.append(end)
  85. else:
  86. self._begins[b_i] = begin
  87. self._ends[b_i] = end
  88. return
  89. # small optimization
  90. if b_i == e_i + 1:
  91. self._begins.insert(b_i, begin)
  92. self._ends.insert(b_i, end)
  93. return
  94. self._begins[b_i:e_i + 1] = (begin,)
  95. self._ends[b_i:e_i + 1] = (end,)
  96. def discard(self, begin, end=None):
  97. if end is None:
  98. end = begin + 1
  99. elif begin >= end:
  100. raise ValueError("begin(%d) >= end(%d)" % (begin, end))
  101. b_i = bisect_left(self._begins, begin)
  102. s_b_i = max(b_i - 1, 0)
  103. e_i = bisect_left(self._ends, end, s_b_i)
  104. beginning_is_an_end = False
  105. end_is_an_end = False
  106. if b_i > 0 and begin < self._ends[b_i - 1]:
  107. beginning_is_an_end = True
  108. if e_i < len(self._ends):
  109. if end == self._ends[e_i]:
  110. e_i += 1
  111. elif end > self._begins[e_i]:
  112. end_is_an_end = True
  113. del self._begins[b_i:e_i]
  114. del self._ends[b_i:e_i]
  115. if beginning_is_an_end:
  116. old_end = self._ends[b_i - 1]
  117. self._ends[b_i - 1] = begin
  118. if end_is_an_end:
  119. if beginning_is_an_end and b_i > e_i:
  120. self._begins.insert(b_i, end)
  121. self._ends.insert(b_i, old_end)
  122. self._begins[b_i] = end
  123. remove = discard
  124. subtract = discard
  125. def is_range_in(self, x, y):
  126. assert y > x
  127. i = bisect_left(self._begins, x)
  128. if i > 0 and x < self._ends[i - 1]:
  129. if y <= self._ends[i - 1]:
  130. return True
  131. if i < len(self._begins) and x >= self._begins[i] and x < self._ends[i]:
  132. if y <= self._ends[i]:
  133. return True
  134. return False
  135. def offset(self, x):
  136. for i in xrange(len(self._begins)):
  137. self._begins[i] += x
  138. self._ends[i] += x
  139. def __getitem__(self, i):
  140. r = i
  141. if r < 0:
  142. r = len(self) + i
  143. for b, e in izip(self._begins, self._ends):
  144. l = e - b
  145. if r < 0:
  146. break
  147. if l > r:
  148. return b + r
  149. r -= l
  150. raise IndexError("SparseSet index '%s' out of range" % i)
  151. def __iter__(self):
  152. for b, e in izip(self._begins, self._ends):
  153. for i in xrange(b, e):
  154. yield i
  155. def iterneg(self, begin, end):
  156. ranges = list_base()
  157. b_i = bisect_left(self._begins, begin)
  158. for b, e in izip(self._begins[b_i:], self._ends[b_i:]):
  159. for i in xrange(begin, b):
  160. yield i
  161. begin = e
  162. if begin < end:
  163. for i in xrange(begin, end):
  164. yield i
  165. def iterrange(self):
  166. for b, e in izip(self._begins, self._ends):
  167. yield (b, e)
  168. def largest_range(self):
  169. m = None
  170. r = None
  171. for b, e in izip(self._begins, self._ends):
  172. if b - e > m:
  173. m = b - e
  174. r = (b, e)
  175. return r
  176. def __eq__(self, s):
  177. if not isinstance(s, SparseSet):
  178. return False
  179. return (self._begins == s._begins) and (self._ends == s._ends)
  180. def __ne__(self, s):
  181. if not isinstance(s, SparseSet):
  182. return True
  183. return (self._begins != s._begins) or (self._ends != s._ends)
  184. def __contains__(self, x):
  185. i = bisect_left(self._begins, x)
  186. if i > 0 and x < self._ends[i - 1]:
  187. return True
  188. if i < len(self._begins) and x == self._begins[i]:
  189. return True
  190. return False
  191. def __len__(self):
  192. l = 0
  193. for b, e in izip(self._begins, self._ends):
  194. l += e - b
  195. return l
  196. def __sub__(self, s):
  197. n = SparseSet(self)
  198. if isinstance(s, SparseSet):
  199. for b, e in izip(s._begins, s._ends):
  200. n.subtract(b, e)
  201. else:
  202. n.subtract_range(list_base(s))
  203. return n
  204. def __add__(self, s):
  205. n = SparseSet(self)
  206. if isinstance(s, SparseSet):
  207. for b, e in izip(s._begins, s._ends):
  208. n.add(b, e)
  209. else:
  210. n.add_range(list_base(s))
  211. return n
  212. def __repr__(self):
  213. return 'SparseSet(%s)' % str(zip(self._begins, self._ends))
  214. def __str__(self):
  215. return str(zip(self._begins, self._ends))