CMultiMap.py 42 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273
  1. #!/usr/bin/env python
  2. # The contents of this file are subject to the BitTorrent Open Source License
  3. # Version 1.1 (the License). You may not copy or use this file, in either
  4. # source code or executable form, except in compliance with the License. You
  5. # may obtain a copy of the License at http://www.bittorrent.com/license/.
  6. #
  7. # Software distributed under the License is distributed on an AS IS basis,
  8. # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  9. # for the specific language governing rights and limitations under the
  10. # License.
  11. #
  12. # By David Harrison
  13. # I was playing with doctest when I wrote this. I still haven't
  14. # decided how useful doctest is as opposed to implementing unit tests
  15. # directly. --Dave
  16. if __name__ == '__main__':
  17. import sys
  18. sys.path = ['.','..'] + sys.path # HACK to simplify unit testing.
  19. from BTL.translation import _
  20. class BEGIN: # represents special BEGIN location before first next.
  21. pass
  22. from UserDict import DictMixin
  23. from cmultimap_swig import *
  24. import sys
  25. from weakref import WeakKeyDictionary
  26. LEAK_TEST = False
  27. class CMultiMap(object, DictMixin):
  28. """In-order mapping. Similar to a dict, except that it provides in-order
  29. iteration and searches for the nearest key <= or >= a given key.
  30. Distinguishes itself from CMap in that CMultiMap instances allows
  31. multiple entries with the same key, thus __getitem__ and get always
  32. return a list. If there are no matching keys then __getitem__
  33. or get returns an empty list, one match a single-element list, etc.
  34. Values with the same key have arbitrary order.
  35. LIMITATION: The key must be a double. The value can be anything.
  36. Item insertion: O(log n) append, __setitem__
  37. Item deletion: O(log n + k) erase
  38. O(log n + k + m) __delitem__
  39. Key search: O(log n) find, __contains__
  40. O(log n + m) __getitem__, get
  41. Value search: n/a
  42. Iteration step: amortized O(1), worst-case O(log n)
  43. Memory: O(n)
  44. n = number of elements in map. k = number of iterators pointing
  45. into map. The assumption here is that there are few iterators
  46. in existence at any given time. m = number of elements matching
  47. the key.
  48. Iterators are not invalidated by insertions. Iterators are invalidated
  49. by deletions only when the key-value pair referenced is deleted.
  50. Deletion has a '+k' because __delitem__ searches linearly
  51. through the set of iterators to find any iterator pointing at the
  52. deleted item and then invalidates the iterator.
  53. This class is backed by the C++ STL map class, but conforms
  54. to the Python container interface."""
  55. class _AbstractIterator:
  56. """Iterates over elements in the map in order."""
  57. def __init__(self, m, si = BEGIN ): # "s.." implies swig object.
  58. """Creates an iterator pointing to element si in map m.
  59. Do not instantiate directly. Use iterkeys, itervalues, or
  60. iteritems.
  61. The _AbstractIterator takes ownership of any C++ iterator
  62. (i.e., the swig object 'si') and will deallocate it when
  63. the iterator is deallocated.
  64. Examples of typical behavior:
  65. >>> from CMultiMap import CMultiMap
  66. >>> m = CMultiMap()
  67. >>> m[12] = 6
  68. >>> m[9] = 4
  69. >>> for k in m:
  70. ... print int(k)
  71. ...
  72. 9
  73. 12
  74. >>>
  75. Example edge cases (empty map):
  76. >>> from CMultiMap import CMultiMap
  77. >>> m = CMultiMap()
  78. >>> try:
  79. ... i = m.__iter__()
  80. ... i.value()
  81. ... except IndexError:
  82. ... print 'IndexError.'
  83. ...
  84. IndexError.
  85. >>> try:
  86. ... i.next()
  87. ... except StopIteration:
  88. ... print 'stopped'
  89. ...
  90. stopped
  91. @param mmap: CMultiMap.
  92. @param node: Node that this iterator will point at. If None
  93. then the iterator points to end(). If BEGIN
  94. then the iterator points to one before the beginning.
  95. """
  96. assert isinstance(m, CMultiMap)
  97. assert not isinstance(si, CMultiMap._AbstractIterator)
  98. if si == None:
  99. self._si = mmap_end(m._smmap)
  100. else:
  101. self._si = si # C++ iterator wrapped by swig.
  102. self._mmap = m
  103. m._iterators[self] = 1 # using map as set of weak references.
  104. def __hash__(self):
  105. return id(self)
  106. def __cmp__(self, other):
  107. if not self._si or not other._si:
  108. raise RuntimeError( _("invalid iterator") )
  109. if self._si == BEGIN and other._si == BEGIN: return 0
  110. if self._si == BEGIN and other._si != BEGIN: return -1
  111. elif self._si != BEGIN and other._si == BEGIN: return 1
  112. return iiter_cmp(self._mmap._smmap, self._si, other._si )
  113. def at_begin(self):
  114. """equivalent to self == m.begin() where m is a CMultiMap.
  115. >>> from CMultiMap import CMultiMap
  116. >>> m = CMultiMap()
  117. >>> i = m.begin()
  118. >>> i == m.begin()
  119. True
  120. >>> i.at_begin()
  121. True
  122. >>> i == m.end() # no elements so begin()==end()
  123. True
  124. >>> i.at_end()
  125. True
  126. >>> m[6] = 'foo' # insertion does not invalidate iterators.
  127. >>> i = m.begin()
  128. >>> i == m.end()
  129. False
  130. >>> i.value()
  131. 'foo'
  132. >>> try: # test at_begin when not at beginning.
  133. ... i.next()
  134. ... except StopIteration:
  135. ... print 'ok'
  136. ok
  137. >>> i.at_begin()
  138. False
  139. """
  140. if not self._si:
  141. raise RuntimeError( _("invalid iterator") )
  142. if self._si == BEGIN: # BEGIN is one before begin(). Yuck!!
  143. return False
  144. return mmap_iiter_at_begin(self._mmap._smmap, self._si)
  145. def at_end(self):
  146. """equivalent to self == m.end() where m is a CMap, but
  147. at_end is faster because it avoids the dynamic memory
  148. alloation in m.end().
  149. >>> from CMultiMap import CMultiMap
  150. >>> m = CMultiMap()
  151. >>> m[6] = 'foo'
  152. >>> i = m.end() # test when at end.
  153. >>> i == m.end()
  154. True
  155. >>> i.at_end()
  156. True
  157. >>> int(i.prev())
  158. 6
  159. >>> i.at_end() # testing when not at end.
  160. False
  161. """
  162. if not self._si:
  163. raise RuntimeError( _("invalid iterator") )
  164. if self._si == BEGIN:
  165. return False
  166. return mmap_iiter_at_end(self._mmap._smmap, self._si)
  167. def key(self):
  168. """@return: the key of the key-value pair referenced by this
  169. iterator.
  170. """
  171. if not self._si:
  172. raise RuntimeError( _("invalid iterator") )
  173. if self._si == BEGIN:
  174. raise IndexError(_("Cannot dereference iterator until after "
  175. "first call to .next."))
  176. elif mmap_iiter_at_end(self._mmap._smmap, self._si):
  177. raise IndexError()
  178. return iiter_key(self._si)
  179. def value(self):
  180. """@return: the value of the key-value pair currently referenced
  181. by this iterator.
  182. """
  183. if not self._si:
  184. raise RuntimeError( _("invalid iterator") )
  185. if self._si == BEGIN:
  186. raise IndexError(_("Cannot dereference iterator until after "
  187. "first call to next."))
  188. elif mmap_iiter_at_end(self._mmap._smmap, self._si):
  189. raise IndexError()
  190. return iiter_value(self._si)
  191. def item(self):
  192. """@return the key-value pair referenced by this iterator.
  193. """
  194. if not self._si:
  195. raise RuntimeError( _("invalid iterator") )
  196. return self.key(), self.value()
  197. def _next(self):
  198. if not self._si:
  199. raise RuntimeError( _("invalid iterator") )
  200. if self._si == BEGIN:
  201. self._si = mmap_begin(self._mmap._smmap)
  202. if mmap_iiter_at_end(self._mmap._smmap,self._si):
  203. raise StopIteration
  204. return
  205. if mmap_iiter_at_end(self._mmap._smmap,self._si):
  206. raise StopIteration
  207. iiter_incr(self._si)
  208. if mmap_iiter_at_end(self._mmap._smmap,self._si):
  209. raise StopIteration
  210. def _prev(self):
  211. if not self._si:
  212. raise RuntimeError( _("invalid iterator") )
  213. if self._si == BEGIN:
  214. raise StopIteration()
  215. elif mmap_iiter_at_begin(self._mmap._smmap, self._si):
  216. self._si = BEGIN
  217. raise StopIteration
  218. iiter_decr(self._si)
  219. def __del__(self):
  220. # Python note: "del x" merely eliminates one reference to an
  221. # object. __del__ isn't called until the ref count goes to 0.
  222. # Only when the last reference is gone is __del__ called.
  223. self._invalidate()
  224. def _invalidate(self):
  225. if self._si == None: # if already invalidated...
  226. return
  227. try:
  228. del self._mmap._iterators[self]
  229. except KeyError:
  230. pass # could've been removed because weak reference,
  231. # and because _invalidate is called from __del__.
  232. if self._si != BEGIN:
  233. iiter_delete(self._si)
  234. self._si = None
  235. def __iter__(self):
  236. """If the iterator is itself iteratable then we do things like:
  237. >>> from CMultiMap import CMultiMap
  238. >>> m = CMultiMap()
  239. >>> m[10] = 'foo'
  240. >>> m[11] = 'bar'
  241. >>> for x in m.itervalues():
  242. ... print x
  243. ...
  244. foo
  245. bar
  246. """
  247. return self
  248. def __len__(self):
  249. return len(self._mmap)
  250. class KeyIterator(_AbstractIterator):
  251. def next(self):
  252. """Returns the next key in the mmap.
  253. Insertion does not invalidate iterators. Deletion only
  254. invalidates an iterator if the iterator pointed at the
  255. key-value pair being deleted.
  256. This is implemented by moving the iterator and then
  257. dereferencing it. If we dereferenced and then moved
  258. then we would get the odd behavior:
  259. Ex: I have keys [1,2,3]. The iterator i points at 1.
  260. print i.next() # prints 1
  261. print i.next() # prints 2
  262. print i.prev() # prints 3
  263. print i.prev() # prints 2
  264. However, because we move and then dereference, when an
  265. iterator is first created it points to nowhere
  266. so that the first next moves to the first element.
  267. Ex:
  268. >>> from CMultiMap import *
  269. >>> m = CMultiMap()
  270. >>> m[5] = 1
  271. >>> m[8] = 4
  272. >>> i = m.__iter__()
  273. >>> print int(i.next())
  274. 5
  275. >>> print int(i.next())
  276. 8
  277. >>> print int(i.prev())
  278. 5
  279. We are still left with the odd behavior that an
  280. iterator cannot be dereferenced until after the first next().
  281. Ex edge cases:
  282. >>> from CMultiMap import CMultiMap
  283. >>> m = CMultiMap()
  284. >>> i = m.__iter__()
  285. >>> try:
  286. ... i.prev()
  287. ... except StopIteration:
  288. ... print 'StopIteration'
  289. ...
  290. StopIteration
  291. >>> m[5]='a'
  292. >>> i = m.iterkeys()
  293. >>> int(i.next())
  294. 5
  295. >>> try: i.next()
  296. ... except StopIteration: print 'StopIteration'
  297. ...
  298. StopIteration
  299. >>> int(i.prev())
  300. 5
  301. >>> try: int(i.prev())
  302. ... except StopIteration: print 'StopIteration'
  303. ...
  304. StopIteration
  305. >>> int(i.next())
  306. 5
  307. """
  308. self._next()
  309. return self.key()
  310. def prev(self):
  311. """Returns the previous key in the mmap.
  312. See next() for more detail and examples.
  313. """
  314. self._prev()
  315. return self.key()
  316. class ValueIterator(_AbstractIterator):
  317. def next(self):
  318. """@return: next value in the mmap.
  319. >>> from CMultiMap import *
  320. >>> m = CMultiMap()
  321. >>> m[5] = 10
  322. >>> m[6] = 3
  323. >>> i = m.itervalues()
  324. >>> int(i.next())
  325. 10
  326. >>> int(i.next())
  327. 3
  328. """
  329. self._next()
  330. return self.value()
  331. def prev(self):
  332. self._prev()
  333. return self.value()
  334. class ItemIterator(_AbstractIterator):
  335. def next(self):
  336. """@return: next item in the mmap's key ordering.
  337. >>> from CMultiMap import CMultiMap
  338. >>> m = CMultiMap()
  339. >>> m[5] = 10
  340. >>> m[6] = 3
  341. >>> i = m.iteritems()
  342. >>> k,v = i.next()
  343. >>> int(k)
  344. 5
  345. >>> int(v)
  346. 10
  347. >>> k,v = i.next()
  348. >>> int(k)
  349. 6
  350. >>> int(v)
  351. 3
  352. """
  353. self._next()
  354. return self.key(), self.value()
  355. def prev(self):
  356. self._prev()
  357. return self.key(), self.value()
  358. def __init__(self, d={} ):
  359. """Instantiate RBTree containing values from passed dict and
  360. ordered based on cmp.
  361. >>> m = CMultiMap()
  362. >>> len(m)
  363. 0
  364. >>> m[5]=2
  365. >>> len(m)
  366. 1
  367. >>> print m[5]
  368. [2]
  369. """
  370. self._smmap = mmap_constructor() # C++ mmap wrapped by swig.
  371. for key, value in d.items():
  372. self[key]=value
  373. self._iterators = WeakKeyDictionary()
  374. # whenever node is deleted. search iterators
  375. # for any iterator that becomes invalid.
  376. def __contains__(self,x):
  377. return self.has_key(x)
  378. def __iter__(self):
  379. """@return: KeyIterator positioned one before the beginning of the
  380. key ordering so that the first next() returns the first key."""
  381. return CMultiMap.KeyIterator(self)
  382. def begin(self):
  383. """Returns an iterator pointing at first key-value pair. This
  384. differs from iterkeys, itervalues, and iteritems which return an
  385. iterator pointing one before the first key-value pair.
  386. @return: key iterator to first key-value.
  387. >>> from CMultiMap import *
  388. >>> m = CMultiMap()
  389. >>> m[5.0] = 'a'
  390. >>> i = m.begin()
  391. >>> int(i.key()) # raises no IndexError.
  392. 5
  393. >>> i = m.iterkeys()
  394. >>> try:
  395. ... i.key()
  396. ... except IndexError:
  397. ... print 'IndexError raised'
  398. ...
  399. IndexError raised
  400. """
  401. i = CMultiMap.KeyIterator(self, mmap_begin(self._smmap) )
  402. return i
  403. def end(self):
  404. """Returns an iterator pointing after end of key ordering.
  405. The iterator's prev method will move to the last
  406. key-value pair in the ordering. This in keeping with
  407. the notion that a range is specified as [i,j) where
  408. j is not in the range, and the range [i,j) where i==j
  409. is an empty range.
  410. This operation takes O(1) time.
  411. @return: key iterator one after end.
  412. """
  413. i = CMultiMap.KeyIterator(self,None) # None means one after last node.
  414. return i
  415. def iterkeys(self):
  416. return CMultiMap.KeyIterator(self)
  417. def itervalues(self):
  418. return CMultiMap.ValueIterator(self)
  419. def iteritems(self):
  420. return CMultiMap.ItemIterator(self)
  421. def __len__(self):
  422. return mmap_size(self._smmap)
  423. def __str__(self):
  424. s = "{"
  425. first = True
  426. for k,v in self.items():
  427. if first:
  428. first = False
  429. else:
  430. s += ", "
  431. if type(v) == str:
  432. s += "%s: '%s'" % (k,v)
  433. else:
  434. s += "%s: %s" % (k,v)
  435. s += "}"
  436. return s
  437. def __repr__(self):
  438. return self.__str__()
  439. def __getitem__(self, key):
  440. """Returns a list containing all matching values or the empty list
  441. if the key is not found.
  442. This differs in behavior from CMap which simply returns the value or
  443. throws a KeyError if it is not present.
  444. """
  445. si = mmap_find_iiter(self._smmap,key) # raises KeyError if key not found
  446. result = []
  447. while not mmap_iiter_at_end(self._smmap, si) and iiter_key(si) == key:
  448. result.append( iiter_value(si) )
  449. iiter_incr(si)
  450. iiter_delete(si)
  451. return result
  452. def __setitem__(self, key, value):
  453. """
  454. >>> from CMultiMap import CMultiMap
  455. >>> m = CMultiMap()
  456. >>> m[6] = 'bar'
  457. >>> m[6]
  458. ['bar']
  459. >>>
  460. """
  461. assert type(key) == int or type(key) == float
  462. mmap_insert(self._smmap,key,value)
  463. def __delitem__(self, key):
  464. """Deletes all items with matching key from the mmap.
  465. This takes O(log n + km) where n is the number of elements
  466. in the mmap and k is the number of iterators pointing into the mmap,
  467. and m is the number of items matching the key.
  468. Before deleting ab item it linearly searches through
  469. all iterators pointing into the mmap and invalidates any that
  470. are pointing at the item about to be deleted.
  471. Raises a KeyError if the key is not found.
  472. >>> from CMultiMap import CMultiMap
  473. >>> m = CMultiMap()
  474. >>> m[12] = 'foo'
  475. >>> m[13] = 'bar'
  476. >>> m[14] = 'boo'
  477. >>> del m[12]
  478. >>> m[12]
  479. []
  480. >>> j = m.begin()
  481. >>> int(j.next())
  482. 14
  483. >>> i = m.begin()
  484. >>> i.value()
  485. 'bar'
  486. >>> del m[13] # delete object referenced by an iterator
  487. >>> try:
  488. ... i.value()
  489. ... except RuntimeError:
  490. ... print 'ok'
  491. ok
  492. >>> j.value() # deletion should not invalidate other iterators.
  493. 'boo'
  494. """
  495. si = sprev = None
  496. try:
  497. #mmap_erase( self._smmap, key ) # mmap_erase is dangerous. It could
  498. # delete the node causing an
  499. # iterator to become invalid. --Dave
  500. si = mmap_find_iiter( self._smmap, key ) # si = swig'd iterator.
  501. if mmap_iiter_at_end(self._smmap, si):
  502. raise KeyError(key)
  503. sprev = iiter_copy(si)
  504. # HERE this could be written more efficiently. --Dave.
  505. while not mmap_iiter_at_end(self._smmap, si) and \
  506. iiter_key(si) == key:
  507. for i in list(self._iterators):
  508. if iiter_cmp( self._smmap, i._si, si ) == 0:
  509. i._invalidate()
  510. iiter_incr(si)
  511. mmap_iiter_erase( self._smmap, sprev )
  512. iiter_assign(sprev, si)
  513. finally:
  514. if si:
  515. iiter_delete(si)
  516. if sprev:
  517. iiter_delete(sprev)
  518. def erase(self, iter):
  519. """Remove item pointed to by the iterator. All iterators that
  520. point at the erased item including the passed iterator
  521. are immediately invalidated after the deletion completes.
  522. >>> from CMultiMap import CMultiMap
  523. >>> m = CMultiMap()
  524. >>> m[12] = 'foo'
  525. >>> i = m.find(12)
  526. >>> m.erase(i)
  527. >>> len(m) == 0
  528. True
  529. """
  530. if not iter._si:
  531. raise RuntimeError( _("invalid iterator") )
  532. if iter._si == BEGIN:
  533. raise IndexError(_("Iterator does not point at key-value pair" ))
  534. if self is not iter._mmap:
  535. raise IndexError(_("Iterator points into a different CMultiMap."))
  536. if mmap_iiter_at_end(self._smmap, iter._si):
  537. raise IndexError( _("Cannot erase end() iterator.") )
  538. # invalidate iterators.
  539. for i in list(self._iterators):
  540. if iter._si is not i._si and iiter_cmp( self._smmap, iter._si, i._si ) == 0:
  541. i._invalidate()
  542. # remove item from the map.
  543. mmap_iiter_erase( self._smmap, iter._si )
  544. # invalidate last iterator pointing to the deleted location in the map.
  545. iter._invalidate()
  546. def __del__(self):
  547. # invalidate all iterators.
  548. for i in list(self._iterators):
  549. i._invalidate()
  550. mmap_delete(self._smmap)
  551. def get(self, key, default=None):
  552. """
  553. @return list containing values corresponding to specified key or
  554. return a single-element list containing 'default'
  555. if the key is not found. If 'default' is None then the
  556. empty list is returned when the key is not found.
  557. >>> from CMultiMap import *
  558. >>> m = CMultiMap()
  559. >>> m[5] = 'a'
  560. >>> m.get(5)
  561. ['a']
  562. >>> m[5] = 'b'
  563. >>> m.get(5)
  564. ['a', 'b']
  565. >>> m.get(6)
  566. []
  567. >>> m.get(6,'c')
  568. ['c']
  569. """
  570. if self.has_key(key):
  571. return self[key]
  572. if default is None:
  573. return []
  574. return [default]
  575. def keys(self):
  576. """
  577. >>> from CMultiMap import *
  578. >>> m = CMultiMap()
  579. >>> m[4.0] = 7
  580. >>> m[6.0] = 3
  581. >>> [int(x) for x in m.keys()] # m.keys() but guaranteed integers.
  582. [4, 6]
  583. """
  584. k = []
  585. for key in self:
  586. k.append(key)
  587. return k
  588. def values(self):
  589. """
  590. >>> from CMultiMap import CMultiMap
  591. >>> m = CMultiMap()
  592. >>> m[4.0] = 7
  593. >>> m[6.0] = 3
  594. >>> m.values()
  595. [7, 3]
  596. """
  597. i = self.itervalues()
  598. v = []
  599. try:
  600. while True:
  601. v.append(i.next())
  602. except StopIteration:
  603. pass
  604. return v
  605. def items(self):
  606. """
  607. >>> from CMultiMap import CMultiMap
  608. >>> m = CMultiMap()
  609. >>> m[4.0] = 7
  610. >>> m[6.0] = 3
  611. >>> [(int(x[0]),int(x[1])) for x in m.items()]
  612. [(4, 7), (6, 3)]
  613. """
  614. i = self.iteritems()
  615. itms = []
  616. try:
  617. while True:
  618. itms.append(i.next())
  619. except StopIteration:
  620. pass
  621. return itms
  622. def has_key(self, key):
  623. """
  624. >>> from CMultiMap import CMultiMap
  625. >>> m = CMultiMap()
  626. >>> m[4.0] = 7
  627. >>> if m.has_key(4): print 'ok'
  628. ...
  629. ok
  630. >>> if not m.has_key(7): print 'ok'
  631. ...
  632. ok
  633. """
  634. try:
  635. mmap_find(self._smmap, key)
  636. except KeyError:
  637. return False
  638. return True
  639. def clear(self):
  640. """delete all entries
  641. >>> from CMultiMap import CMultiMap
  642. >>> m = CMultiMap()
  643. >>> m[4] = 7
  644. >>> m.clear()
  645. >>> print len(m)
  646. 0
  647. """
  648. self.__del__()
  649. self._smmap = mmap_constructor()
  650. def copy(self):
  651. """return shallow copy"""
  652. return CMultiMap(self)
  653. def lower_bound(self,key):
  654. """
  655. Finds smallest key equal to or above the lower bound.
  656. Takes O(log n) time.
  657. @param x: Key of (key, value) pair to be located.
  658. @return: Key Iterator pointing to first item equal to or greater
  659. than key, or end() if no such item exists.
  660. >>> from CMultiMap import CMultiMap
  661. >>> m = CMultiMap()
  662. >>> m[10] = 'foo'
  663. >>> m[15] = 'bar'
  664. >>> i = m.lower_bound(11) # iterator.
  665. >>> int(i.key())
  666. 15
  667. >>> i.value()
  668. 'bar'
  669. Edge cases:
  670. >>> from CMultiMap import CMultiMap
  671. >>> m = CMultiMap()
  672. >>> i = m.lower_bound(11)
  673. >>> if i == m.end(): print 'ok'
  674. ...
  675. ok
  676. >>> m[10] = 'foo'
  677. >>> i = m.lower_bound(11)
  678. >>> if i == m.end(): print 'ok'
  679. ...
  680. ok
  681. >>> i = m.lower_bound(9)
  682. >>> if i == m.begin(): print 'ok'
  683. ...
  684. ok
  685. """
  686. return CMultiMap.KeyIterator(self, mmap_lower_bound( self._smmap, key ))
  687. def upper_bound(self, key):
  688. """
  689. Finds largest key equal to or below the upper bound. In keeping
  690. with the [begin,end) convention, the returned iterator
  691. actually points to the key one above the upper bound.
  692. Takes O(log n) time.
  693. @param x: Key of (key, value) pair to be located.
  694. @return: Iterator pointing to first element equal to or greater than
  695. key, or end() if no such item exists.
  696. >>> from CMultiMap import CMultiMap
  697. >>> m = CMultiMap()
  698. >>> m[10] = 'foo'
  699. >>> m[15] = 'bar'
  700. >>> m[17] = 'choo'
  701. >>> i = m.upper_bound(11) # iterator.
  702. >>> i.value()
  703. 'bar'
  704. Edge cases:
  705. >>> from CMultiMap import CMultiMap
  706. >>> m = CMultiMap()
  707. >>> i = m.upper_bound(11)
  708. >>> if i == m.end(): print 'ok'
  709. ...
  710. ok
  711. >>> m[10] = 'foo'
  712. >>> i = m.upper_bound(9)
  713. >>> i.value()
  714. 'foo'
  715. >>> i = m.upper_bound(11)
  716. >>> if i == m.end(): print 'ok'
  717. ...
  718. ok
  719. """
  720. return CMultiMap.KeyIterator(self, mmap_upper_bound( self._smmap, key ))
  721. def find(self,key):
  722. """
  723. Finds the first item with matching key and returns a KeyIterator
  724. pointing at the item. If no match is found then returns end().
  725. Takes O(log n) time.
  726. >>> from CMultiMap import CMultiMap
  727. >>> m = CMultiMap()
  728. >>> i = m.find(10)
  729. >>> if i == m.end(): print 'ok'
  730. ...
  731. ok
  732. >>> m[10] = 'foo'
  733. >>> i = m.find(10)
  734. >>> int(i.key())
  735. 10
  736. >>> i.value()
  737. 'foo'
  738. """
  739. return CMultiMap.KeyIterator(self, mmap_find_iiter( self._smmap, key ))
  740. def update_key( self, iter, key ):
  741. """
  742. Modifies the key of the item referenced by iter. If the
  743. key change is small enough that no reordering occurs then
  744. this takes amortized O(1) time. If a reordering occurs then
  745. this takes O(log n).
  746. WARNING!!! The passed iterator MUST be assumed to be invalid
  747. upon return. Any further operation on the passed iterator other than
  748. deallocation results in a RuntimeError exception.
  749. Typical use:
  750. >>> from CMultiMap import CMultiMap
  751. >>> m = CMultiMap()
  752. >>> m[10] = 'foo'
  753. >>> m[8] = 'bar'
  754. >>> i = m.find(10)
  755. >>> m.update_key(i,7) # i is assumed to be invalid upon return.
  756. >>> del i
  757. >>> [(int(x[0]),x[1]) for x in m.items()] # reordering occurred.
  758. [(7, 'foo'), (8, 'bar')]
  759. >>> i = m.find(8)
  760. >>> m.update_key(i,9) # no reordering.
  761. >>> del i
  762. >>> [(int(x[0]),x[1]) for x in m.items()]
  763. [(7, 'foo'), (9, 'bar')]
  764. Edge cases:
  765. >>> i = m.find(7)
  766. >>> i.value()
  767. 'foo'
  768. >>> m.update_key(i,9) # update to key already in the mmap.
  769. >>> m[7]
  770. []
  771. >>> m[9]
  772. ['foo', 'bar']
  773. >>> i = m.iterkeys()
  774. >>> try: # updating an iter pointing at BEGIN.
  775. ... m.update_key(i,10)
  776. ... except IndexError:
  777. ... print 'ok'
  778. ...
  779. ok
  780. >>> i = m.end()
  781. >>> try: # updating an iter pointing at end().
  782. ... m.update_key(i,10)
  783. ... except IndexError:
  784. ... print 'ok'
  785. ...
  786. ok
  787. """
  788. assert isinstance(iter,CMultiMap._AbstractIterator)
  789. if not iter._si:
  790. raise RuntimeError( _("invalid iterator") )
  791. if iter._si == BEGIN:
  792. raise IndexError(_("Iterator does not point at key-value pair" ))
  793. if self is not iter._mmap:
  794. raise IndexError(_("Iterator points into a different CMultiMap."))
  795. if mmap_iiter_at_end(self._smmap, iter._si):
  796. raise IndexError( _("Cannot erase end() iterator.") )
  797. mmap_iiter_update_key(self._smmap, iter._si, key)
  798. def append(self, key, value):
  799. """Performs an insertion with the hint that it probably should
  800. go at the end.
  801. Raises KeyError if the key is already in the mmap.
  802. >>> from CMultiMap import CMultiMap
  803. >>> m = CMultiMap()
  804. >>> m.append(5.0,'foo') # append to empty mmap.
  805. >>> len(m)
  806. 1
  807. >>> [int(x) for x in m.keys()] # see note (1)
  808. [5]
  809. >>> m.append(10.0, 'bar') # append in-order
  810. >>> [(int(x[0]),x[1]) for x in m.items()]
  811. [(5, 'foo'), (10, 'bar')]
  812. >>> m.append(3.0, 'coo') # out-of-order.
  813. >>> [(int(x[0]),x[1]) for x in m.items()]
  814. [(3, 'coo'), (5, 'foo'), (10, 'bar')]
  815. >>> m.append(10.0, 'blah') # append key already in mmap.
  816. >>> [(int(x[0]),x[1]) for x in m.items()]
  817. [(3, 'coo'), (5, 'foo'), (10, 'bar'), (10, 'blah')]
  818. >>>
  819. note (1): int(x[0]) is used because 5.0 can appear as either 5
  820. or 5.0 depending on the version of python.
  821. """
  822. mmap_append(self._smmap,key,value)
  823. class CIndexedMultiMap(CMultiMap):
  824. """This is an ordered mmapping, exactly like CMultiMap except that it
  825. provides a cross-index allowing average O(1) searches based on value.
  826. This adds the constraint that values must be unique (multiple equal
  827. keys can still be exist in the map).
  828. Item insertion: O(log n) append, __setitem__
  829. Item deletion: O(log n)
  830. Key search: O(log n) __getitem__, get, find, __contains__
  831. Value search: average O(1) as per dict
  832. Iteration step: amortized O(1), worst-case O(log n)
  833. Memory: O(n)
  834. The hash table increases the factor in the
  835. O(n) memory cost of the Map by a constant
  836. """
  837. def __init__(self, dict={} ):
  838. CMultiMap.__init__(self,dict)
  839. self._value_index = {} # cross-index. mmaps value->iterator.
  840. def __setitem__(self, key, value):
  841. """
  842. >>> from CMultiMap import *
  843. >>> m = CIndexedMultiMap()
  844. >>> m[6] = 'bar'
  845. >>> m[6]
  846. ['bar']
  847. >>> int(m.get_key_by_value('bar'))
  848. 6
  849. >>> try:
  850. ... m[7] = 'bar' # values must be unique!
  851. ... except ValueError:
  852. ... print 'value error'
  853. value error
  854. >>> m[6] = 'foo'
  855. >>> m[6]
  856. ['bar', 'foo']
  857. >>> try:
  858. ... m[7] = 'bar' # 2 values to 1 key. Values still unique!
  859. ... except ValueError:
  860. ... print 'value error'
  861. value error
  862. >>> m[7]
  863. []
  864. >>> int(m.get_key_by_value('bar'))
  865. 6
  866. """
  867. assert type(key) == int or type(key) == float
  868. if self._value_index.has_key(value) and \
  869. iiter_key(self._value_index[value]) != key:
  870. raise ValueError( _("Value %s already exists. Values must be "
  871. "unique.") % str(value) )
  872. si = mmap_insert_iiter(self._smmap,key,value) # si points where insert
  873. # should occur whether
  874. # insert succeeded or not.
  875. # si == "swig iterator"
  876. sival = iiter_value(si)
  877. if sival != value: # if insert failed because k already exists
  878. iiter_set(si,value) # then force set.
  879. self._value_index[value] = si
  880. viter = self._value_index[sival]
  881. iiter_delete(viter) # remove old value from index
  882. del self._value_index[sival]
  883. else: # else insert succeeded so update index.
  884. self._value_index[value] = si
  885. def __delitem__(self, key):
  886. """
  887. >>> from CMultiMap import CIndexedMultiMap
  888. >>> m = CIndexedMultiMap()
  889. >>> m[6] = 'bar'
  890. >>> m[6]
  891. ['bar']
  892. >>> int(m.get_key_by_value('bar'))
  893. 6
  894. >>> del m[6]
  895. >>> if m.get_key_by_value('bar'):
  896. ... print 'found'
  897. ... else:
  898. ... print 'not found.'
  899. not found.
  900. """
  901. i = mmap_find_iiter( self._smmap, key )
  902. if mmap_iiter_at_end( self._smmap, i ):
  903. iiter_delete(i)
  904. raise KeyError(key)
  905. else:
  906. value = iiter_value(i)
  907. for i in list(self._iterators):
  908. if iiter_cmp( self._smmap, i._si, iter._si ) == 0:
  909. i._invalidate()
  910. mmap_iiter_erase( self._smmap, i )
  911. viter = self._value_index[value]
  912. iiter_delete(i)
  913. iiter_delete( viter )
  914. del self._value_index[value]
  915. assert mmap_size(self._smmap) == len(self._value_index)
  916. def has_value(self, value):
  917. return self._value_index.has_key(value)
  918. def get_key_by_value(self, value):
  919. """Returns the key cross-indexed from the passed unique value, or
  920. returns None if the value is not in the mmap."""
  921. si = self._value_index.get(value) # si == "swig iterator"
  922. if si == None: return None
  923. return iiter_key(si)
  924. def append( self, key, value ):
  925. """See CMultiMap.append
  926. >>> from CMultiMap import CIndexedMultiMap
  927. >>> m = CIndexedMultiMap()
  928. >>> m.append(5,'foo')
  929. >>> [(int(x[0]),x[1]) for x in m.items()]
  930. [(5, 'foo')]
  931. >>> m.append(10, 'bar')
  932. >>> [(int(x[0]),x[1]) for x in m.items()]
  933. [(5, 'foo'), (10, 'bar')]
  934. >>> m.append(3, 'coo') # out-of-order.
  935. >>> [(int(x[0]),x[1]) for x in m.items()]
  936. [(3, 'coo'), (5, 'foo'), (10, 'bar')]
  937. >>> int(m.get_key_by_value( 'bar' ))
  938. 10
  939. >>> m.append(10, 'blah') # append key already in mmap.
  940. >>> [(int(x[0]),x[1]) for x in m.items()]
  941. [(3, 'coo'), (5, 'foo'), (10, 'bar'), (10, 'blah')]
  942. >>> try:
  943. ... m.append(10, 'coo') # append value already in mmap.
  944. ... except ValueError:
  945. ... print 'ok'
  946. ...
  947. ok
  948. """
  949. if self._value_index.has_key(value) and \
  950. iiter_key(self._value_index[value]) != key:
  951. raise ValueError(_("Value %s already exists and value must be "
  952. "unique.") % str(value) )
  953. si = mmap_append_iiter(self._smmap,key,value)
  954. if iiter_value(si) != value:
  955. iiter_delete(si)
  956. raise KeyError(key)
  957. self._value_index[value] = si
  958. def find_key_by_value(self, value):
  959. """Returns a key iterator cross-indexed from the passed unique value
  960. or end() if no value found.
  961. >>> from CMultiMap import CIndexedMultiMap
  962. >>> m = CIndexedMultiMap()
  963. >>> m[6] = 'abc'
  964. >>> i = m.find_key_by_value('abc')
  965. >>> int(i.key())
  966. 6
  967. >>> i = m.find_key_by_value('xyz')
  968. >>> if i == m.end(): print 'i points at end()'
  969. i points at end()
  970. """
  971. si = self._value_index.get(value) # si == "swig iterator."
  972. if si != None:
  973. si = iiter_copy(si); # copy else operations like increment on the
  974. # KeyIterator would modify the value index.
  975. return CMultiMap.KeyIterator(self,si)
  976. def copy(self):
  977. """return shallow copy"""
  978. return CIndexedMultiMap(self)
  979. def update_key( self, iter, key ):
  980. """
  981. see CMultiMap.update_key.
  982. WARNING!! You MUST assume that the passed iterator is invalidated
  983. upon return.
  984. Typical use:
  985. >>> from CMultiMap import CIndexedMultiMap
  986. >>> m = CIndexedMultiMap()
  987. >>> m[10] = 'foo'
  988. >>> m[8] = 'bar'
  989. >>> i = m.find(10)
  990. >>> m.update_key(i,7) # i is assumed to be invalid upon return.
  991. >>> del i
  992. >>> int(m.get_key_by_value('foo'))
  993. 7
  994. >>> [(int(x[0]),x[1]) for x in m.items()] # reordering occurred.
  995. [(7, 'foo'), (8, 'bar')]
  996. >>> i = m.find(8)
  997. >>> m.update_key(i,9) # no reordering.
  998. >>> del i
  999. >>> [(int(x[0]),x[1]) for x in m.items()]
  1000. [(7, 'foo'), (9, 'bar')]
  1001. Edge cases:
  1002. >>> i = m.find(7)
  1003. >>> i.value()
  1004. 'foo'
  1005. >>> m.update_key(i,9)
  1006. >>> m[7]
  1007. []
  1008. >>> m[9]
  1009. ['foo', 'bar']
  1010. >>> int(m.get_key_by_value('foo'))
  1011. 9
  1012. >>> i = m.iterkeys()
  1013. >>> try: # updating an iter pointing at BEGIN.
  1014. ... m.update_key(i,10)
  1015. ... except IndexError:
  1016. ... print 'ok'
  1017. ...
  1018. ok
  1019. >>> i = m.end()
  1020. >>> try: # updating an iter pointing at end().
  1021. ... m.update_key(i,10)
  1022. ... except IndexError:
  1023. ... print 'ok'
  1024. ...
  1025. ok
  1026. """
  1027. if not iter._si:
  1028. raise RuntimeError( _("invalid iterator") )
  1029. if iter._si == BEGIN:
  1030. raise IndexError(_("Iterator does not point at key-value pair" ))
  1031. if self is not iter._mmap:
  1032. raise IndexError(_("Iterator points into a different "
  1033. "CIndexedMultiMap."))
  1034. if mmap_iiter_at_end(self._smmap, iter._si):
  1035. raise IndexError( _("Cannot update end() iterator.") )
  1036. si = mmap_iiter_update_key_iiter(self._smmap, iter._si, key)
  1037. # raises KeyError if key already in mmap.
  1038. if si != iter._si: # if mmap is reordered...
  1039. value = iter.value();
  1040. val_si = self._value_index[value]
  1041. iiter_delete(val_si)
  1042. self._value_index[value] = si
  1043. def erase(self, iter):
  1044. """Remove item pointed to by the iterator. Iterator is immediately
  1045. invalidated after the deletion completes."""
  1046. value = iter.value()
  1047. CMultiMap.erase(self,iter)
  1048. del self._value_index[value]
  1049. if __name__ == "__main__":
  1050. import sys, doctest
  1051. ############
  1052. # UNIT TESTS
  1053. print "Testing module"
  1054. doctest.testmod(sys.modules[__name__])
  1055. if LEAK_TEST:
  1056. # Now test for memory leaks.
  1057. print "testing for memory leaks. Loop at top to see if process memory allocation grows."
  1058. print "CTRL-C to stop test."
  1059. # Run > top
  1060. # Does memory consumption for the process continuously increase? Yes == leak.
  1061. m = CMultiMap()
  1062. # insert and delete repeatedly.
  1063. i = 0
  1064. import gc
  1065. class X:
  1066. x = range(1000) # something moderately big.
  1067. #while True:
  1068. # i += 1
  1069. # mmap_insert(m._smmap,10,X())
  1070. # it = mmap_find_iiter( m._smmap, 10 )
  1071. # mmap_iiter_erase( m._smmap, it )
  1072. # iiter_delete(it)
  1073. # assert len(m) == 0
  1074. # assert mmap_size(m._smmap) == 0
  1075. # if i % 100 == 0:
  1076. # gc.collect()
  1077. while True:
  1078. i += 1
  1079. m[10] = X()
  1080. del m[10]
  1081. assert len(m) == 0
  1082. if i % 100 == 0:
  1083. gc.collect()