cache_map.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370
  1. # The contents of this file are subject to the BitTorrent Open Source License
  2. # Version 1.1 (the License). You may not copy or use this file, in either
  3. # source code or executable form, except in compliance with the License. You
  4. # may obtain a copy of the License at http://www.bittorrent.com/license/.
  5. #
  6. # Software distributed under the License is distributed on an AS IS basis,
  7. # WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  8. # for the specific language governing rights and limitations under the
  9. # License.
  10. #
  11. # author: David Harrison
  12. if __name__ == "__main__":
  13. import sys
  14. sys.path.append("..")
  15. import sys #DEBUG
  16. from BTL.platform import bttime as time
  17. from BTL.reactor_magic import reactor
  18. from BTL.Map import IndexedMultiMap
  19. #from refcnt import getrc
  20. RECENT_SIZE = 10 # number of items in the recently accessed set.
  21. LEAK_TEST = False
  22. class CacheMap:
  23. """this cache class allows caching with arbitrary expiration times. This is different
  24. from BTL.Cache which assumes all items placed in the cache remain valid for the same
  25. duration.
  26. Like a BTL.Map, there can only be one instance of any given key in
  27. the cache at a time. Subsequent inserts (__setitem__ or set)
  28. with the same key, will update the ttl and value for that
  29. key.
  30. Unlike a BTL.Map, CacheMap does not perform in-order iteration based on key, and
  31. key lookups (__getitem__) take average O(1) time.
  32. The map also has the option to have bounded size in which case it imposes the
  33. following replacement algorithm: remove the oldest entries first unless
  34. those entries are in the recent access set. Here 'old' refers to duration
  35. in the cache. Recent set has bounded size.
  36. """
  37. # BTL.Cache places the cache entries in a queue. We instead maintain an
  38. # IndexedMultiMap ordered based on expiration times. The index allows nodes in the
  39. # map to be looked up in O(1) time based on value.
  40. def __init__(self, default_ttl = None, expire_interval = 60, touch_on_access = False,
  41. max_items = None, recent_items = RECENT_SIZE ):
  42. """
  43. @param default_ttl: time to live when using __setitem__ rather than set.
  44. @param expire_interval: time between removals of expired items in seconds. Otherwise,
  45. expired items are removed lazily.
  46. @param touch_on_access: refresh item expire time by ttl when item is accessed.
  47. @param max_items: maximum size of cache. (see replacement algorithm above)
  48. """
  49. self._exp = IndexedMultiMap() # expiration times. Multiple items can have the same expiration
  50. # times, but there can only be one instance of any one key
  51. # in the CacheMap.
  52. self._data = {}
  53. self._ttl = default_ttl
  54. self._touch = touch_on_access
  55. self._max_items = max_items
  56. self._expire_interval = expire_interval
  57. if max_items is not None:
  58. self._recent = _BoundedCacheSet(int(min(recent_items,max_items)))
  59. else:
  60. self._recent = None
  61. reactor.callLater(self._expire_interval, self._expire)
  62. def __getitem__(self, key): # O(1) if not touch and not newly expired, else O(log n)
  63. """Raises KeyError if the key is not in the cache.
  64. This can happen if the entry was deleted or expired."""
  65. ttl,v = self._data[key] # ttl is duration, not absolute time.
  66. i = self._exp.find_key_by_value(key) # O(1). Key in exp is time. 'key' variable
  67. # is exp's value. :-)
  68. if i.at_end():
  69. raise KeyError()
  70. t = time()
  71. if i.key() < t: # expired.
  72. del self[key] # O(log n)
  73. raise KeyError()
  74. if self._recent:
  75. self._recent.add(key)
  76. if self._touch:
  77. self._exp.update_key(i,t+ttl) # O(1) if no reordering else O(log n)
  78. return v
  79. def __setitem__(self, key, value): # O(log n). actually O(log n + RECENT_SIZE)
  80. assert self._ttl > 0, "no default TTL defined. Perhaps the caller should call set " \
  81. "rather than __setitem__."
  82. t = time()
  83. if self._data.has_key(key):
  84. ttl,_ = self._data[key]
  85. else:
  86. ttl = self._ttl
  87. self.set(key,value,ttl)
  88. if self._recent:
  89. self._recent.add(key)
  90. # perform cache replacement if necessary.
  91. if self._max_items is not None and len(self._data) > self._max_items:
  92. to_remove = []
  93. for t,k in self._exp.iteritems():
  94. # worst case is O(RECENT_SIZE), but it is highly unlikely
  95. # that all members of the recent access set are the oldest
  96. # in the cache.
  97. if k not in self._recent:
  98. to_remove.append(k)
  99. if len(to_remove) >= len(self._data) - self._max_items:
  100. break
  101. for k in to_remove:
  102. del self[k]
  103. def set(self, key, value, ttl):
  104. """Set using non-default TTL. ttl is a duration, not an absolute
  105. time."""
  106. t = time()
  107. self._data[key] = (ttl, value)
  108. i = self._exp.find_key_by_value(key)
  109. if i.at_end():
  110. self._exp[t+ttl] = key
  111. else:
  112. assert i.value() == key
  113. self._exp.update_key(i,t+ttl)
  114. def __delitem__(self, key): # O(log n)
  115. del self._data[key]
  116. i = self._exp.find_key_by_value(key)
  117. if not i.at_end(): # No KeyError is generated if item is not in
  118. # Cache because it could've been expired.
  119. self._exp.erase(i)
  120. def __len__(self):
  121. """Returns number of entries in the cache. Includes any
  122. expired entries that haven't been removed yet.
  123. Takes O(1) time."""
  124. return len(self._data)
  125. def num_unexpired(self):
  126. """Returns number of unexpired entries in the cache.
  127. Any expired entries are removed before computing the length.
  128. Takes worst case O(n) time where n = the number of expired
  129. entries in the cache when this is called."""
  130. self._expire2()
  131. return len(self._data)
  132. def has_key(self, key):
  133. return self._data.has_key(key)
  134. def __contains__(self, key):
  135. return self._data.has_key(key)
  136. def keys(self):
  137. return self._data.keys()
  138. def _expire(self):
  139. self._expire2()
  140. reactor.callLater(self._expire_interval, self._expire)
  141. def _expire2(self):
  142. t = time()
  143. #try:
  144. while True:
  145. i = self._exp.begin()
  146. if i.at_end():
  147. break
  148. if i.key() < t:
  149. key = i.value()
  150. self._exp.erase(i)
  151. del self._data[key]
  152. else:
  153. break
  154. assert len(self._data) == len(self._exp)
  155. #except:
  156. # pass # for example if an iterator is invalidated
  157. # while expiring.
  158. class _BoundedCacheSet:
  159. # implements LRU. I could've implemented this using
  160. # a set and then removed a random item from the set whenever the set
  161. # got too large. Hmmm...
  162. def __init__(self, max_items):
  163. assert max_items > 1
  164. self._max_items = max_items
  165. self._data = IndexedMultiMap() # recent accesses.
  166. def add(self, key): # O(log n)
  167. i = self._data.find_key_by_value(key)
  168. t = time()
  169. if i.at_end():
  170. self._data[t] = key
  171. else:
  172. self._data.update_key(i,t)
  173. while len(self._data) > self._max_items:
  174. j = self._data.begin()
  175. assert not j.at_end()
  176. self._data.erase(j)
  177. def __contains__(self, key):
  178. i = self._data.find_key_by_value(key)
  179. return not i.at_end()
  180. def remove(self, key):
  181. i = self._data.find_key_by_value(key)
  182. if i.at_end():
  183. raise KeyError()
  184. self._data.erase(i)
  185. def __str__(self):
  186. return str(self._data)
  187. if __name__ == "__main__":
  188. from defer import Deferred
  189. import random
  190. from yielddefer import launch_coroutine, wrap_task
  191. def coro(f, *args, **kwargs):
  192. return launch_coroutine(wrap_task(reactor.callLater), f, *args, **kwargs)
  193. def run():
  194. coro(_run)
  195. def _run():
  196. TTL = 1
  197. SET_TTL = 2 # TTL used when explicitly setting TTL using "def set."
  198. EXPIRE_INTERVAL = .3
  199. EPSILON = .5
  200. ###
  201. # BoundedCacheSet correctness tests.
  202. c = _BoundedCacheSet(2)
  203. c.add(10)
  204. assert 10 in c
  205. c.add(15)
  206. assert 15 in c
  207. c.add(16)
  208. assert 16 in c
  209. assert 10 not in c
  210. assert 15 in c
  211. c.remove(15)
  212. assert 15 not in c
  213. try:
  214. c.remove(23)
  215. assert False
  216. except KeyError:
  217. pass
  218. ###
  219. # basic CacheMap correctness tests.
  220. c = CacheMap(default_ttl=TTL,expire_interval=EPSILON)
  221. class K(object):
  222. def __init__(self):
  223. self.x = range(10000)
  224. class V(object):
  225. def __init__(self):
  226. self.x = range(10000)
  227. k = K()
  228. v = V()
  229. t = time()
  230. c.set(k, v, SET_TTL)
  231. assert len(c) == 1
  232. assert c.num_unexpired() == 1
  233. assert c._exp.begin().key() < t + SET_TTL + EPSILON and \
  234. c._exp.begin().key() > t + SET_TTL - EPSILON, \
  235. "First item in c._exp should have expiration time that is close to the " \
  236. "current time + SET_TTL which is %s, but the expiration time is %s." \
  237. % (t+SET_TTL, c._exp.begin().key())
  238. assert c.has_key(k)
  239. assert not c.has_key( "blah" )
  240. assert c[k] == v
  241. c._expire2() # should not expire anything because little time has passed.
  242. assert len(c) == 1
  243. assert c.num_unexpired() == 1
  244. try:
  245. y = c[10]
  246. assert False, "should've raised KeyError."
  247. except KeyError:
  248. pass
  249. v2 = V()
  250. c[k] = v2
  251. assert c._exp.begin().key() < t + SET_TTL + EPSILON and \
  252. c._exp.begin().key() > t + SET_TTL - EPSILON, \
  253. "First item in c._exp should have expiration time that is close to the " \
  254. "current time + SET_TTL, but the expiration time is %s." % c._exp.begin().key()
  255. assert not c[k] == v
  256. assert c[k] == v2
  257. assert len(c) == 1
  258. assert c.num_unexpired() == 1
  259. k2 = K()
  260. t = time()
  261. c[k2] = v2
  262. assert c._exp.begin().key() < t + TTL + EPSILON and \
  263. c._exp.begin().key() > t + TTL - EPSILON, \
  264. "First item in c._exp should have expiration time that is close to the " \
  265. "current time + TTL, but the expiration time is %s." % c._exp.begin().key()
  266. assert c[k2] == v2
  267. assert not c[k] == v # shouldn't be a problem with two items having the same value.
  268. assert len(c) == 2
  269. assert c.num_unexpired() == 2
  270. # wait long enough for the cache entries to expire.
  271. df = Deferred()
  272. reactor.callLater(SET_TTL+EPSILON, df.callback, None)
  273. yield df
  274. df.getResult()
  275. assert c.num_unexpired() == 0, "Should have expired all entries, but there are %d " \
  276. "unexpired items and %d items in c._data. " % (c.num_unexpired(), len(c._data))
  277. assert len(c) == 0
  278. assert len(c._exp) == 0
  279. assert len(c._data) == 0
  280. assert k not in c
  281. assert k2 not in c
  282. # basic correctness of bounded-size cache map.
  283. c = CacheMap(default_ttl=TTL,expire_interval=1000,max_items = 2)
  284. c[k] = v
  285. assert len(c) == 1
  286. assert c[k] == v
  287. c[k2] = v2
  288. assert len(c) == 2
  289. assert c[k2] == v2
  290. c[10] = 15
  291. assert len(c) == 2
  292. assert c[10] == 15
  293. assert c[k2] == v2 # order from most recent access is now [(k2,v2), (10,15), (k,v)].
  294. try:
  295. a = c[k]
  296. assert False, "when cache with size bound of 2 exceeded 2 elements, " \
  297. "the oldest should've been removed."
  298. except KeyError:
  299. pass
  300. c[56] = 1 # order from most recent access ...
  301. assert len(c) == 2
  302. assert 56 in c
  303. assert 10 not in c
  304. ###
  305. # test expirations and for memory leaks.
  306. # Watch memory consumption (e.g., using top) and see if it grows.
  307. if LEAK_TEST:
  308. c = CacheMap(default_ttl=TTL,expire_interval=EPSILON)
  309. i = 0
  310. while True:
  311. for x in xrange(100):
  312. i += 1
  313. if i % 20 == 0:
  314. print len(c)
  315. c[i] = K()
  316. if i % 5 == 0:
  317. try:
  318. l = len(c)
  319. del c[i]
  320. assert len(c) == l-1
  321. except KeyError:
  322. pass
  323. # allow time for expirations.
  324. df = Deferred()
  325. reactor.callLater(TTL+EPSILON,df.callback,None)
  326. yield df
  327. df.getResult()
  328. reactor.callLater(0,run)
  329. reactor.run()