cache.py 24 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714
  1. # -*- coding: utf-8 -*-
  2. """
  3. werkzeug.contrib.cache
  4. ~~~~~~~~~~~~~~~~~~~~~~
  5. The main problem with dynamic Web sites is, well, they're dynamic. Each
  6. time a user requests a page, the webserver executes a lot of code, queries
  7. the database, renders templates until the visitor gets the page he sees.
  8. This is a lot more expensive than just loading a file from the file system
  9. and sending it to the visitor.
  10. For most Web applications, this overhead isn't a big deal, but once it
  11. becomes one, you will be glad to have a cache system in place.
  12. How Caching Works
  13. =================
  14. Caching is pretty simple. Basically you have a cache object lurking around
  15. somewhere that is connected to a remote cache or the file system or
  16. something else. When the request comes in you check if the current page
  17. is already in the cache and if so, you're returning it from the cache.
  18. Otherwise you generate the page and put it into the cache. (Or a fragment
  19. of the page, you don't have to cache the full thing)
  20. Here is a simple example of how to cache a sidebar for a template::
  21. def get_sidebar(user):
  22. identifier = 'sidebar_for/user%d' % user.id
  23. value = cache.get(identifier)
  24. if value is not None:
  25. return value
  26. value = generate_sidebar_for(user=user)
  27. cache.set(identifier, value, timeout=60 * 5)
  28. return value
  29. Creating a Cache Object
  30. =======================
  31. To create a cache object you just import the cache system of your choice
  32. from the cache module and instantiate it. Then you can start working
  33. with that object:
  34. >>> from werkzeug.contrib.cache import SimpleCache
  35. >>> c = SimpleCache()
  36. >>> c.set("foo", "value")
  37. >>> c.get("foo")
  38. 'value'
  39. >>> c.get("missing") is None
  40. True
  41. Please keep in mind that you have to create the cache and put it somewhere
  42. you have access to it (either as a module global you can import or you just
  43. put it into your WSGI application).
  44. :copyright: (c) 2014 by the Werkzeug Team, see AUTHORS for more details.
  45. :license: BSD, see LICENSE for more details.
  46. """
  47. import os
  48. import re
  49. import errno
  50. import tempfile
  51. from hashlib import md5
  52. from time import time
  53. try:
  54. import cPickle as pickle
  55. except ImportError: # pragma: no cover
  56. import pickle
  57. from werkzeug._compat import iteritems, string_types, text_type, \
  58. integer_types, to_native
  59. from werkzeug.posixemulation import rename
  60. def _items(mappingorseq):
  61. """Wrapper for efficient iteration over mappings represented by dicts
  62. or sequences::
  63. >>> for k, v in _items((i, i*i) for i in xrange(5)):
  64. ... assert k*k == v
  65. >>> for k, v in _items(dict((i, i*i) for i in xrange(5))):
  66. ... assert k*k == v
  67. """
  68. if hasattr(mappingorseq, 'items'):
  69. return iteritems(mappingorseq)
  70. return mappingorseq
  71. class BaseCache(object):
  72. """Baseclass for the cache systems. All the cache systems implement this
  73. API or a superset of it.
  74. :param default_timeout: the default timeout (in seconds) that is used if no
  75. timeout is specified on :meth:`set`.
  76. """
  77. def __init__(self, default_timeout=300):
  78. self.default_timeout = default_timeout
  79. def get(self, key):
  80. """Look up key in the cache and return the value for it.
  81. :param key: the key to be looked up.
  82. :returns: The value if it exists and is readable, else ``None``.
  83. """
  84. return None
  85. def delete(self, key):
  86. """Delete `key` from the cache.
  87. :param key: the key to delete.
  88. :returns: Whether the key existed and has been deleted.
  89. :rtype: boolean
  90. """
  91. return True
  92. def get_many(self, *keys):
  93. """Returns a list of values for the given keys.
  94. For each key a item in the list is created::
  95. foo, bar = cache.get_many("foo", "bar")
  96. Has the same error handling as :meth:`get`.
  97. :param keys: The function accepts multiple keys as positional
  98. arguments.
  99. """
  100. return map(self.get, keys)
  101. def get_dict(self, *keys):
  102. """Like :meth:`get_many` but return a dict::
  103. d = cache.get_dict("foo", "bar")
  104. foo = d["foo"]
  105. bar = d["bar"]
  106. :param keys: The function accepts multiple keys as positional
  107. arguments.
  108. """
  109. return dict(zip(keys, self.get_many(*keys)))
  110. def set(self, key, value, timeout=None):
  111. """Add a new key/value to the cache (overwrites value, if key already
  112. exists in the cache).
  113. :param key: the key to set
  114. :param value: the value for the key
  115. :param timeout: the cache timeout for the key (if not specified,
  116. it uses the default timeout).
  117. :returns: ``True`` if key has been updated, ``False`` for backend
  118. errors. Pickling errors, however, will raise a subclass of
  119. ``pickle.PickleError``.
  120. :rtype: boolean
  121. """
  122. return True
  123. def add(self, key, value, timeout=None):
  124. """Works like :meth:`set` but does not overwrite the values of already
  125. existing keys.
  126. :param key: the key to set
  127. :param value: the value for the key
  128. :param timeout: the cache timeout for the key or the default
  129. timeout if not specified.
  130. :returns: Same as :meth:`set`, but also ``False`` for already
  131. existing keys.
  132. :rtype: boolean
  133. """
  134. return True
  135. def set_many(self, mapping, timeout=None):
  136. """Sets multiple keys and values from a mapping.
  137. :param mapping: a mapping with the keys/values to set.
  138. :param timeout: the cache timeout for the key (if not specified,
  139. it uses the default timeout).
  140. :returns: Whether all given keys have been set.
  141. :rtype: boolean
  142. """
  143. rv = True
  144. for key, value in _items(mapping):
  145. if not self.set(key, value, timeout):
  146. rv = False
  147. return rv
  148. def delete_many(self, *keys):
  149. """Deletes multiple keys at once.
  150. :param keys: The function accepts multiple keys as positional
  151. arguments.
  152. :returns: Whether all given keys have been deleted.
  153. :rtype: boolean
  154. """
  155. return all(self.delete(key) for key in keys)
  156. def clear(self):
  157. """Clears the cache. Keep in mind that not all caches support
  158. completely clearing the cache.
  159. :returns: Whether the cache has been cleared.
  160. :rtype: boolean
  161. """
  162. return True
  163. def inc(self, key, delta=1):
  164. """Increments the value of a key by `delta`. If the key does
  165. not yet exist it is initialized with `delta`.
  166. For supporting caches this is an atomic operation.
  167. :param key: the key to increment.
  168. :param delta: the delta to add.
  169. :returns: The new value or ``None`` for backend errors.
  170. """
  171. value = (self.get(key) or 0) + delta
  172. return value if self.set(key, value) else None
  173. def dec(self, key, delta=1):
  174. """Decrements the value of a key by `delta`. If the key does
  175. not yet exist it is initialized with `-delta`.
  176. For supporting caches this is an atomic operation.
  177. :param key: the key to increment.
  178. :param delta: the delta to subtract.
  179. :returns: The new value or `None` for backend errors.
  180. """
  181. value = (self.get(key) or 0) - delta
  182. return value if self.set(key, value) else None
  183. class NullCache(BaseCache):
  184. """A cache that doesn't cache. This can be useful for unit testing.
  185. :param default_timeout: a dummy parameter that is ignored but exists
  186. for API compatibility with other caches.
  187. """
  188. class SimpleCache(BaseCache):
  189. """Simple memory cache for single process environments. This class exists
  190. mainly for the development server and is not 100% thread safe. It tries
  191. to use as many atomic operations as possible and no locks for simplicity
  192. but it could happen under heavy load that keys are added multiple times.
  193. :param threshold: the maximum number of items the cache stores before
  194. it starts deleting some.
  195. :param default_timeout: the default timeout that is used if no timeout is
  196. specified on :meth:`~BaseCache.set`.
  197. """
  198. def __init__(self, threshold=500, default_timeout=300):
  199. BaseCache.__init__(self, default_timeout)
  200. self._cache = {}
  201. self.clear = self._cache.clear
  202. self._threshold = threshold
  203. def _prune(self):
  204. if len(self._cache) > self._threshold:
  205. now = time()
  206. toremove = []
  207. for idx, (key, (expires, _)) in enumerate(self._cache.items()):
  208. if expires <= now or idx % 3 == 0:
  209. toremove.append(key)
  210. for key in toremove:
  211. self._cache.pop(key, None)
  212. def get(self, key):
  213. try:
  214. expires, value = self._cache[key]
  215. if expires > time():
  216. return pickle.loads(value)
  217. except (KeyError, pickle.PickleError):
  218. return None
  219. def set(self, key, value, timeout=None):
  220. if timeout is None:
  221. timeout = self.default_timeout
  222. self._prune()
  223. self._cache[key] = (time() + timeout, pickle.dumps(value,
  224. pickle.HIGHEST_PROTOCOL))
  225. return True
  226. def add(self, key, value, timeout=None):
  227. if timeout is None:
  228. timeout = self.default_timeout
  229. self._prune()
  230. item = (time() + timeout, pickle.dumps(value,
  231. pickle.HIGHEST_PROTOCOL))
  232. if key in self._cache:
  233. return False
  234. self._cache.setdefault(key, item)
  235. return True
  236. def delete(self, key):
  237. return self._cache.pop(key, None) is not None
  238. _test_memcached_key = re.compile(r'[^\x00-\x21\xff]{1,250}$').match
  239. class MemcachedCache(BaseCache):
  240. """A cache that uses memcached as backend.
  241. The first argument can either be an object that resembles the API of a
  242. :class:`memcache.Client` or a tuple/list of server addresses. In the
  243. event that a tuple/list is passed, Werkzeug tries to import the best
  244. available memcache library.
  245. This cache looks into the following packages/modules to find bindings for
  246. memcached:
  247. - ``pylibmc``
  248. - ``google.appengine.api.memcached``
  249. - ``memcached``
  250. Implementation notes: This cache backend works around some limitations in
  251. memcached to simplify the interface. For example unicode keys are encoded
  252. to utf-8 on the fly. Methods such as :meth:`~BaseCache.get_dict` return
  253. the keys in the same format as passed. Furthermore all get methods
  254. silently ignore key errors to not cause problems when untrusted user data
  255. is passed to the get methods which is often the case in web applications.
  256. :param servers: a list or tuple of server addresses or alternatively
  257. a :class:`memcache.Client` or a compatible client.
  258. :param default_timeout: the default timeout that is used if no timeout is
  259. specified on :meth:`~BaseCache.set`.
  260. :param key_prefix: a prefix that is added before all keys. This makes it
  261. possible to use the same memcached server for different
  262. applications. Keep in mind that
  263. :meth:`~BaseCache.clear` will also clear keys with a
  264. different prefix.
  265. """
  266. def __init__(self, servers=None, default_timeout=300, key_prefix=None):
  267. BaseCache.__init__(self, default_timeout)
  268. if servers is None or isinstance(servers, (list, tuple)):
  269. if servers is None:
  270. servers = ['127.0.0.1:11211']
  271. self._client = self.import_preferred_memcache_lib(servers)
  272. if self._client is None:
  273. raise RuntimeError('no memcache module found')
  274. else:
  275. # NOTE: servers is actually an already initialized memcache
  276. # client.
  277. self._client = servers
  278. self.key_prefix = to_native(key_prefix)
  279. def _normalize_key(self, key):
  280. key = to_native(key, 'utf-8')
  281. if self.key_prefix:
  282. key = self.key_prefix + key
  283. return key
  284. def _normalize_timeout(self, timeout):
  285. return int(time()) + timeout
  286. def get(self, key):
  287. key = self._normalize_key(key)
  288. # memcached doesn't support keys longer than that. Because often
  289. # checks for so long keys can occur because it's tested from user
  290. # submitted data etc we fail silently for getting.
  291. if _test_memcached_key(key):
  292. return self._client.get(key)
  293. def get_dict(self, *keys):
  294. key_mapping = {}
  295. have_encoded_keys = False
  296. for key in keys:
  297. encoded_key = self._normalize_key(key)
  298. if not isinstance(key, str):
  299. have_encoded_keys = True
  300. if _test_memcached_key(key):
  301. key_mapping[encoded_key] = key
  302. d = rv = self._client.get_multi(key_mapping.keys())
  303. if have_encoded_keys or self.key_prefix:
  304. rv = {}
  305. for key, value in iteritems(d):
  306. rv[key_mapping[key]] = value
  307. if len(rv) < len(keys):
  308. for key in keys:
  309. if key not in rv:
  310. rv[key] = None
  311. return rv
  312. def add(self, key, value, timeout=None):
  313. if timeout is None:
  314. timeout = self.default_timeout
  315. key = self._normalize_key(key)
  316. timeout = self._normalize_timeout(timeout)
  317. return self._client.add(key, value, timeout)
  318. def set(self, key, value, timeout=None):
  319. if timeout is None:
  320. timeout = self.default_timeout
  321. key = self._normalize_key(key)
  322. timeout = self._normalize_timeout(timeout)
  323. return self._client.set(key, value, timeout)
  324. def get_many(self, *keys):
  325. d = self.get_dict(*keys)
  326. return [d[key] for key in keys]
  327. def set_many(self, mapping, timeout=None):
  328. if timeout is None:
  329. timeout = self.default_timeout
  330. new_mapping = {}
  331. for key, value in _items(mapping):
  332. key = self._normalize_key(key)
  333. new_mapping[key] = value
  334. timeout = self._normalize_timeout(timeout)
  335. failed_keys = self._client.set_multi(new_mapping, timeout)
  336. return not failed_keys
  337. def delete(self, key):
  338. key = self._normalize_key(key)
  339. if _test_memcached_key(key):
  340. return self._client.delete(key)
  341. def delete_many(self, *keys):
  342. new_keys = []
  343. for key in keys:
  344. key = self._normalize_key(key)
  345. if _test_memcached_key(key):
  346. new_keys.append(key)
  347. return self._client.delete_multi(new_keys)
  348. def clear(self):
  349. return self._client.flush_all()
  350. def inc(self, key, delta=1):
  351. key = self._normalize_key(key)
  352. return self._client.incr(key, delta)
  353. def dec(self, key, delta=1):
  354. key = self._normalize_key(key)
  355. return self._client.decr(key, delta)
  356. def import_preferred_memcache_lib(self, servers):
  357. """Returns an initialized memcache client. Used by the constructor."""
  358. try:
  359. import pylibmc
  360. except ImportError:
  361. pass
  362. else:
  363. return pylibmc.Client(servers)
  364. try:
  365. from google.appengine.api import memcache
  366. except ImportError:
  367. pass
  368. else:
  369. return memcache.Client()
  370. try:
  371. import memcache
  372. except ImportError:
  373. pass
  374. else:
  375. return memcache.Client(servers)
  376. # backwards compatibility
  377. GAEMemcachedCache = MemcachedCache
  378. class RedisCache(BaseCache):
  379. """Uses the Redis key-value store as a cache backend.
  380. The first argument can be either a string denoting address of the Redis
  381. server or an object resembling an instance of a redis.Redis class.
  382. Note: Python Redis API already takes care of encoding unicode strings on
  383. the fly.
  384. .. versionadded:: 0.7
  385. .. versionadded:: 0.8
  386. `key_prefix` was added.
  387. .. versionchanged:: 0.8
  388. This cache backend now properly serializes objects.
  389. .. versionchanged:: 0.8.3
  390. This cache backend now supports password authentication.
  391. .. versionchanged:: 0.10
  392. ``**kwargs`` is now passed to the redis object.
  393. :param host: address of the Redis server or an object which API is
  394. compatible with the official Python Redis client (redis-py).
  395. :param port: port number on which Redis server listens for connections.
  396. :param password: password authentication for the Redis server.
  397. :param db: db (zero-based numeric index) on Redis Server to connect.
  398. :param default_timeout: the default timeout that is used if no timeout is
  399. specified on :meth:`~BaseCache.set`.
  400. :param key_prefix: A prefix that should be added to all keys.
  401. Any additional keyword arguments will be passed to ``redis.Redis``.
  402. """
  403. def __init__(self, host='localhost', port=6379, password=None,
  404. db=0, default_timeout=300, key_prefix=None, **kwargs):
  405. BaseCache.__init__(self, default_timeout)
  406. if isinstance(host, string_types):
  407. try:
  408. import redis
  409. except ImportError:
  410. raise RuntimeError('no redis module found')
  411. if kwargs.get('decode_responses', None):
  412. raise ValueError('decode_responses is not supported by '
  413. 'RedisCache.')
  414. self._client = redis.Redis(host=host, port=port, password=password,
  415. db=db, **kwargs)
  416. else:
  417. self._client = host
  418. self.key_prefix = key_prefix or ''
  419. def dump_object(self, value):
  420. """Dumps an object into a string for redis. By default it serializes
  421. integers as regular string and pickle dumps everything else.
  422. """
  423. t = type(value)
  424. if t in integer_types:
  425. return str(value).encode('ascii')
  426. return b'!' + pickle.dumps(value)
  427. def load_object(self, value):
  428. """The reversal of :meth:`dump_object`. This might be callde with
  429. None.
  430. """
  431. if value is None:
  432. return None
  433. if value.startswith(b'!'):
  434. try:
  435. return pickle.loads(value[1:])
  436. except pickle.PickleError:
  437. return None
  438. try:
  439. return int(value)
  440. except ValueError:
  441. # before 0.8 we did not have serialization. Still support that.
  442. return value
  443. def get(self, key):
  444. return self.load_object(self._client.get(self.key_prefix + key))
  445. def get_many(self, *keys):
  446. if self.key_prefix:
  447. keys = [self.key_prefix + key for key in keys]
  448. return [self.load_object(x) for x in self._client.mget(keys)]
  449. def set(self, key, value, timeout=None):
  450. if timeout is None:
  451. timeout = self.default_timeout
  452. dump = self.dump_object(value)
  453. return self._client.setex(name=self.key_prefix + key,
  454. value=dump, time=timeout)
  455. def add(self, key, value, timeout=None):
  456. if timeout is None:
  457. timeout = self.default_timeout
  458. dump = self.dump_object(value)
  459. return (
  460. self._client.setnx(name=self.key_prefix + key, value=dump) and
  461. self._client.expire(name=self.key_prefix + key, time=timeout)
  462. )
  463. def set_many(self, mapping, timeout=None):
  464. if timeout is None:
  465. timeout = self.default_timeout
  466. pipe = self._client.pipeline()
  467. for key, value in _items(mapping):
  468. dump = self.dump_object(value)
  469. pipe.setex(name=self.key_prefix + key, value=dump, time=timeout)
  470. return pipe.execute()
  471. def delete(self, key):
  472. return self._client.delete(self.key_prefix + key)
  473. def delete_many(self, *keys):
  474. if not keys:
  475. return
  476. if self.key_prefix:
  477. keys = [self.key_prefix + key for key in keys]
  478. return self._client.delete(*keys)
  479. def clear(self):
  480. status = False
  481. if self.key_prefix:
  482. keys = self._client.keys(self.key_prefix + '*')
  483. if keys:
  484. status = self._client.delete(*keys)
  485. else:
  486. status = self._client.flushdb()
  487. return status
  488. def inc(self, key, delta=1):
  489. return self._client.incr(name=self.key_prefix + key, amount=delta)
  490. def dec(self, key, delta=1):
  491. return self._client.decr(name=self.key_prefix + key, amount=delta)
  492. class FileSystemCache(BaseCache):
  493. """A cache that stores the items on the file system. This cache depends
  494. on being the only user of the `cache_dir`. Make absolutely sure that
  495. nobody but this cache stores files there or otherwise the cache will
  496. randomly delete files therein.
  497. :param cache_dir: the directory where cache files are stored.
  498. :param threshold: the maximum number of items the cache stores before
  499. it starts deleting some.
  500. :param default_timeout: the default timeout that is used if no timeout is
  501. specified on :meth:`~BaseCache.set`.
  502. :param mode: the file mode wanted for the cache files, default 0600
  503. """
  504. #: used for temporary files by the FileSystemCache
  505. _fs_transaction_suffix = '.__wz_cache'
  506. def __init__(self, cache_dir, threshold=500, default_timeout=300, mode=0o600):
  507. BaseCache.__init__(self, default_timeout)
  508. self._path = cache_dir
  509. self._threshold = threshold
  510. self._mode = mode
  511. try:
  512. os.makedirs(self._path)
  513. except OSError as ex:
  514. if ex.errno != errno.EEXIST:
  515. raise
  516. def _list_dir(self):
  517. """return a list of (fully qualified) cache filenames
  518. """
  519. return [os.path.join(self._path, fn) for fn in os.listdir(self._path)
  520. if not fn.endswith(self._fs_transaction_suffix)]
  521. def _prune(self):
  522. entries = self._list_dir()
  523. if len(entries) > self._threshold:
  524. now = time()
  525. try:
  526. for idx, fname in enumerate(entries):
  527. remove = False
  528. with open(fname, 'rb') as f:
  529. expires = pickle.load(f)
  530. remove = expires <= now or idx % 3 == 0
  531. if remove:
  532. os.remove(fname)
  533. except (IOError, OSError):
  534. pass
  535. def clear(self):
  536. for fname in self._list_dir():
  537. try:
  538. os.remove(fname)
  539. except (IOError, OSError):
  540. return False
  541. return True
  542. def _get_filename(self, key):
  543. if isinstance(key, text_type):
  544. key = key.encode('utf-8') #XXX unicode review
  545. hash = md5(key).hexdigest()
  546. return os.path.join(self._path, hash)
  547. def get(self, key):
  548. filename = self._get_filename(key)
  549. try:
  550. with open(filename, 'rb') as f:
  551. if pickle.load(f) >= time():
  552. return pickle.load(f)
  553. else:
  554. os.remove(filename)
  555. return None
  556. except (IOError, OSError, pickle.PickleError):
  557. return None
  558. def add(self, key, value, timeout=None):
  559. filename = self._get_filename(key)
  560. if not os.path.exists(filename):
  561. return self.set(key, value, timeout)
  562. return False
  563. def set(self, key, value, timeout=None):
  564. if timeout is None:
  565. timeout = self.default_timeout
  566. filename = self._get_filename(key)
  567. self._prune()
  568. try:
  569. fd, tmp = tempfile.mkstemp(suffix=self._fs_transaction_suffix,
  570. dir=self._path)
  571. with os.fdopen(fd, 'wb') as f:
  572. pickle.dump(int(time() + timeout), f, 1)
  573. pickle.dump(value, f, pickle.HIGHEST_PROTOCOL)
  574. rename(tmp, filename)
  575. os.chmod(filename, self._mode)
  576. except (IOError, OSError):
  577. return False
  578. else:
  579. return True
  580. def delete(self, key):
  581. try:
  582. os.remove(self._get_filename(key))
  583. except (IOError, OSError):
  584. return False
  585. else:
  586. return True