6The httplib2 algorithms ported for use with requests.
16from .cache
import DictCache, SeparateBodyBaseCache
17from .serialize
import Serializer
22URI =
re.compile(
r"^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?")
24PERMANENT_REDIRECT_STATUSES = (301, 308)
28 """Parses a URI using the regex given in Appendix B of RFC 3986.
30 (scheme, authority, path, query, fragment) = parse_uri(uri)
33 return (groups[1], groups[3], groups[4], groups[6], groups[8])
37 """An interface to see if a request should be cached or not."""
40 self, cache=None, cache_etags=True, serializer=None, status_codes=None
49 """Normalize the URL to create a safe key for the cache"""
50 (scheme, authority, path, query, fragment) =
parse_uri(uri)
51 if not scheme
or not authority:
52 raise Exception(
"Only absolute URIs are allowed. uri = %s" % uri)
62 request_uri = query
and "?".join([path, query])
or path
63 defrag_uri = scheme +
"://" + authority + request_uri
74 "max-age": (int,
True),
75 "max-stale": (int,
False),
76 "min-fresh": (int,
True),
77 "no-cache": (
None,
False),
78 "no-store": (
None,
False),
79 "no-transform": (
None,
False),
80 "only-if-cached": (
None,
False),
81 "must-revalidate": (
None,
False),
82 "public": (
None,
False),
83 "private": (
None,
False),
84 "proxy-revalidate": (
None,
False),
85 "s-maxage": (int,
True),
97 directive = parts[0].strip()
100 typ, required = known_directives[directive]
102 logger.debug(
"Ignoring unknown cache-control directive: %s", directive)
105 if not typ
or not required:
106 retval[directive] =
None
109 retval[directive] =
typ(parts[1].strip())
113 "Missing value for cache-control " "directive: %s",
118 "Invalid value for cache-control directive " "%s, must be %s",
127 Return a cached response if it exists in the cache, otherwise
136 logger.debug(
'Request header has "no-cache", cache bypassed')
139 if "max-age" in cc
and cc[
"max-age"] == 0:
140 logger.debug(
'Request header has "max_age" as 0, cache bypassed')
144 cache_data = self.
cache.get(cache_url)
145 if cache_data
is None:
150 body_file = self.
cache.get_body(cache_url)
155 resp = self.
serializer.loads(request, cache_data, body_file)
157 logger.warning(
"Cache entry deserialization failed, entry ignored")
169 if int(
resp.status)
in PERMANENT_REDIRECT_STATUSES:
171 "Returning cached permanent redirect response "
172 "(ignoring date and etag information)"
178 if not headers
or "date" not in headers:
179 if "etag" not in headers:
182 logger.debug(
"Purging cached response: no date or etag")
183 self.
cache.delete(cache_url)
189 current_age = max(0, now - date)
190 logger.debug(
"Current age based on date: %i", current_age)
199 freshness_lifetime = 0
202 if "max-age" in resp_cc:
203 freshness_lifetime = resp_cc[
"max-age"]
204 logger.debug(
"Freshness lifetime from max-age: %i", freshness_lifetime)
207 elif "expires" in headers:
208 expires = parsedate_tz(headers[
"expires"])
209 if expires
is not None:
211 freshness_lifetime = max(0, expire_time)
212 logger.debug(
"Freshness lifetime from expires: %i", freshness_lifetime)
217 freshness_lifetime = cc[
"max-age"]
219 "Freshness lifetime from request max-age: %i", freshness_lifetime
222 if "min-fresh" in cc:
223 min_fresh = cc[
"min-fresh"]
225 current_age += min_fresh
226 logger.debug(
"Adjusted current age from min-fresh: %i", current_age)
229 if freshness_lifetime > current_age:
230 logger.debug(
'The response is "fresh", returning cached response')
231 logger.debug(
"%i > %i", freshness_lifetime, current_age)
235 if "etag" not in headers:
236 logger.debug(
'The cached response is "stale" with no etag, purging')
237 self.
cache.delete(cache_url)
250 if "etag" in headers:
251 new_headers[
"If-None-Match"] = headers[
"ETag"]
253 if "last-modified" in headers:
254 new_headers[
"If-Modified-Since"] = headers[
"Last-Modified"]
258 def _cache_set(self, cache_url, request, response, body=None, expires_time=None):
260 Store the data in the cache.
267 self.
serializer.dumps(request, response, b
""),
268 expires=expires_time,
270 self.
cache.set_body(cache_url, body)
274 self.
serializer.dumps(request, response, body),
275 expires=expires_time,
280 Algorithm for caching requests.
282 This assumes a requests Response object.
295 if "date" in response_headers:
306 and "content-length" in response_headers
307 and response_headers[
"content-length"].
isdigit()
308 and int(response_headers[
"content-length"]) !=
len(body)
316 logger.debug(
'Updating cache with response from "%s"', cache_url)
323 if "no-store" in cc_req:
326 if no_store
and self.
cache.get(cache_url):
327 logger.debug(
'Purging existing cache entry to honor "no-store"')
328 self.
cache.delete(cache_url)
342 if self.
cache_etags and "etag" in response_headers:
345 expires = parsedate_tz(response_headers[
"expires"])
346 if expires
is not None:
349 expires_time = max(expires_time, 14 * 86400)
351 logger.debug(
"etag object cached for {0} seconds".format(expires_time))
353 self.
_cache_set(cache_url, request, response, body, expires_time)
359 self.
_cache_set(cache_url, request, response, b
"")
364 elif "date" in response_headers:
367 if "max-age" in cc
and cc[
"max-age"] > 0:
369 expires_time = cc[
"max-age"]
380 elif "expires" in response_headers:
381 if response_headers[
"expires"]:
382 expires = parsedate_tz(response_headers[
"expires"])
383 if expires
is not None:
389 "Caching b/c of expires header. expires in {0} seconds".format(
402 """On a 304 we will get a new set of headers that we want to
403 update our cached value with, assuming we have one.
405 This should only ever be called when we've sent an ETag and
406 gotten a 304 as the response.
410 cached_response = self.
serializer.loads(request, self.
cache.get(cache_url))
412 if not cached_response:
423 excluded_headers = [
"content-length"]
429 if k.lower()
not in excluded_headers
437 self.
_cache_set(cache_url, request, cached_response)
439 return cached_response
_cache_set(self, cache_url, request, response, body=None, expires_time=None)
conditional_headers(self, request)
update_cached_response(self, request, response)
__init__(self, cache=None, cache_etags=True, serializer=None, status_codes=None)
cached_request(self, request)
parse_cache_control(self, headers)
cache_response(self, request, response, body=None, status_codes=None)