1from __future__
import absolute_import
4from collections
import namedtuple
6from ..exceptions
import LocationParseError
7from ..packages
import six
# Names of the URL components, in positional order: scheme, authority parts
# (auth, host, port), then path, query and fragment.
url_attrs = [
    "scheme",
    "auth",
    "host",
    "port",
    "path",
    "query",
    "fragment",
]
# Schemes that are checked with ``in NORMALIZABLE_SCHEMES`` before any
# normalization is applied. ``None`` is a member so that a value with no
# scheme at all passes the same membership test.
NORMALIZABLE_SCHEMES = ("http", "https", None)
20 r"^(?:([a-zA-Z][a-zA-Z0-9+.-]*):)?"
# Regex building blocks for IP literals. The IPv6 alternatives below follow
# the ``IPv6address`` ABNF of RFC 3986 (h16 = 1-4 hex digits, ls32 = the
# least-significant 32 bits written as ``h16:h16`` or as a dotted IPv4).
IPV4_PAT = r"(?:[0-9]{1,3}\.){3}[0-9]{1,3}"
HEX_PAT = "[0-9A-Fa-f]{1,4}"
LS32_PAT = "(?:{hex}:{hex}|{ipv4})".format(hex=HEX_PAT, ipv4=IPV4_PAT)
_subs = {"hex": HEX_PAT, "ls32": LS32_PAT}
_variations = [
    #                            6( h16 ":" ) ls32
    "(?:%(hex)s:){6}%(ls32)s",
    #                       "::" 5( h16 ":" ) ls32
    "::(?:%(hex)s:){5}%(ls32)s",
    # [               h16 ] "::" 4( h16 ":" ) ls32
    "(?:%(hex)s)?::(?:%(hex)s:){4}%(ls32)s",
    # [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32
    "(?:(?:%(hex)s:)?%(hex)s)?::(?:%(hex)s:){3}%(ls32)s",
    # [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32
    "(?:(?:%(hex)s:){0,2}%(hex)s)?::(?:%(hex)s:){2}%(ls32)s",
    # [ *3( h16 ":" ) h16 ] "::"    h16 ":"   ls32
    "(?:(?:%(hex)s:){0,3}%(hex)s)?::%(hex)s:%(ls32)s",
    # [ *4( h16 ":" ) h16 ] "::"              ls32
    "(?:(?:%(hex)s:){0,4}%(hex)s)?::%(ls32)s",
    # [ *5( h16 ":" ) h16 ] "::"              h16
    "(?:(?:%(hex)s:){0,5}%(hex)s)?::%(hex)s",
    # [ *6( h16 ":" ) h16 ] "::"
    "(?:(?:%(hex)s:){0,6}%(hex)s)?::",
]

# Written for use inside a character class, hence the escaped "-".
UNRESERVED_PAT = (
    r"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._\-~"
)
# Any one of the alternatives above, with the placeholders filled in.
IPV6_PAT = "(?:" + "|".join([x % _subs for x in _variations]) + ")"
# Zone identifier: a "%25" or bare "%" separator followed by one or more
# unreserved characters or percent-encoded octets.
ZONE_ID_PAT = "(?:%25|%)(?:[" + UNRESERVED_PAT + "]|%[a-fA-F0-9]{2})+"
# Bracketed IPv6 address with an optional zone identifier.
IPV6_ADDRZ_PAT = r"\[" + IPV6_PAT + r"(?:" + ZONE_ID_PAT + r")?\]"
# Registered name: anything except brackets, "%", ":", "/", "?" and "#",
# or a percent-encoded octet. May be empty.
REG_NAME_PAT = r"(?:[^\[\]%:/?#]|%[a-fA-F0-9]{2})*"
# Splits a request target into (path, query); a trailing fragment is matched
# but not captured.
TARGET_RE = re.compile(r"^(/[^?#]*)(?:\?([^#]*))?(?:#.*)?$")

# The [2:-2] slice drops the leading r"\[" and trailing r"\]" so that the
# same address-plus-zone pattern matches without the surrounding brackets.
BRACELESS_IPV6_ADDRZ_RE = re.compile("^" + IPV6_ADDRZ_PAT[2:-2] + "$")
66_HOST_PORT_PAT = (
"^(%s|%s|%s)(?::0*?(|0|[1-9][0-9]{0,4}))?$") % (
# Character sets, each built by extending the previous one:
# unreserved -> userinfo (adds sub-delims and ":") -> path (adds "@" and "/")
# -> query/fragment (adds "?").
UNRESERVED_CHARS = set(
    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789._-~"
)
SUB_DELIM_CHARS = set("!$&'()*+,;=")
USERINFO_CHARS = UNRESERVED_CHARS | SUB_DELIM_CHARS | {":"}
PATH_CHARS = USERINFO_CHARS | {"@", "/"}
# Query and fragment share one set object.
QUERY_CHARS = FRAGMENT_CHARS = PATH_CHARS | {"?"}
84 Data structure for representing an HTTP URL. Used as a return value for
85 :func:`parse_url`. Both the scheme and host are normalized as they are
86 both case-insensitive according to RFC 3986.
103 if scheme
is not None:
106 cls, scheme, auth, host, port, path, query, fragment
111 """For backwards-compatibility with urlparse. We're nice like that."""
116 """Absolute path including the query string."""
117 uri = self.path
or "/"
119 if self.query
is not None:
120 uri +=
"?" + self.query
126 """Network location including host and port"""
134 Convert self into a url
136 This function should more or less round-trip with :func:`.parse_url`. The
137 returned url may not be exactly the same as the url inputted to
138 :func:`.parse_url`, but it should be equivalent by the RFC (e.g., urls
139 with a blank port will have : removed).
143 >>> U = parse_url('http://google.com/mail/')
145 'http://google.com/mail/'
146 >>> Url('http', 'username:password', 'host.com', 80,
147 ... '/path', 'query', 'fragment').url
148 'http://username:password@host.com:80/path?query#fragment'
150 scheme, auth, host, port, path, query, fragment = self
154 if scheme
is not None:
155 url += scheme +
u"://"
161 url +=
u":" + str(port)
164 if query
is not None:
166 if fragment
is not None:
167 url +=
u"#" + fragment
def split_first(s, delims):
    """
    Given a string and an iterable of delimiters, split on the first found
    delimiter. Return two split parts and the matched delimiter.

    If not found, then the first part is the full input string.

    Example::

        >>> split_first('foo/bar?baz', '?/=')
        ('foo', 'bar?baz', '/')
        >>> split_first('foo/bar?baz', '123')
        ('foo/bar?baz', '', None)

    Scales linearly with number of delims. Not ideal for large number of delims.

    :param s: String to split.
    :param delims: Iterable of delimiters to look for. Exactly one character
        is skipped at the split point, so each delimiter is expected to be a
        single character.
    :return: Tuple ``(before, after, delimiter)``; ``after`` is ``""`` and
        ``delimiter`` is ``None`` when no delimiter occurs in ``s``.
    """
    min_idx = None
    min_delim = None
    for d in delims:
        idx = s.find(d)
        if idx < 0:
            # This delimiter does not occur in the string at all.
            continue

        # Keep whichever delimiter occurs earliest in the string.
        if min_idx is None or idx < min_idx:
            min_idx = idx
            min_delim = d

    if min_idx is None or min_idx < 0:
        return s, "", None

    return s[:min_idx], s[min_idx + 1 :], min_delim
211 """Percent-encodes a URI component without reapplying
212 onto an already percent-encoded component.
214 if component
is None:
232 byte = uri_bytes[i : i + 1]
234 if (is_percent_encoded
and byte == b
"%")
or (
237 encoded_component += byte
249 for segment
in segments:
254 elif segment !=
"..":
271 return "/".join(output)
274def _normalize_host(host, scheme):
279 if scheme
in NORMALIZABLE_SCHEMES:
288 zone_id = host[start:end]
291 zone_id = zone_id[3:]
293 zone_id = zone_id[1:]
295 return host[:start].
lower() + zone_id + host[end:]
306 if name
and any(
ord(x) >= 128
for x
in name):
323def _encode_target(target):
324 """Percent-encodes a request target so that there are no invalid characters"""
328 if query
is not None:
329 target +=
"?" + query
335 Given a url, return a parsed :class:`.Url` namedtuple. Best-effort is
336 performed to parse incomplete urls. Fields not provided will be None.
337 This parser is RFC 3986 and RFC 6874 compliant.
339 The parser logic and helper functions are based heavily on
340 work done in the ``rfc3986`` module.
342 :param str url: URL to parse into a :class:`.Url` namedtuple.
344 Partly backwards-compatible with :mod:`urlparse`.
348 >>> parse_url('http://google.com/mail/')
349 Url(scheme='http', host='google.com', port=None, path='/mail/', ...)
350 >>> parse_url('google.com:80')
351 Url(scheme=None, host='google.com', port=80, path=None, ...)
352 >>> parse_url('/foo?bar')
353 Url(scheme=None, host=None, port=None, path='/foo', query='bar', ...)
364 scheme, authority, path, query, fragment =
URI_RE.match(url).groups()
365 normalize_uri = scheme
is None or scheme.lower()
in NORMALIZABLE_SCHEMES
374 if auth
and normalize_uri:
379 auth, host, port =
None,
None,
None
383 if not (0 <= port <= 65535):
386 host = _normalize_host(host, scheme)
388 if normalize_uri
and path:
391 if normalize_uri
and query:
393 if normalize_uri
and fragment:
396 except (ValueError, AttributeError):
404 if query
is not None or fragment
is not None:
432 Deprecated. Use :func:`parse_url` instead.
__new__(cls, scheme=None, auth=None, host=None, port=None, path=None, query=None, fragment=None)
_remove_path_dot_segments(path)
_encode_invalid_chars(component, allowed_chars, encoding="utf-8")