from dataclasses import dataclass
    split_auth_from_netloc,
_SUPPORTED_HASHES = ("sha512", "sha384", "sha256", "sha224", "sha1", "md5")
@dataclass(frozen=True)
class LinkHash:
    """Links to content may have embedded hash values. This class parses those.

    `name` must be any member of `_SUPPORTED_HASHES`.

    This class can be converted to and from `ArchiveInfo`. While ArchiveInfo intends
    to be JSON-serializable to conform to PEP 610, this class contains the logic for
    parsing a hash name and value for correctness, and then checking whether that hash
    conforms to a schema with `.is_hash_allowed()`."""
        r"[#&]({choices})=([^&]*)".format(
            choices="|".join(re.escape(hash_name) for hash_name in _SUPPORTED_HASHES)
        ),
        assert self.name in _SUPPORTED_HASHES
    @functools.lru_cache(maxsize=None)
        """Search a string for a checksum algorithm name and encoded output value."""
        return cls(name=name, value=value)
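        # Illustrative sketch (not from the original module): given a URL whose
        # fragment embeds a supported checksum, this classmethod is expected to
        # behave like:
        #
        #   LinkHash.find_hash_url_fragment(
        #       "https://example.com/pkg-1.0.tar.gz#sha256=deadbeef"
        #   )
        #   # -> LinkHash(name="sha256", value="deadbeef")
        #
        # Note the hex digest is not validated here; it is checked later against
        # a Hashes allow-list via `.is_hash_allowed()`.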
        """Return a Hashes instance which checks only for the current hash."""
        Return True if the current hash is allowed by `hashes`.
@dataclass(frozen=True)
class MetadataFile:
    """Information about a core metadata file associated with a distribution."""

    hashes: Optional[Dict[str, str]]
    hashes = {n: v for n, v in hashes.items() if n in _SUPPORTED_HASHES}
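    # Illustrative sketch (not from the original module): unsupported algorithm
    # names are simply dropped from the mapping, e.g.
    #
    #   supported_hashes({"sha256": "abc123", "blake2b": "def456"})
    #   # -> {"sha256": "abc123"}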
    Clean a "part" of a URL path (i.e. after splitting on "@" characters).
    Clean the first part of a URL path that corresponds to a local
    filesystem path (i.e. the first part after splitting on "@" characters).
    Clean the path portion of a URL.
    if is_local_path:
        clean_func = _clean_file_url_path
    else:
        clean_func = _clean_url_path_part
    return "".join(cleaned_parts)
    Make sure a link is fully quoted.
    For example, if ' ' occurs in the URL, it will be replaced with "%20",
    without double-quoting characters that are already percent-encoded.
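    # Illustrative sketch (not from the original module): spaces get quoted
    # while existing escapes are left alone, e.g.
    #
    #   _ensure_quoted_url("https://example.com/my pkg/pkg-1.0.tar.gz")
    #   # -> "https://example.com/my%20pkg/pkg-1.0.tar.gz"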
    """Represents a parsed link from a Package Index's simple URL"""
        "metadata_file_data",
        "cache_link_parsing",
        comes_from: Optional[Union[str, "IndexContent"]] = None,
        requires_python: Optional[str] = None,
        yanked_reason: Optional[str] = None,
        metadata_file_data: Optional[MetadataFile] = None,
        cache_link_parsing: bool = True,
        hashes: Optional[Mapping[str, str]] = None,
        :param url: url of the resource pointed to (href of the link)
        :param comes_from: instance of IndexContent where the link was found,
            or a string.
        :param requires_python: String containing the `Requires-Python`
            metadata field, specified in PEP 345. This may be specified by
            a data-requires-python attribute in the HTML link tag, as
            described in PEP 503.
        :param yanked_reason: the reason the file has been yanked, if the
            file has been yanked, or None if the file hasn't been yanked.
            This is the value of the "data-yanked" attribute, if present, in
            a simple repository HTML link. If the file has been yanked but
            no reason was provided, this should be the empty string. See
            PEP 592 for more information and the specification.
        :param metadata_file_data: the metadata attached to the file, or None if
            no such metadata is provided. This argument, if not None, indicates
            that a separate metadata file exists, and also optionally supplies
            hashes for that file.
        :param cache_link_parsing: A flag that is used elsewhere to determine
            whether resources retrieved from this link should be cached. PyPI
            URLs should generally have this set to False, for example.
        :param hashes: A mapping of hash names to digests to allow us to
            determine the validity of a download.
            url = path_to_url(url)
            self._hashes = {**hashes, **hashes_from_link}
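        # Illustrative sketch (not from the original module): a hash embedded in
        # the URL fragment is merged over the explicit `hashes` mapping, so the
        # fragment takes precedence:
        #
        #   Link(
        #       "https://example.com/pkg-1.0-py3-none-any.whl#sha256=abc123",
        #       requires_python=">=3.8",
        #   )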
        file_data: Dict[str, Any],
        page_url: str,
    ) -> Optional["Link"]:
        Convert a PyPI JSON document from a simple repository page into a Link.
        if metadata_info is None:
            metadata_file_data = None
        if yanked_reason and not isinstance(yanked_reason, str):
        elif not yanked_reason:
            requires_python=pyrequire,
            yanked_reason=yanked_reason,
            metadata_file_data=metadata_file_data,
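        # Illustrative sketch (assumed document shape, not from the original
        # module): a PEP 691-style file entry might look like:
        #
        #   Link.from_json(
        #       {
        #           "url": "pkg-1.0-py3-none-any.whl",
        #           "hashes": {"sha256": "abc123"},
        #           "requires-python": ">=3.8",
        #           "yanked": "broken metadata",
        #       },
        #       page_url="https://example.com/simple/pkg/",
        #   )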
        anchor_attribs: Dict[str, Optional[str]],
        page_url: str,
        base_url: str,
    ) -> Optional["Link"]:
        Convert an anchor element's attributes in a simple repository page to a Link.
        if metadata_info is None:
        if metadata_info == "true":
        elif metadata_info is None:
            metadata_file_data = None
                    "Index returned invalid data-dist-info-metadata value: %s",
            requires_python=pyrequire,
            yanked_reason=yanked_reason,
            metadata_file_data=metadata_file_data,
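        # Illustrative sketch (assumed anchor attributes, not from the original
        # module):
        #
        #   Link.from_element(
        #       {
        #           "href": "pkg-1.0.tar.gz#sha256=abc123",
        #           "data-requires-python": ">=3.8",
        #           "data-dist-info-metadata": "sha256=def456",
        #       },
        #       page_url="https://example.com/simple/pkg/",
        #       base_url="https://example.com/simple/pkg/",
        #   )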
            rp = f" (requires-python:{self.requires_python})"
            return "{} (from {}){}".format(
            return redact_auth_from_url(str(self._url))
        return f"<Link {self}>"
        path = self.path.rstrip("/")
            netloc, user_pass = split_auth_from_netloc(self.netloc)
        assert name, f"URL {self._url!r} produced no filename"
        This can contain auth information.
    def path(self) -> str:
    def splitext(self) -> Tuple[str, str]:
        scheme, netloc, path, query, fragment = self._parsed_url
                reason=f"{self} contains an egg fragment with a non-PEP 508 name",
                replacement="to use the req @ url syntax, and remove the egg fragment",
    _subdirectory_fragment_re = re.compile(r"[#&]subdirectory=([^&]*)")
        """Return a link to the associated core metadata file (if any)."""
        metadata_url = f"{self.url_without_fragment}.metadata"
        return Link(metadata_url)
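        # Illustrative sketch (not from the original module): per PEP 658, the
        # metadata file sits alongside the distribution, so a link to
        # ".../pkg-1.0-py3-none-any.whl" yields a metadata link to
        # ".../pkg-1.0-py3-none-any.whl.metadata" (or None when no
        # metadata_file_data is attached).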
    def hash(self) -> Optional[str]:
        return next(iter(self._hashes.values()), None)
        return next(iter(self._hashes), None)
        Return True if the link has a hash and it is allowed by `hashes`.
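        # Illustrative sketch (assumed Hashes construction, not from the
        # original module):
        #
        #   link = Link("https://example.com/pkg-1.0.tar.gz#sha256=abc123")
        #   link.is_hash_allowed(Hashes({"sha256": ["abc123"]}))  # -> True
        #   link.is_hash_allowed(None)                            # -> False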
    """Convert link for equivalency check.

    This is used in the resolver to check whether two URL-specified requirements
    likely point to the same distribution and can be considered equivalent. This
    equivalency logic avoids comparing URLs literally, which can be too strict
    (e.g. "a=1&b=2" vs "b=2&a=1") and produce conflicts unexpected by users.

    Currently this does three things:

    1. Drop the basic auth part. This is technically wrong since a server can
       serve different content based on auth, but if it does that, it is even
       impossible to guarantee two URLs without auth are equivalent, since
       the user can input different auth information when prompted. So the
       practical solution is to assume the auth doesn't affect the response.
    2. Parse the query to avoid the ordering issue. Note that the ordering of
       values under the same key is NOT normalized; i.e. "a=1&a=2" and "a=2&a=1"
       are still considered different.
    3. Explicitly drop most of the fragment part, except ``subdirectory=`` and
       hash values, since it should have no impact on the downloaded content.
       Note that this drops the "egg=" part historically used to denote the
       requested project (and extras), which is wrong in the strictest sense,
       but too many people supply it inconsistently, causing superfluous
       resolution conflicts, so we choose to ignore it as well.
    """
    query: Dict[str, List[str]]
    hashes: Dict[str, str]
    if "egg" in fragment:
        subdirectory = fragment["subdirectory"][0]
    except (IndexError, KeyError):
    hashes = {k: fragment[k][0] for k in _SUPPORTED_HASHES if k in fragment}
        subdirectory=subdirectory,
@functools.lru_cache(maxsize=None)
def links_equivalent(link1: Link, link2: Link) -> bool:
    return _clean_link(link1) == _clean_link(link2)
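# Illustrative sketch (not from the original module): query ordering and the
# egg= fragment do not affect equivalency:
#
#   links_equivalent(
#       Link("https://example.com/pkg.tar.gz?a=1&b=2#egg=pkg"),
#       Link("https://example.com/pkg.tar.gz?b=2&a=1"),
#   )
#   # -> True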