18 import dummy_threading
as threading
21from .
import DistlibException
22from .compat
import (urljoin, urlparse, urlunparse, url2pathname, pathname2url,
23 queue, quote, unescape, build_opener,
24 HTTPRedirectHandler
as BaseRedirectHandler, text_type,
25 Request, HTTPError, URLError)
26from .database
import Distribution, DistributionPath, make_dist
27from .metadata
import Metadata, MetadataInvalidError
28from .util
import (cached_property, ensure_slash, split_filename, get_project_data,
29 parse_requirement, parse_name_and_version, ServerProxy,
31from .version
import get_scheme, UnsupportedVersionError
32from .wheel
import Wheel, is_compatible
38HTML_CONTENT_TYPE =
re.compile(
'text/html|application/x(ht)?ml')
39DEFAULT_INDEX =
'https://pypi.org/pypi'
43 Return all distribution names known by an index.
44 :param url: The URL of the index.
45 :return: A list of all known distribution names.
57 A class to work around a bug in some Python 3.2.x releases.
68 for key
in (
'location',
'uri'):
74 urlparts = urlparse(newurl)
77 if hasattr(headers,
'replace_header'):
84 http_error_301 = http_error_303 = http_error_307 = http_error_302
88 A base class for locators - things that locate distributions.
90 source_extensions = (
'.tar.gz',
'.tar.bz2',
'.tar',
'.zip',
'.tgz',
'.tbz')
91 binary_extensions = (
'.egg',
'.exe',
'.whl')
92 excluded_extensions = (
'.pdf',)
100 downloadable_extensions = source_extensions + (
'.whl',)
104 Initialise an instance.
105 :param scheme: Because locators look for most recent versions, they
106 need to know the version scheme to use. This specifies
107 the current PEP-recommended scheme - use ``'legacy'``
108 if you need to support existing distributions on PyPI.
123 Return any errors which have occurred.
126 while not self.
errors.empty():
128 e = self.
errors.get(
False)
137 Clear any errors which may have been logged.
155 For a given project, get a dictionary mapping available versions to Distribution
158 This should be implemented in subclasses.
160 If called from a locate() request, self.matcher will be set to a
161 matcher for the requirement to satisfy, otherwise it will be None.
167 Return all the distribution names known to this locator.
173 For a given project, get a dictionary mapping available versions to Distribution
176 This calls _get_project to do all the work, and just implements a caching layer on top.
181 result = self.
_cache[name]
185 self.
_cache[name] = result
190 Give an url a score which can be used to choose preferred URLs
191 for a given project release.
201 is_downloadable, is_wheel, compatible, basename)
205 Choose one of two URLs where both are candidates for distribution
206 archives for the same version of a distribution (for example,
209 The current implementation favours https:// URLs over http://, archives
210 from PyPI over those from other locations, wheel compatibility (if a
211 wheel) and then the archive name.
225 def split_filename(self, filename, project_name):
227 Attempt to split a filename in project name, version and Python version.
229 return split_filename(filename, project_name)
233 See if a URL is a candidate for a download URL for a project (the URL
234 has typically been scraped from an HTML page).
236 If it is, a dictionary is returned with keys "name", "version",
237 "filename" and "url"; otherwise, None is returned.
240 return normalize_name(name1) == normalize_name(name2)
243 scheme, netloc, path, params, query, frag = urlparse(url)
251 algo, digest =
None,
None
253 if path
and path[-1] ==
'/':
261 if project_name
is None:
270 'url': urlunparse((scheme, netloc, origpath,
272 'python-version':
', '.join(
275 except Exception
as e:
283 path = path[:-
len(ext)]
288 name, version, pyver = t
289 if not project_name
or same_project(project_name, name):
293 'filename': filename,
294 'url': urlunparse((scheme, netloc, origpath,
299 result[
'python-version'] = pyver
302 result[
'%s_digest' % algo] = digest
307 Get a digest from a dictionary by looking at a "digests" dictionary
308 or keys of the form 'algo_digest'.
310 Returns a 2-tuple (algo, digest) if found, else None. Currently
311 looks only for SHA256, then MD5.
314 if 'digests' in info:
315 digests = info[
'digests']
316 for algo
in (
'sha256',
'md5'):
318 result = (algo, digests[algo])
321 for algo
in (
'sha256',
'md5'):
322 key =
'%s_digest' % algo
324 result = (algo, info[key])
330 Update a result dictionary (the final result from _get_project) with a
331 dictionary for a specific version, which typically holds information
332 gleaned from a filename or URL for an archive for the distribution.
336 if version
in result:
337 dist = result[version]
340 dist = make_dist(name, version, scheme=self.
scheme)
344 result[
'digests'][url] = digest
347 result[
'urls'].setdefault(version, set()).add(url)
349 result[version] = dist
351 def locate(self, requirement, prereleases=False):
353 Find the most recent distribution which matches the given
356 :param requirement: A requirement of the form 'foo (1.0)' or perhaps
357 'foo (>= 1.0, < 2.0, != 1.3)'
358 :param prereleases: If ``True``, allow pre-release versions
359 to be located. Otherwise, pre-release versions
361 :return: A :class:`Distribution` instance, or ``None`` if no such
362 distribution could be located.
365 r = parse_requirement(requirement)
367 raise DistlibException(
'Not a valid requirement: %r' % requirement)
368 scheme = get_scheme(self.
scheme)
370 logger.debug(
'matcher: %s (%s)', matcher, type(matcher).__name__)
372 if len(versions) > 2:
377 if k
in (
'urls',
'digests'):
383 if prereleases
or not vcls(k).is_prerelease:
396 result = versions[version]
413 This locator uses XML-RPC to locate distributions. It therefore
414 cannot be used with simple mirrors (that only mirror file content).
418 Initialise an instance.
420 :param url: The URL to use for XML-RPC.
421 :param kwargs: Passed to the superclass constructor.
429 Return all the distribution names known to this locator.
434 result = {
'urls': {},
'digests': {}}
455 result[
'urls'].setdefault(v, set()).add(url)
456 result[
'digests'][url] = digest
461 This locator uses PyPI's JSON interface. It's very limited in functionality
462 and probably not worth using.
470 Return all the distribution names known to this locator.
475 result = {
'urls': {},
'digests': {}}
476 url = urljoin(self.
base_url,
'%s/json' % quote(name))
478 resp = self.
opener.open(url)
492 for info
in d[
'urls']:
496 result[
'urls'].setdefault(
md.version, set()).add(url)
499 for version, infos
in d[
'releases'].items():
507 result[version] = odist
512 result[
'urls'].setdefault(version, set()).add(url)
522 except Exception
as e:
523 self.
errors.put(text_type(e))
530 This class represents a scraped HTML page.
538(rel\\s*=\\s*(?:"(?P<rel1>[^"]*)"|'(?P<rel2>[^']*)'|(?P<rel3>[^>\\s\n]*))\\s+)?
539href\\s*=\\s*(?:"(?P<url1>[^"]*)"|'(?P<url2>[^']*)'|(?P<url3>[^>\\s\n]*))
540(\\s+rel\\s*=\\s*(?:"(?P<rel4>[^"]*)"|'(?P<rel5>[^']*)'|(?P<rel6>[^>\\s\n]*)))?
546 Initialise an instance with the Unicode page contents and the URL they
560 Return the URLs of all the links on a page together with information
561 about their "rel" attribute, for determining which ones to treat as
562 downloads and which ones to queue for further scraping.
566 scheme, netloc, path, params, query, frag = urlparse(url)
567 return urlunparse((scheme, netloc, quote(path),
568 params, query, frag))
573 rel = (d[
'rel1']
or d[
'rel2']
or d[
'rel3']
or
574 d[
'rel4']
or d[
'rel5']
or d[
'rel6'])
575 url = d[
'url1']
or d[
'url2']
or d[
'url3']
582 result = sorted(result, key=
lambda t: t[0], reverse=
True)
588 A locator which scrapes HTML pages to locate downloads for a distribution.
589 This runs multiple threads to do the I/O; performance is at least as good
590 as pip's PackageFinder, which works in an analogous fashion.
600 def __init__(self, url, timeout=None, num_workers=10, **kwargs):
602 Initialise an instance.
603 :param url: The root URL to use for scraping.
604 :param timeout: The timeout, in seconds, to be applied to requests.
605 This defaults to ``None`` (no timeout specified).
606 :param num_workers: The number of worker threads you want to do I/O,
608 :param kwargs: Passed to the superclass.
629 Threads are created only when get_project is called, and terminate
630 before it returns. They are there primarily to parallelise I/O (i.e.
642 Tell all the threads to terminate (by sending a sentinel value) and
643 wait for them to do so.
654 result = {
'urls': {},
'digests': {}}
658 url = urljoin(self.
base_url,
'%s/' % quote(name))
671 platform_dependent =
re.compile(
r'\b(linux_(i\d86|x86_64|arm\w+)|'
672 r'win(32|_amd64)|macosx_?\d+)\b',
re.I)
676 Does an URL refer to a platform-specific download?
682 See if an URL is a suitable download for a project.
684 If it is, register information in the result dictionary (for
685 _get_project) about the specific version it's for.
687 Note that the return value isn't actually used other than as a boolean
702 Determine whether a link URL from a referring page and with a
703 particular "rel" attribute should be queued for scraping.
705 scheme, netloc, path, _, _, _ = urlparse(link)
713 elif rel
not in (
'homepage',
'download'):
715 elif scheme
not in (
'http',
'https',
'ftp'):
725 logger.debug(
'should_queue: %s (%s) from %s -> %s', link, rel,
731 Get a URL to fetch from the work queue, get the HTML page, examine its
732 links for download candidates and candidates for further scraping.
734 This is a handy method to run in a thread.
744 if link
not in self.
_seen:
751 except MetadataInvalidError:
753 except Exception
as e:
754 self.
errors.put(text_type(e))
764 Get the HTML for an URL, possibly from an in-memory cache.
766 XXX TODO Note: this cache is never actually cleared. It's assumed that
767 the data won't get stale over the lifetime of a locator instance (not
768 necessarily true for the default_locator).
771 scheme, netloc, path, _, _, _ = urlparse(url)
773 url = urljoin(ensure_slash(url),
'index.html')
777 logger.debug(
'Returning %s from cache: %s', url, result)
782 logger.debug(
'Skipping %s due to bad host %s', url, host)
784 req = Request(url, headers={
'Accept-encoding':
'identity'})
806 result =
Page(data, final_url)
808 except HTTPError
as e:
811 except URLError
as e:
815 except Exception
as e:
825 Return all the distribution names known to this locator.
830 raise DistlibException(
'Unable to get %s' % self.
base_url)
837 This class locates distributions in a directory tree.
842 Initialise an instance.
843 :param path: The root of the directory tree to search.
844 :param kwargs: Passed to the superclass constructor,
846 * recursive - if True (the default), subdirectories are
847 recursed into. If False, only the top-level directory
854 raise DistlibException(
'Not a directory: %r' % path)
859 Should a filename be considered as a candidate for a distribution
860 archive? As well as the filename, the directory which contains it
861 is provided, though not used by the current implementation.
866 result = {
'urls': {},
'digests': {}}
871 url = urlunparse((
'file',
'',
883 Return all the distribution names known to this locator.
890 url = urlunparse((
'file',
'',
902 This locator uses special extended metadata (not available on PyPI) and is
903 the basis of performant dependency resolution in distlib. Other locators
904 require archive downloads before dependencies can be determined! As you
905 might imagine, that can be slow.
909 Return all the distribution names known to this locator.
914 result = {
'urls': {},
'digests': {}}
915 data = get_project_data(name)
918 if info[
'ptype'] !=
'sdist' or info[
'pyversion'] !=
'source':
923 dist = make_dist(data[
'name'], info[
'version'],
925 'Placeholder for summary'),
930 if 'digest' in info
and info[
'digest']:
935 result[
'urls'].setdefault(
dist.version, set()).add(info[
'url'])
940 This locator finds installed distributions in a path. It can be useful for
941 adding to an :class:`AggregatingLocator`.
945 Initialise an instance.
947 :param distpath: A :class:`DistributionPath` instance to search.
954 dist = self.
distpath.get_distribution(name)
956 result = {
'urls': {},
'digests': {}}
968 This class allows you to chain and/or merge a list of locators.
972 Initialise an instance.
974 :param locators: The list of locators to search.
975 :param kwargs: Passed to the superclass constructor,
977 * merge - if False (the default), the first successful
978 search from any of the locators is returned. If True,
979 the results from all locators are merged (this can be
1044 Return all the distribution names known to this locator.
1050 except NotImplementedError:
1068 Locate dependencies for distributions.
1073 Initialise an instance, using the specified locator
1074 to locate distributions.
1076 self.locator = locator
or default_locator
1079 def add_distribution(self, dist):
1081 Add a distribution to the finder. This will update internal information
1082 about who provides what.
1083 :param dist: The distribution to add.
1087 self.dists_by_name[name] = dist
1090 name, version = parse_name_and_version(p)
1091 logger.debug(
'Add to provided: %s, %s, %s', name, version, dist)
1092 self.provided.setdefault(name, set()).add((version, dist))
1096 Remove a distribution from the finder. This will update internal
1097 information about who provides what.
1098 :param dist: The distribution to remove.
1102 del self.dists_by_name[name]
1105 name, version = parse_name_and_version(p)
1106 logger.debug(
'Remove from provided: %s, %s, %s', name, version, dist)
1107 s = self.provided[name]
1110 del self.provided[name]
1114 Get a version matcher for a requirement.
1115 :param reqt: The requirement
1117 :return: A version matcher (an instance of
1118 :class:`distlib.version.Matcher`).
1122 except UnsupportedVersionError:
1130 Find the distributions which can fulfill a requirement.
1132 :param reqt: The requirement.
1134 :return: A set of distribution which can fulfill the requirement.
1136 matcher = self.get_matcher(reqt)
1139 provided = self.provided
1140 if name
in provided:
1141 for version, provider
in provided[name]:
1144 except UnsupportedVersionError:
1154 Attempt to replace one provider with another. This is typically used
1155 when resolving dependencies from multiple sources, e.g. A requires
1156 (B >= 1.0) while C requires (B >= 1.1).
1158 For successful replacement, ``provider`` must meet all the requirements
1159 which ``other`` fulfills.
1161 :param provider: The provider we are trying to replace with.
1162 :param other: The provider we're trying to replace.
1163 :param problems: If False is returned, this will contain what
1164 problems prevented replacement. This is currently
1165 a tuple of the literal string 'cantreplace',
1166 ``provider``, ``other`` and the set of requirements
1167 that ``provider`` couldn't fulfill.
1168 :return: True if we can replace ``other`` with ``provider``, else
1171 rlist = self.reqts[other]
1174 matcher = self.get_matcher(s)
1184 self.remove_distribution(other)
1185 del self.reqts[other]
1187 self.reqts.setdefault(provider, set()).add(s)
1188 self.add_distribution(provider)
1192 def find(self, requirement, meta_extras=None, prereleases=False):
1194 Find a distribution and all distributions it depends on.
1196 :param requirement: The requirement specifying the distribution to
1197 find, or a Distribution instance.
1198 :param meta_extras: A list of meta extras such as :test:, :build: and
1200 :param prereleases: If ``True``, allow pre-release versions to be
1201 returned - otherwise, don't return prereleases
1202 unless they're all that's available.
1204 Return a set of :class:`Distribution` instances and a set of
1207 The distributions returned should be such that they have the
1208 :attr:`required` attribute set to ``True`` if they were
1209 from the ``requirement`` passed to ``find()``, and they have the
1210 :attr:`build_time_dependency` attribute set to ``True`` unless they
1211 are post-installation dependencies of the ``requirement``.
1213 The problems should be a tuple consisting of the string
1214 ``'unsatisfied'`` and the requirement which couldn't be satisfied
1215 by any distribution known to the locator.
1220 self.dists_by_name = {}
1223 meta_extras = set(meta_extras
or [])
1224 if ':*:' in meta_extras:
1227 meta_extras |= set([
':test:',
':build:',
':dev:'])
1230 dist = odist = requirement
1233 dist = odist = self.locator.
locate(requirement,
1234 prereleases=prereleases)
1236 raise DistlibException(
'Unable to locate %r' % requirement)
1241 install_dists = set([odist])
1245 if name
not in self.dists_by_name:
1246 self.add_distribution(dist)
1249 other = self.dists_by_name[name]
1251 self.try_to_replace(dist, other, problems)
1256 if meta_extras
and dist
in install_dists:
1257 for key
in (
'test',
'build',
'dev'):
1259 if e
in meta_extras:
1260 ereqts |=
getattr(dist,
'%s_requires' % key)
1261 all_reqts = ireqts | sreqts | ereqts
1263 providers = self.find_providers(r)
1266 provider = self.locator.
locate(r, prereleases=prereleases)
1269 if provider
is None and not prereleases:
1270 provider = self.locator.
locate(r, prereleases=
True)
1271 if provider
is None:
1276 if (n, v)
not in self.dists:
1279 if r
in ireqts
and dist
in install_dists:
1285 if name
not in self.dists_by_name:
1286 self.reqts.setdefault(p, set()).add(r)
1288 other = self.dists_by_name[name]
1291 self.try_to_replace(p, other, problems)
1293 dists = set(self.dists.values())
1300 return dists, problems
__init__(self, *locators, **kwargs)
get_distribution_names(self)
get_distribution_names(self)
should_include(self, filename, parent)
__init__(self, path, **kwargs)
__init__(self, distpath, **kwargs)
get_distribution_names(self)
tuple excluded_extensions
tuple downloadable_extensions
get_distribution_names(self)
_update_version_data(self, result, info)
locate(self, requirement, prereleases=False)
__init__(self, scheme='default')
prefer_url(self, url1, url2)
convert_url_to_download_info(self, url, project_name)
split_filename(self, filename, project_name)
__init__(self, data, url)
__init__(self, url, **kwargs)
get_distribution_names(self)
__init__(self, url, **kwargs)
get_distribution_names(self)
http_error_302(self, req, fp, code, msg, headers)
__init__(self, url, timeout=None, num_workers=10, **kwargs)
get_distribution_names(self)
_is_platform_dependent(self, url)
_should_queue(self, link, referrer, rel)
_process_download(self, url)
get_all_distribution_names(url=None)