summaryrefslogtreecommitdiff
path: root/lib/python2.7/site-packages/pip/download.py
diff options
context:
space:
mode:
Diffstat (limited to 'lib/python2.7/site-packages/pip/download.py')
-rw-r--r--lib/python2.7/site-packages/pip/download.py906
1 files changed, 906 insertions, 0 deletions
diff --git a/lib/python2.7/site-packages/pip/download.py b/lib/python2.7/site-packages/pip/download.py
new file mode 100644
index 0000000..54d3131
--- /dev/null
+++ b/lib/python2.7/site-packages/pip/download.py
@@ -0,0 +1,906 @@
+from __future__ import absolute_import
+
+import cgi
+import email.utils
+import getpass
+import json
+import logging
+import mimetypes
+import os
+import platform
+import re
+import shutil
+import sys
+import tempfile
+
+try:
+ import ssl # noqa
+ HAS_TLS = True
+except ImportError:
+ HAS_TLS = False
+
+from pip._vendor.six.moves.urllib import parse as urllib_parse
+from pip._vendor.six.moves.urllib import request as urllib_request
+
+import pip
+
+from pip.exceptions import InstallationError, HashMismatch
+from pip.models import PyPI
+from pip.utils import (splitext, rmtree, format_size, display_path,
+ backup_dir, ask_path_exists, unpack_file,
+ ARCHIVE_EXTENSIONS, consume, call_subprocess)
+from pip.utils.encoding import auto_decode
+from pip.utils.filesystem import check_path_owner
+from pip.utils.logging import indent_log
+from pip.utils.setuptools_build import SETUPTOOLS_SHIM
+from pip.utils.glibc import libc_ver
+from pip.utils.ui import DownloadProgressBar, DownloadProgressSpinner
+from pip.locations import write_delete_marker_file
+from pip.vcs import vcs
+from pip._vendor import requests, six
+from pip._vendor.requests.adapters import BaseAdapter, HTTPAdapter
+from pip._vendor.requests.auth import AuthBase, HTTPBasicAuth
+from pip._vendor.requests.models import CONTENT_CHUNK_SIZE, Response
+from pip._vendor.requests.utils import get_netrc_auth
+from pip._vendor.requests.structures import CaseInsensitiveDict
+from pip._vendor.requests.packages import urllib3
+from pip._vendor.cachecontrol import CacheControlAdapter
+from pip._vendor.cachecontrol.caches import FileCache
+from pip._vendor.lockfile import LockError
+from pip._vendor.six.moves import xmlrpc_client
+
+
+__all__ = ['get_file_content',
+ 'is_url', 'url_to_path', 'path_to_url',
+ 'is_archive_file', 'unpack_vcs_link',
+ 'unpack_file_url', 'is_vcs_url', 'is_file_url',
+ 'unpack_http_url', 'unpack_url']
+
+
+logger = logging.getLogger(__name__)
+
+
+def user_agent():
+ """
+ Return a string representing the user agent.
+ """
+ data = {
+ "installer": {"name": "pip", "version": pip.__version__},
+ "python": platform.python_version(),
+ "implementation": {
+ "name": platform.python_implementation(),
+ },
+ }
+
+ if data["implementation"]["name"] == 'CPython':
+ data["implementation"]["version"] = platform.python_version()
+ elif data["implementation"]["name"] == 'PyPy':
+ if sys.pypy_version_info.releaselevel == 'final':
+ pypy_version_info = sys.pypy_version_info[:3]
+ else:
+ pypy_version_info = sys.pypy_version_info
+ data["implementation"]["version"] = ".".join(
+ [str(x) for x in pypy_version_info]
+ )
+ elif data["implementation"]["name"] == 'Jython':
+ # Complete Guess
+ data["implementation"]["version"] = platform.python_version()
+ elif data["implementation"]["name"] == 'IronPython':
+ # Complete Guess
+ data["implementation"]["version"] = platform.python_version()
+
+ if sys.platform.startswith("linux"):
+ from pip._vendor import distro
+ distro_infos = dict(filter(
+ lambda x: x[1],
+ zip(["name", "version", "id"], distro.linux_distribution()),
+ ))
+ libc = dict(filter(
+ lambda x: x[1],
+ zip(["lib", "version"], libc_ver()),
+ ))
+ if libc:
+ distro_infos["libc"] = libc
+ if distro_infos:
+ data["distro"] = distro_infos
+
+ if sys.platform.startswith("darwin") and platform.mac_ver()[0]:
+ data["distro"] = {"name": "macOS", "version": platform.mac_ver()[0]}
+
+ if platform.system():
+ data.setdefault("system", {})["name"] = platform.system()
+
+ if platform.release():
+ data.setdefault("system", {})["release"] = platform.release()
+
+ if platform.machine():
+ data["cpu"] = platform.machine()
+
+ # Python 2.6 doesn't have ssl.OPENSSL_VERSION.
+ if HAS_TLS and sys.version_info[:2] > (2, 6):
+ data["openssl_version"] = ssl.OPENSSL_VERSION
+
+ return "{data[installer][name]}/{data[installer][version]} {json}".format(
+ data=data,
+ json=json.dumps(data, separators=(",", ":"), sort_keys=True),
+ )
+
+
+class MultiDomainBasicAuth(AuthBase):
+
+ def __init__(self, prompting=True):
+ self.prompting = prompting
+ self.passwords = {}
+
+ def __call__(self, req):
+ parsed = urllib_parse.urlparse(req.url)
+
+ # Get the netloc without any embedded credentials
+ netloc = parsed.netloc.rsplit("@", 1)[-1]
+
+ # Set the url of the request to the url without any credentials
+ req.url = urllib_parse.urlunparse(parsed[:1] + (netloc,) + parsed[2:])
+
+ # Use any stored credentials that we have for this netloc
+ username, password = self.passwords.get(netloc, (None, None))
+
+ # Extract credentials embedded in the url if we have none stored
+ if username is None:
+ username, password = self.parse_credentials(parsed.netloc)
+
+ # Get creds from netrc if we still don't have them
+ if username is None and password is None:
+ netrc_auth = get_netrc_auth(req.url)
+ username, password = netrc_auth if netrc_auth else (None, None)
+
+ if username or password:
+ # Store the username and password
+ self.passwords[netloc] = (username, password)
+
+ # Send the basic auth with this request
+ req = HTTPBasicAuth(username or "", password or "")(req)
+
+ # Attach a hook to handle 401 responses
+ req.register_hook("response", self.handle_401)
+
+ return req
+
+ def handle_401(self, resp, **kwargs):
+ # We only care about 401 responses, anything else we want to just
+ # pass through the actual response
+ if resp.status_code != 401:
+ return resp
+
+ # We are not able to prompt the user so simply return the response
+ if not self.prompting:
+ return resp
+
+ parsed = urllib_parse.urlparse(resp.url)
+
+ # Prompt the user for a new username and password
+ username = six.moves.input("User for %s: " % parsed.netloc)
+ password = getpass.getpass("Password: ")
+
+ # Store the new username and password to use for future requests
+ if username or password:
+ self.passwords[parsed.netloc] = (username, password)
+
+ # Consume content and release the original connection to allow our new
+ # request to reuse the same one.
+ resp.content
+ resp.raw.release_conn()
+
+ # Add our new username and password to the request
+ req = HTTPBasicAuth(username or "", password or "")(resp.request)
+
+ # Send our new request
+ new_resp = resp.connection.send(req, **kwargs)
+ new_resp.history.append(resp)
+
+ return new_resp
+
+ def parse_credentials(self, netloc):
+ if "@" in netloc:
+ userinfo = netloc.rsplit("@", 1)[0]
+ if ":" in userinfo:
+ return userinfo.split(":", 1)
+ return userinfo, None
+ return None, None
+
+
+class LocalFSAdapter(BaseAdapter):
+
+ def send(self, request, stream=None, timeout=None, verify=None, cert=None,
+ proxies=None):
+ pathname = url_to_path(request.url)
+
+ resp = Response()
+ resp.status_code = 200
+ resp.url = request.url
+
+ try:
+ stats = os.stat(pathname)
+ except OSError as exc:
+ resp.status_code = 404
+ resp.raw = exc
+ else:
+ modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
+ content_type = mimetypes.guess_type(pathname)[0] or "text/plain"
+ resp.headers = CaseInsensitiveDict({
+ "Content-Type": content_type,
+ "Content-Length": stats.st_size,
+ "Last-Modified": modified,
+ })
+
+ resp.raw = open(pathname, "rb")
+ resp.close = resp.raw.close
+
+ return resp
+
+ def close(self):
+ pass
+
+
+class SafeFileCache(FileCache):
+ """
+ A file based cache which is safe to use even when the target directory may
+ not be accessible or writable.
+ """
+
+ def __init__(self, *args, **kwargs):
+ super(SafeFileCache, self).__init__(*args, **kwargs)
+
+ # Check to ensure that the directory containing our cache directory
+ # is owned by the user current executing pip. If it does not exist
+ # we will check the parent directory until we find one that does exist.
+ # If it is not owned by the user executing pip then we will disable
+ # the cache and log a warning.
+ if not check_path_owner(self.directory):
+ logger.warning(
+ "The directory '%s' or its parent directory is not owned by "
+ "the current user and the cache has been disabled. Please "
+ "check the permissions and owner of that directory. If "
+ "executing pip with sudo, you may want sudo's -H flag.",
+ self.directory,
+ )
+
+ # Set our directory to None to disable the Cache
+ self.directory = None
+
+ def get(self, *args, **kwargs):
+ # If we don't have a directory, then the cache should be a no-op.
+ if self.directory is None:
+ return
+
+ try:
+ return super(SafeFileCache, self).get(*args, **kwargs)
+ except (LockError, OSError, IOError):
+ # We intentionally silence this error, if we can't access the cache
+ # then we can just skip caching and process the request as if
+ # caching wasn't enabled.
+ pass
+
+ def set(self, *args, **kwargs):
+ # If we don't have a directory, then the cache should be a no-op.
+ if self.directory is None:
+ return
+
+ try:
+ return super(SafeFileCache, self).set(*args, **kwargs)
+ except (LockError, OSError, IOError):
+ # We intentionally silence this error, if we can't access the cache
+ # then we can just skip caching and process the request as if
+ # caching wasn't enabled.
+ pass
+
+ def delete(self, *args, **kwargs):
+ # If we don't have a directory, then the cache should be a no-op.
+ if self.directory is None:
+ return
+
+ try:
+ return super(SafeFileCache, self).delete(*args, **kwargs)
+ except (LockError, OSError, IOError):
+ # We intentionally silence this error, if we can't access the cache
+ # then we can just skip caching and process the request as if
+ # caching wasn't enabled.
+ pass
+
+
+class InsecureHTTPAdapter(HTTPAdapter):
+
+ def cert_verify(self, conn, url, verify, cert):
+ conn.cert_reqs = 'CERT_NONE'
+ conn.ca_certs = None
+
+
+class PipSession(requests.Session):
+
+ timeout = None
+
+ def __init__(self, *args, **kwargs):
+ retries = kwargs.pop("retries", 0)
+ cache = kwargs.pop("cache", None)
+ insecure_hosts = kwargs.pop("insecure_hosts", [])
+
+ super(PipSession, self).__init__(*args, **kwargs)
+
+ # Attach our User Agent to the request
+ self.headers["User-Agent"] = user_agent()
+
+ # Attach our Authentication handler to the session
+ self.auth = MultiDomainBasicAuth()
+
+ # Create our urllib3.Retry instance which will allow us to customize
+ # how we handle retries.
+ retries = urllib3.Retry(
+ # Set the total number of retries that a particular request can
+ # have.
+ total=retries,
+
+ # A 503 error from PyPI typically means that the Fastly -> Origin
+ # connection got interrupted in some way. A 503 error in general
+ # is typically considered a transient error so we'll go ahead and
+ # retry it.
+ status_forcelist=[503],
+
+ # Add a small amount of back off between failed requests in
+ # order to prevent hammering the service.
+ backoff_factor=0.25,
+ )
+
+ # We want to _only_ cache responses on securely fetched origins. We do
+ # this because we can't validate the response of an insecurely fetched
+ # origin, and we don't want someone to be able to poison the cache and
+ # require manual eviction from the cache to fix it.
+ if cache:
+ secure_adapter = CacheControlAdapter(
+ cache=SafeFileCache(cache, use_dir_lock=True),
+ max_retries=retries,
+ )
+ else:
+ secure_adapter = HTTPAdapter(max_retries=retries)
+
+ # Our Insecure HTTPAdapter disables HTTPS validation. It does not
+ # support caching (see above) so we'll use it for all http:// URLs as
+ # well as any https:// host that we've marked as ignoring TLS errors
+ # for.
+ insecure_adapter = InsecureHTTPAdapter(max_retries=retries)
+
+ self.mount("https://", secure_adapter)
+ self.mount("http://", insecure_adapter)
+
+ # Enable file:// urls
+ self.mount("file://", LocalFSAdapter())
+
+ # We want to use a non-validating adapter for any requests which are
+ # deemed insecure.
+ for host in insecure_hosts:
+ self.mount("https://{0}/".format(host), insecure_adapter)
+
+ def request(self, method, url, *args, **kwargs):
+ # Allow setting a default timeout on a session
+ kwargs.setdefault("timeout", self.timeout)
+
+ # Dispatch the actual request
+ return super(PipSession, self).request(method, url, *args, **kwargs)
+
+
+def get_file_content(url, comes_from=None, session=None):
+ """Gets the content of a file; it may be a filename, file: URL, or
+ http: URL. Returns (location, content). Content is unicode."""
+ if session is None:
+ raise TypeError(
+ "get_file_content() missing 1 required keyword argument: 'session'"
+ )
+
+ match = _scheme_re.search(url)
+ if match:
+ scheme = match.group(1).lower()
+ if (scheme == 'file' and comes_from and
+ comes_from.startswith('http')):
+ raise InstallationError(
+ 'Requirements file %s references URL %s, which is local'
+ % (comes_from, url))
+ if scheme == 'file':
+ path = url.split(':', 1)[1]
+ path = path.replace('\\', '/')
+ match = _url_slash_drive_re.match(path)
+ if match:
+ path = match.group(1) + ':' + path.split('|', 1)[1]
+ path = urllib_parse.unquote(path)
+ if path.startswith('/'):
+ path = '/' + path.lstrip('/')
+ url = path
+ else:
+ # FIXME: catch some errors
+ resp = session.get(url)
+ resp.raise_for_status()
+ return resp.url, resp.text
+ try:
+ with open(url, 'rb') as f:
+ content = auto_decode(f.read())
+ except IOError as exc:
+ raise InstallationError(
+ 'Could not open requirements file: %s' % str(exc)
+ )
+ return url, content
+
+
+_scheme_re = re.compile(r'^(http|https|file):', re.I)
+_url_slash_drive_re = re.compile(r'/*([a-z])\|', re.I)
+
+
+def is_url(name):
+ """Returns true if the name looks like a URL"""
+ if ':' not in name:
+ return False
+ scheme = name.split(':', 1)[0].lower()
+ return scheme in ['http', 'https', 'file', 'ftp'] + vcs.all_schemes
+
+
+def url_to_path(url):
+ """
+ Convert a file: URL to a path.
+ """
+ assert url.startswith('file:'), (
+ "You can only turn file: urls into filenames (not %r)" % url)
+
+ _, netloc, path, _, _ = urllib_parse.urlsplit(url)
+
+ # if we have a UNC path, prepend UNC share notation
+ if netloc:
+ netloc = '\\\\' + netloc
+
+ path = urllib_request.url2pathname(netloc + path)
+ return path
+
+
+def path_to_url(path):
+ """
+ Convert a path to a file: URL. The path will be made absolute and have
+ quoted path parts.
+ """
+ path = os.path.normpath(os.path.abspath(path))
+ url = urllib_parse.urljoin('file:', urllib_request.pathname2url(path))
+ return url
+
+
+def is_archive_file(name):
+ """Return True if `name` is a considered as an archive file."""
+ ext = splitext(name)[1].lower()
+ if ext in ARCHIVE_EXTENSIONS:
+ return True
+ return False
+
+
+def unpack_vcs_link(link, location):
+ vcs_backend = _get_used_vcs_backend(link)
+ vcs_backend.unpack(location)
+
+
+def _get_used_vcs_backend(link):
+ for backend in vcs.backends:
+ if link.scheme in backend.schemes:
+ vcs_backend = backend(link.url)
+ return vcs_backend
+
+
+def is_vcs_url(link):
+ return bool(_get_used_vcs_backend(link))
+
+
+def is_file_url(link):
+ return link.url.lower().startswith('file:')
+
+
+def is_dir_url(link):
+ """Return whether a file:// Link points to a directory.
+
+ ``link`` must not have any other scheme but file://. Call is_file_url()
+ first.
+
+ """
+ link_path = url_to_path(link.url_without_fragment)
+ return os.path.isdir(link_path)
+
+
+def _progress_indicator(iterable, *args, **kwargs):
+ return iterable
+
+
+def _download_url(resp, link, content_file, hashes):
+ try:
+ total_length = int(resp.headers['content-length'])
+ except (ValueError, KeyError, TypeError):
+ total_length = 0
+
+ cached_resp = getattr(resp, "from_cache", False)
+
+ if logger.getEffectiveLevel() > logging.INFO:
+ show_progress = False
+ elif cached_resp:
+ show_progress = False
+ elif total_length > (40 * 1000):
+ show_progress = True
+ elif not total_length:
+ show_progress = True
+ else:
+ show_progress = False
+
+ show_url = link.show_url
+
+ def resp_read(chunk_size):
+ try:
+ # Special case for urllib3.
+ for chunk in resp.raw.stream(
+ chunk_size,
+ # We use decode_content=False here because we don't
+ # want urllib3 to mess with the raw bytes we get
+ # from the server. If we decompress inside of
+ # urllib3 then we cannot verify the checksum
+ # because the checksum will be of the compressed
+ # file. This breakage will only occur if the
+ # server adds a Content-Encoding header, which
+ # depends on how the server was configured:
+ # - Some servers will notice that the file isn't a
+ # compressible file and will leave the file alone
+ # and with an empty Content-Encoding
+ # - Some servers will notice that the file is
+ # already compressed and will leave the file
+ # alone and will add a Content-Encoding: gzip
+ # header
+ # - Some servers won't notice anything at all and
+ # will take a file that's already been compressed
+ # and compress it again and set the
+ # Content-Encoding: gzip header
+ #
+ # By setting this not to decode automatically we
+ # hope to eliminate problems with the second case.
+ decode_content=False):
+ yield chunk
+ except AttributeError:
+ # Standard file-like object.
+ while True:
+ chunk = resp.raw.read(chunk_size)
+ if not chunk:
+ break
+ yield chunk
+
+ def written_chunks(chunks):
+ for chunk in chunks:
+ content_file.write(chunk)
+ yield chunk
+
+ progress_indicator = _progress_indicator
+
+ if link.netloc == PyPI.netloc:
+ url = show_url
+ else:
+ url = link.url_without_fragment
+
+ if show_progress: # We don't show progress on cached responses
+ if total_length:
+ logger.info("Downloading %s (%s)", url, format_size(total_length))
+ progress_indicator = DownloadProgressBar(max=total_length).iter
+ else:
+ logger.info("Downloading %s", url)
+ progress_indicator = DownloadProgressSpinner().iter
+ elif cached_resp:
+ logger.info("Using cached %s", url)
+ else:
+ logger.info("Downloading %s", url)
+
+ logger.debug('Downloading from URL %s', link)
+
+ downloaded_chunks = written_chunks(
+ progress_indicator(
+ resp_read(CONTENT_CHUNK_SIZE),
+ CONTENT_CHUNK_SIZE
+ )
+ )
+ if hashes:
+ hashes.check_against_chunks(downloaded_chunks)
+ else:
+ consume(downloaded_chunks)
+
+
+def _copy_file(filename, location, link):
+ copy = True
+ download_location = os.path.join(location, link.filename)
+ if os.path.exists(download_location):
+ response = ask_path_exists(
+ 'The file %s exists. (i)gnore, (w)ipe, (b)ackup, (a)abort' %
+ display_path(download_location), ('i', 'w', 'b', 'a'))
+ if response == 'i':
+ copy = False
+ elif response == 'w':
+ logger.warning('Deleting %s', display_path(download_location))
+ os.remove(download_location)
+ elif response == 'b':
+ dest_file = backup_dir(download_location)
+ logger.warning(
+ 'Backing up %s to %s',
+ display_path(download_location),
+ display_path(dest_file),
+ )
+ shutil.move(download_location, dest_file)
+ elif response == 'a':
+ sys.exit(-1)
+ if copy:
+ shutil.copy(filename, download_location)
+ logger.info('Saved %s', display_path(download_location))
+
+
+def unpack_http_url(link, location, download_dir=None,
+ session=None, hashes=None):
+ if session is None:
+ raise TypeError(
+ "unpack_http_url() missing 1 required keyword argument: 'session'"
+ )
+
+ temp_dir = tempfile.mkdtemp('-unpack', 'pip-')
+
+ # If a download dir is specified, is the file already downloaded there?
+ already_downloaded_path = None
+ if download_dir:
+ already_downloaded_path = _check_download_dir(link,
+ download_dir,
+ hashes)
+
+ if already_downloaded_path:
+ from_path = already_downloaded_path
+ content_type = mimetypes.guess_type(from_path)[0]
+ else:
+ # let's download to a tmp dir
+ from_path, content_type = _download_http_url(link,
+ session,
+ temp_dir,
+ hashes)
+
+ # unpack the archive to the build dir location. even when only downloading
+ # archives, they have to be unpacked to parse dependencies
+ unpack_file(from_path, location, content_type, link)
+
+ # a download dir is specified; let's copy the archive there
+ if download_dir and not already_downloaded_path:
+ _copy_file(from_path, download_dir, link)
+
+ if not already_downloaded_path:
+ os.unlink(from_path)
+ rmtree(temp_dir)
+
+
+def unpack_file_url(link, location, download_dir=None, hashes=None):
+ """Unpack link into location.
+
+ If download_dir is provided and link points to a file, make a copy
+ of the link file inside download_dir.
+ """
+ link_path = url_to_path(link.url_without_fragment)
+
+ # If it's a url to a local directory
+ if is_dir_url(link):
+ if os.path.isdir(location):
+ rmtree(location)
+ shutil.copytree(link_path, location, symlinks=True)
+ if download_dir:
+ logger.info('Link is a directory, ignoring download_dir')
+ return
+
+ # If --require-hashes is off, `hashes` is either empty, the
+ # link's embedded hash, or MissingHashes; it is required to
+ # match. If --require-hashes is on, we are satisfied by any
+ # hash in `hashes` matching: a URL-based or an option-based
+ # one; no internet-sourced hash will be in `hashes`.
+ if hashes:
+ hashes.check_against_path(link_path)
+
+ # If a download dir is specified, is the file already there and valid?
+ already_downloaded_path = None
+ if download_dir:
+ already_downloaded_path = _check_download_dir(link,
+ download_dir,
+ hashes)
+
+ if already_downloaded_path:
+ from_path = already_downloaded_path
+ else:
+ from_path = link_path
+
+ content_type = mimetypes.guess_type(from_path)[0]
+
+ # unpack the archive to the build dir location. even when only downloading
+ # archives, they have to be unpacked to parse dependencies
+ unpack_file(from_path, location, content_type, link)
+
+ # a download dir is specified and not already downloaded
+ if download_dir and not already_downloaded_path:
+ _copy_file(from_path, download_dir, link)
+
+
+def _copy_dist_from_dir(link_path, location):
+ """Copy distribution files in `link_path` to `location`.
+
+ Invoked when user requests to install a local directory. E.g.:
+
+ pip install .
+ pip install ~/dev/git-repos/python-prompt-toolkit
+
+ """
+
+ # Note: This is currently VERY SLOW if you have a lot of data in the
+ # directory, because it copies everything with `shutil.copytree`.
+ # What it should really do is build an sdist and install that.
+ # See https://github.com/pypa/pip/issues/2195
+
+ if os.path.isdir(location):
+ rmtree(location)
+
+ # build an sdist
+ setup_py = 'setup.py'
+ sdist_args = [sys.executable]
+ sdist_args.append('-c')
+ sdist_args.append(SETUPTOOLS_SHIM % setup_py)
+ sdist_args.append('sdist')
+ sdist_args += ['--dist-dir', location]
+ logger.info('Running setup.py sdist for %s', link_path)
+
+ with indent_log():
+ call_subprocess(sdist_args, cwd=link_path, show_stdout=False)
+
+ # unpack sdist into `location`
+ sdist = os.path.join(location, os.listdir(location)[0])
+ logger.info('Unpacking sdist %s into %s', sdist, location)
+ unpack_file(sdist, location, content_type=None, link=None)
+
+
+class PipXmlrpcTransport(xmlrpc_client.Transport):
+ """Provide a `xmlrpclib.Transport` implementation via a `PipSession`
+ object.
+ """
+
+ def __init__(self, index_url, session, use_datetime=False):
+ xmlrpc_client.Transport.__init__(self, use_datetime)
+ index_parts = urllib_parse.urlparse(index_url)
+ self._scheme = index_parts.scheme
+ self._session = session
+
+ def request(self, host, handler, request_body, verbose=False):
+ parts = (self._scheme, host, handler, None, None, None)
+ url = urllib_parse.urlunparse(parts)
+ try:
+ headers = {'Content-Type': 'text/xml'}
+ response = self._session.post(url, data=request_body,
+ headers=headers, stream=True)
+ response.raise_for_status()
+ self.verbose = verbose
+ return self.parse_response(response.raw)
+ except requests.HTTPError as exc:
+ logger.critical(
+ "HTTP error %s while getting %s",
+ exc.response.status_code, url,
+ )
+ raise
+
+
+def unpack_url(link, location, download_dir=None,
+ only_download=False, session=None, hashes=None):
+ """Unpack link.
+ If link is a VCS link:
+ if only_download, export into download_dir and ignore location
+ else unpack into location
+ for other types of link:
+ - unpack into location
+ - if download_dir, copy the file into download_dir
+ - if only_download, mark location for deletion
+
+ :param hashes: A Hashes object, one of whose embedded hashes must match,
+ or HashMismatch will be raised. If the Hashes is empty, no matches are
+ required, and unhashable types of requirements (like VCS ones, which
+ would ordinarily raise HashUnsupported) are allowed.
+ """
+ # non-editable vcs urls
+ if is_vcs_url(link):
+ unpack_vcs_link(link, location)
+
+ # file urls
+ elif is_file_url(link):
+ unpack_file_url(link, location, download_dir, hashes=hashes)
+
+ # http urls
+ else:
+ if session is None:
+ session = PipSession()
+
+ unpack_http_url(
+ link,
+ location,
+ download_dir,
+ session,
+ hashes=hashes
+ )
+ if only_download:
+ write_delete_marker_file(location)
+
+
+def _download_http_url(link, session, temp_dir, hashes):
+ """Download link url into temp_dir using provided session"""
+ target_url = link.url.split('#', 1)[0]
+ try:
+ resp = session.get(
+ target_url,
+ # We use Accept-Encoding: identity here because requests
+ # defaults to accepting compressed responses. This breaks in
+ # a variety of ways depending on how the server is configured.
+ # - Some servers will notice that the file isn't a compressible
+ # file and will leave the file alone and with an empty
+ # Content-Encoding
+ # - Some servers will notice that the file is already
+ # compressed and will leave the file alone and will add a
+ # Content-Encoding: gzip header
+ # - Some servers won't notice anything at all and will take
+ # a file that's already been compressed and compress it again
+ # and set the Content-Encoding: gzip header
+ # By setting this to request only the identity encoding We're
+ # hoping to eliminate the third case. Hopefully there does not
+ # exist a server which when given a file will notice it is
+ # already compressed and that you're not asking for a
+ # compressed file and will then decompress it before sending
+ # because if that's the case I don't think it'll ever be
+ # possible to make this work.
+ headers={"Accept-Encoding": "identity"},
+ stream=True,
+ )
+ resp.raise_for_status()
+ except requests.HTTPError as exc:
+ logger.critical(
+ "HTTP error %s while getting %s", exc.response.status_code, link,
+ )
+ raise
+
+ content_type = resp.headers.get('content-type', '')
+ filename = link.filename # fallback
+ # Have a look at the Content-Disposition header for a better guess
+ content_disposition = resp.headers.get('content-disposition')
+ if content_disposition:
+ type, params = cgi.parse_header(content_disposition)
+ # We use ``or`` here because we don't want to use an "empty" value
+ # from the filename param.
+ filename = params.get('filename') or filename
+ ext = splitext(filename)[1]
+ if not ext:
+ ext = mimetypes.guess_extension(content_type)
+ if ext:
+ filename += ext
+ if not ext and link.url != resp.url:
+ ext = os.path.splitext(resp.url)[1]
+ if ext:
+ filename += ext
+ file_path = os.path.join(temp_dir, filename)
+ with open(file_path, 'wb') as content_file:
+ _download_url(resp, link, content_file, hashes)
+ return file_path, content_type
+
+
+def _check_download_dir(link, download_dir, hashes):
+ """ Check download_dir for previously downloaded file with correct hash
+ If a correct file is found return its path else None
+ """
+ download_path = os.path.join(download_dir, link.filename)
+ if os.path.exists(download_path):
+ # If already downloaded, does its hash match?
+ logger.info('File was already downloaded %s', download_path)
+ if hashes:
+ try:
+ hashes.check_against_path(download_path)
+ except HashMismatch:
+ logger.warning(
+ 'Previously-downloaded file %s has bad hash. '
+ 'Re-downloading.',
+ download_path
+ )
+ os.unlink(download_path)
+ return None
+ return download_path
+ return None