tools/helpers/http.py

   1 # Copyright 2021 Oliver Smith
   2 # SPDX-License-Identifier: GPL-3.0-or-later
import hashlib
import json
import logging
import os
import shutil
import urllib.error
import urllib.request

import tools.helpers.run
  11
  12
  13 def download(args, url, prefix, cache=True, loglevel=logging.INFO,
  14              allow_404=False):
  15     """ Download a file to disk.
  16
  17         :param url: the http(s) address of to the file to download
  18         :param prefix: for the cache, to make it easier to find (cache files
  19                        get a hash of the URL after the prefix)
  20         :param cache: if True, and url is cached, do not download it again
  21         :param loglevel: change to logging.DEBUG to only display the download
  22                          message in 'waydroid log', not in stdout. We use
  23                          this when downloading many APKINDEX files at once, no
  24                          point in showing a dozen messages.
  25         :param allow_404: do not raise an exception when the server responds
  26                           with a 404 Not Found error. Only display a warning on
  27                           stdout (no matter if loglevel is changed).
  28         :returns: path to the downloaded file in the cache or None on 404 """
  29     # Create cache folder
  30     if not os.path.exists(args.work + "/cache_http"):
  31         tools.helpers.run.user(args, ["mkdir", "-p", args.work + "/cache_http"])
  32
  33     # Check if file exists in cache
  34     prefix = prefix.replace("/", "_")
  35     path = (args.work + "/cache_http/" + prefix + "_" +
  36             hashlib.sha256(url.encode("utf-8")).hexdigest())
  37     if os.path.exists(path):
  38         if cache:
  39             return path
  40         tools.helpers.run.user(args, ["rm", path])
  41
  42     # Download the file
  43     logging.log(loglevel, "Download " + url)
  44     try:
  45         with urllib.request.urlopen(url) as response:
  46             with open(path, "wb") as handle:
  47                 shutil.copyfileobj(response, handle)
  48     # Handle 404
  49     except urllib.error.HTTPError as e:
  50         if e.code == 404 and allow_404:
  51             logging.warning("WARNING: file not found: " + url)
  52             return None
  53         raise
  54
  55     # Return path in cache
  56     return path
  57
  58
  59 def retrieve(url, headers=None, allow_404=False):
  60     """ Fetch the content of a URL and returns it as string.
  61
  62         :param url: the http(s) address of to the resource to fetch
  63         :param headers: dict of HTTP headers to use
  64         :param allow_404: do not raise an exception when the server responds
  65                           with a 404 Not Found error. Only display a warning
  66         :returns: str with the content of the response
  67     """
  68     # Download the file
  69     logging.verbose("Retrieving " + url)
  70
  71     if headers is None:
  72         headers = {}
  73
  74     req = urllib.request.Request(url, headers=headers)
  75     try:
  76         with urllib.request.urlopen(req) as response:
  77             return response.read()
  78     # Handle 404
  79     except urllib.error.HTTPError as e:
  80         if e.code == 404 and allow_404:
  81             logging.warning("WARNING: failed to retrieve content from: " + url)
  82             return None
  83         raise
  84
  85
  86 def retrieve_json(*args, **kwargs):
  87     """ Fetch the contents of a URL, parse it as JSON and return it. See
  88         retrieve() for the list of all parameters. """
  89     return json.loads(retrieve(*args, **kwargs))