tools/helpers/http.py

   1 # Copyright 2021 Oliver Smith
   2 # SPDX-License-Identifier: GPL-3.0-or-later
   3 import hashlib
   4 import logging
   5 import os
   6 import shutil
   7 import threading
   8 import urllib.request
   9
  10 import tools.helpers.run
  11 import time
  12
  13
  14 def download(args, url, prefix, cache=True, loglevel=logging.INFO,
  15              allow_404=False):
  16     """ Download a file to disk.
  17
  18         :param url: the http(s) address of to the file to download
  19         :param prefix: for the cache, to make it easier to find (cache files
  20                        get a hash of the URL after the prefix)
  21         :param cache: if True, and url is cached, do not download it again
  22         :param loglevel: change to logging.DEBUG to only display the download
  23                          message in 'waydroid log', not in stdout. We use
  24                          this when downloading many APKINDEX files at once, no
  25                          point in showing a dozen messages.
  26         :param allow_404: do not raise an exception when the server responds
  27                           with a 404 Not Found error. Only display a warning on
  28                           stdout (no matter if loglevel is changed).
  29         :returns: path to the downloaded file in the cache or None on 404 """
  30
  31     # Show progress while downloading
  32     downloadEnded = False
  33     def progress(totalSize, destinationPath):
  34         while not downloadEnded:
  35             print("[Downloading] {}/{}".format(os.path.getsize(destinationPath), totalSize), end='\r')
  36             time.sleep(.01)
  37
  38     # Create cache folder
  39     if not os.path.exists(args.work + "/cache_http"):
  40         tools.helpers.run.user(args, ["mkdir", "-p", args.work + "/cache_http"])
  41
  42     # Check if file exists in cache
  43     prefix = prefix.replace("/", "_")
  44     path = (args.work + "/cache_http/" + prefix + "_" +
  45             hashlib.sha256(url.encode("utf-8")).hexdigest())
  46     if os.path.exists(path):
  47         if cache:
  48             return path
  49         tools.helpers.run.user(args, ["rm", path])
  50
  51     # Download the file
  52     logging.log(loglevel, "Download " + url)
  53     try:
  54         with urllib.request.urlopen(url) as response:
  55             with open(path, "wb") as handle:
  56                 threading.Thread(target=progress, args=(response.headers.get('content-length'), path)).start()
  57                 shutil.copyfileobj(response, handle)
  58     # Handle 404
  59     except urllib.error.HTTPError as e:
  60         if e.code == 404 and allow_404:
  61             logging.warning("WARNING: file not found: " + url)
  62             return None
  63         raise
  64     downloadEnded = True
  65
  66     # Return path in cache
  67     return path
  68
  69
  70 def retrieve(url, headers=None):
  71     """ Fetch the content of a URL and returns it as string.
  72
  73         :param url: the http(s) address of to the resource to fetch
  74         :param headers: dict of HTTP headers to use
  75         :returns: status and str with the content of the response
  76     """
  77     # Download the file
  78     logging.verbose("Retrieving " + url)
  79
  80     if headers is None:
  81         headers = {}
  82
  83     req = urllib.request.Request(url, headers=headers)
  84     try:
  85         with urllib.request.urlopen(req) as response:
  86             return 200, response.read()
  87     # Handle 404
  88     except urllib.error.HTTPError as e:
  89         return e.code, ""