# Source: waydroid.git — tools/helpers/http.py
# (captured via the glassweightruler.freedombox.rocks Git web view;
# commit message shown there: "Update README.md")
1 # Copyright 2021 Oliver Smith
2 # SPDX-License-Identifier: GPL-3.0-or-later
3 import hashlib
4 import logging
5 import os
6 import shutil
7 import threading
8 import urllib.request
9
10 import tools.helpers.run
11 import time
12
13
def download(args, url, prefix, cache=True, loglevel=logging.INFO,
             allow_404=False):
    """ Download a file to disk.

    :param args: parsed waydroid arguments; args.work is the work directory
                 that holds the HTTP cache
    :param url: the http(s) address of the file to download
    :param prefix: for the cache, to make it easier to find (cache files
                   get a hash of the URL after the prefix)
    :param cache: if True, and url is cached, do not download it again
    :param loglevel: change to logging.DEBUG to only display the download
                     message in 'waydroid log', not in stdout. We use
                     this when downloading many APKINDEX files at once, no
                     point in showing a dozen messages.
    :param allow_404: do not raise an exception when the server responds
                      with a 404 Not Found error. Only display a warning on
                      stdout (no matter if loglevel is changed).
    :returns: path to the downloaded file in the cache or None on 404 """

    # helper: byte count -> megabytes (decimal, 1 MB = 10^6 bytes)
    def fromBytesToMB(numBytes, decimalPlaces=2):
        return round(int(numBytes) / 1000000, decimalPlaces)

    def getDownloadSpeed(lastSize, currentSize, timeTaken, decimalPlaces=2):
        # sizes are in MB and timeTaken in seconds
        speedUnit = "mbps"
        sizeDifference = currentSize - lastSize

        if sizeDifference < 1:
            # sizeDifference is less than 1 MB: convert it to KB and the
            # unit to kbps, for better readability
            sizeDifference *= 1000
            speedUnit = "kbps"

        # FIX: two samples can land on the same clock tick on coarse
        # timers; avoid ZeroDivisionError by reporting 0 in that case
        if timeTaken <= 0:
            return (0, speedUnit)

        # sizeDifference MB (or KB) was downloaded in timeTaken seconds,
        # so downloadSpeed = sizeDifference / timeTaken mbps (or kbps)
        return (round(sizeDifference / timeTaken, decimalPlaces), speedUnit)

    # Flag read by the progress thread; flipped by the main thread once
    # the transfer is over (or has failed).
    downloadEnded = False

    def progress(totalSize, destinationPath):
        # FIX: the Content-Length header may be absent (None); show "?"
        # instead of crashing the progress thread on int(None).
        # Convert totalSize up front — it never changes inside the loop,
        # no point in recomputing it every .01 seconds.
        totalSize = "?" if totalSize is None else fromBytesToMB(totalSize)

        # maximum width of the downloaded-size field, used to pad shorter
        # values so stale characters from a longer redraw are overwritten
        totalSizeStrLen = len(str(totalSize))

        # lastSize and lastSizeChangeAt are used to calculate the speed
        lastSize = 0
        lastSizeChangeAt = time.time()

        downloadSpeed = 0, "mbps"

        while not downloadEnded:
            currentSize = fromBytesToMB(os.path.getsize(destinationPath))

            if currentSize != lastSize:
                sizeChangeAt = time.time()
                downloadSpeed = getDownloadSpeed(
                    lastSize, currentSize,
                    timeTaken=sizeChangeAt - lastSizeChangeAt
                )

                lastSize = currentSize
                lastSizeChangeAt = sizeChangeAt

            # FIX: format into SEPARATE variables — the original clobbered
            # the (value, unit) tuple with its own formatted string, so on
            # iterations where the size was unchanged, downloadSpeed[0] and
            # downloadSpeed[1] indexed characters of that string instead of
            # the speed value and unit.
            # fixed-width fields so each "\r" redraw fully overwrites the
            # previous line; currentSize never exceeds totalSize
            currentSizeStr = str(currentSize).rjust(totalSizeStrLen)
            # assuming max downloadSpeed to be 9999.99 mbps
            downloadSpeedStr = f"{str(downloadSpeed[0]).rjust(7)} {downloadSpeed[1]}"

            # print progress bar
            print(f"\r[Downloading] {currentSizeStr} MB/{totalSize} MB {downloadSpeedStr}(approx.)", end=" ")
            time.sleep(.01)

    # Create cache folder
    if not os.path.exists(args.work + "/cache_http"):
        tools.helpers.run.user(args, ["mkdir", "-p", args.work + "/cache_http"])

    # Check if file exists in cache
    prefix = prefix.replace("/", "_")
    path = (args.work + "/cache_http/" + prefix + "_" +
            hashlib.sha256(url.encode("utf-8")).hexdigest())
    if os.path.exists(path):
        if cache:
            return path
        tools.helpers.run.user(args, ["rm", path])

    # Download the file
    logging.log(loglevel, "Downloading " + url)
    try:
        with urllib.request.urlopen(url) as response:
            with open(path, "wb") as handle:
                # daemon=True kills this thread if the main thread dies,
                # else the progress bar would keep printing after the user
                # cancels the download with ctrl+c
                threading.Thread(
                    target=progress,
                    args=(response.headers.get('content-length'), path),
                    daemon=True).start()
                shutil.copyfileobj(response, handle)
    # Handle 404
    except urllib.error.HTTPError as e:
        if e.code == 404 and allow_404:
            logging.warning("WARNING: file not found: " + url)
            return None
        raise
    finally:
        # FIX: always stop the progress thread — the original only set
        # this on the success path, so a failed copyfileobj (or the 404
        # early return) left the daemon thread redrawing the line
        downloadEnded = True

    # Return path in cache
    return path
126
127
def retrieve(url, headers=None):
    """ Fetch the content of a URL and return it together with a status.

    :param url: the http(s) address of the resource to fetch
    :param headers: dict of HTTP headers to use
    :returns: (status, content) tuple:
              (200, body) on success — note the body is bytes, as returned
              by response.read(), not str;
              (-1, "") when the URL is malformed;
              (error code, "") on an HTTP error such as 404
    """
    # Download the file
    # NOTE(review): logging.verbose is a custom level presumably installed
    # by waydroid's logging setup elsewhere — not part of stdlib logging
    logging.verbose("Retrieving " + url)

    if headers is None:
        headers = {}

    try:
        req = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(req) as response:
            return 200, response.read()
    # Handle malformed URL (FIX: dropped the unused exception binding)
    except ValueError:
        return -1, ""
    # Handle 404 and other HTTP error statuses
    except urllib.error.HTTPError as e:
        return e.code, ""