Fix HTTP resume (#84)

* Fix HTTP resume

* Handle error 416 when resuming complete file

* Line break for comment

* Fix logic

* Ensure err.output contains only digits

* Amend comment

* Support retry on error 412
This commit is contained in:
Craig Davison 2021-02-15 18:14:52 -07:00 committed by GitHub
parent 8b4287d7b4
commit 03ef2ba0fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -264,22 +264,39 @@ def replicate_url(full_url,
options = '-fL' options = '-fL'
else: else:
options = '-sfL' options = '-sfL'
need_download = True
while need_download:
curl_cmd = ['/usr/bin/curl', options, curl_cmd = ['/usr/bin/curl', options,
'--create-dirs', '--create-dirs',
'-o', local_file_path] '-o', local_file_path,
'-w', '%{http_code}']
if not full_url.endswith(".gz"): if not full_url.endswith(".gz"):
# stupid hack for stupid Apple behavior where it sometimes returns # stupid hack for stupid Apple behavior where it sometimes returns
# compressed files even when not asked for # compressed files even when not asked for
curl_cmd.append('--compressed') curl_cmd.append('--compressed')
resumed = False
if not ignore_cache and os.path.exists(local_file_path): if not ignore_cache and os.path.exists(local_file_path):
if not attempt_resume:
curl_cmd.extend(['-z', local_file_path]) curl_cmd.extend(['-z', local_file_path])
if attempt_resume: else:
curl_cmd.extend(['-C', '-']) resumed = True
curl_cmd.extend(['-z', '-' + local_file_path, '-C', '-'])
curl_cmd.append(full_url) curl_cmd.append(full_url)
print("Downloading %s..." % full_url) print("Downloading %s..." % full_url)
need_download = False
try: try:
subprocess.check_call(curl_cmd) output = subprocess.check_output(curl_cmd)
except subprocess.CalledProcessError as err: except subprocess.CalledProcessError as err:
if not resumed or not err.output.isdigit():
raise ReplicationError(err)
# HTTP error 416 on resume: the download is already complete and the
# file is up-to-date
# HTTP error 412 on resume: the file was updated server-side
if int(err.output) == 412:
print("Removing %s and retrying." % local_file_path)
os.unlink(local_file_path)
need_download = True
elif int(err.output) != 416:
raise ReplicationError(err) raise ReplicationError(err)
return local_file_path return local_file_path