Fix HTTP resume (#84)

* Fix HTTP resume

* Handle error 416 when resuming complete file

* Line break for comment

* Fix logic

* Ensure err.output is digits

* Amend comment

* Support retry on error 412
This commit is contained in:
Craig Davison 2021-02-15 18:14:52 -07:00 committed by GitHub
parent 8b4287d7b4
commit 03ef2ba0fe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@@ -264,23 +264,40 @@ def replicate_url(full_url,
options = '-fL'
else:
options = '-sfL'
curl_cmd = ['/usr/bin/curl', options,
'--create-dirs',
'-o', local_file_path]
if not full_url.endswith(".gz"):
# stupid hack for stupid Apple behavior where it sometimes returns
# compressed files even when not asked for
curl_cmd.append('--compressed')
if not ignore_cache and os.path.exists(local_file_path):
curl_cmd.extend(['-z', local_file_path])
if attempt_resume:
curl_cmd.extend(['-C', '-'])
curl_cmd.append(full_url)
print("Downloading %s..." % full_url)
try:
subprocess.check_call(curl_cmd)
except subprocess.CalledProcessError as err:
raise ReplicationError(err)
need_download = True
while need_download:
curl_cmd = ['/usr/bin/curl', options,
'--create-dirs',
'-o', local_file_path,
'-w', '%{http_code}']
if not full_url.endswith(".gz"):
# stupid hack for stupid Apple behavior where it sometimes returns
# compressed files even when not asked for
curl_cmd.append('--compressed')
resumed = False
if not ignore_cache and os.path.exists(local_file_path):
if not attempt_resume:
curl_cmd.extend(['-z', local_file_path])
else:
resumed = True
curl_cmd.extend(['-z', '-' + local_file_path, '-C', '-'])
curl_cmd.append(full_url)
print("Downloading %s..." % full_url)
need_download = False
try:
output = subprocess.check_output(curl_cmd)
except subprocess.CalledProcessError as err:
if not resumed or not err.output.isdigit():
raise ReplicationError(err)
# HTTP error 416 on resume: the download is already complete and the
# file is up-to-date
# HTTP error 412 on resume: the file was updated server-side
if int(err.output) == 412:
print("Removing %s and retrying." % local_file_path)
os.unlink(local_file_path)
need_download = True
elif int(err.output) != 416:
raise ReplicationError(err)
return local_file_path