
Improved error handling, retries for damaged files.

Alexander Andreev 2020-11-19 01:26:19 +04:00
parent 21837c5335
commit 7b2fcf0899
1 changed file with 107 additions and 89 deletions


@@ -97,15 +97,33 @@ class Scraper:
         filepath = join(self._save_directory, \
                         self._same_filename(f.name, self._save_directory))
         try:
-            self._url_opener.retrieve(f.download_url, filepath)
+            retries = 3
+            while retries > 0:
+                self._url_opener.retrieve(f.download_url, filepath)
+                if not self._check_file(f, filepath):
+                    print(filepath, f.size, f.hash_value)
+                    remove(filepath)
+                    retries -= 1
+                else:
+                    break
             if is_same_filename:
                 f1_hexdig, f1_dig = self._hash_file(orig_filepath, f.hash_algorithm)
                 f2_hexdig, f2_dig = self._hash_file(filepath, f.hash_algorithm)
-                assert filepath != orig_filepath, 'Filepaths are matching!'
                 if f1_hexdig == f2_hexdig or f1_dig == f2_dig:
                     remove(filepath)
         except HTTPError as e:
-            print(e, f.download_url)
+            print("HTTP Error", e.code, e.reason, f.download_url)
+            if exists(filepath):
+                remove(filepath)
         except ConnectionResetError:
-            print("Remote host reset connection for", f.download_url, \
-                  "Try again later.")
+            print("Connection reset for", f.download_url)
+            if exists(filepath):
+                remove(filepath)
+        except ConnectionRefusedError:
+            print("Connection refused for", f.download_url)
+            if exists(filepath):
+                remove(filepath)
+        except ConnectionAbortedError:
+            print("Connection aborted for", f.download_url)
+            if exists(filepath):
+                remove(filepath)
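
In short, the hunk replaces a single retrieve() call with a bounded retry loop that re-downloads a file when its integrity check fails, and every connection-level failure now removes the partial file instead of leaving it on disk. Below is a minimal, self-contained sketch of that pattern. The names download_with_retries, sha256_of and expected_digest, and the use of urllib.request.urlretrieve with a hard-coded SHA-256 check, are illustrative assumptions, not the project's actual API: the real code goes through self._url_opener.retrieve() and validates via self._check_file() against f.size and f.hash_value with a configurable hash algorithm.

    import hashlib
    from os import remove
    from os.path import exists
    from urllib.error import HTTPError
    from urllib.request import urlretrieve


    def sha256_of(filepath):
        # Stream the file in chunks so large downloads need not fit in memory.
        h = hashlib.sha256()
        with open(filepath, 'rb') as fh:
            for chunk in iter(lambda: fh.read(65536), b''):
                h.update(chunk)
        return h.hexdigest()


    def download_with_retries(url, filepath, expected_digest, retries=3):
        """Fetch url into filepath, re-downloading up to `retries` times
        if the file arrives damaged (digest mismatch)."""
        try:
            while retries > 0:
                urlretrieve(url, filepath)
                if sha256_of(filepath) != expected_digest:
                    remove(filepath)   # damaged download: discard and retry
                    retries -= 1
                else:
                    return True
        except (HTTPError, ConnectionResetError,
                ConnectionRefusedError, ConnectionAbortedError) as e:
            print("Download failed for", url, "-", e)
            if exists(filepath):       # never leave a partial file behind
                remove(filepath)
        return False

The same cleanup rule applies in both the loop and the handlers: a file that cannot be verified is deleted, so a later run can retry it from scratch rather than trusting a damaged copy.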