Improved error handling and added retries for damaged files.
This commit is contained in:
parent
21837c5335
commit
7b2fcf0899
@ -97,15 +97,33 @@ class Scraper:
|
||||
filepath = join(self._save_directory, \
|
||||
self._same_filename(f.name, self._save_directory))
|
||||
try:
|
||||
retries = 3
|
||||
while retries > 0:
|
||||
self._url_opener.retrieve(f.download_url, filepath)
|
||||
if not self._check_file(f, filepath):
|
||||
print(filepath, f.size, f.hash_value)
|
||||
remove(filepath)
|
||||
retries -= 1
|
||||
else:
|
||||
break
|
||||
if is_same_filename:
|
||||
f1_hexdig, f1_dig = self._hash_file(orig_filepath, f.hash_algorithm)
|
||||
f2_hexdig, f2_dig = self._hash_file(filepath, f.hash_algorithm)
|
||||
assert filepath != orig_filepath, 'Filepaths are matching!'
|
||||
if f1_hexdig == f2_hexdig or f1_dig == f2_dig:
|
||||
remove(filepath)
|
||||
except HTTPError as e:
|
||||
print(e, f.download_url)
|
||||
print("HTTP Error", e.code, e.reason, f.download_url)
|
||||
if exists(filepath):
|
||||
remove(filepath)
|
||||
except ConnectionResetError:
|
||||
print("Remote host reset connection for", f.download_url, \
|
||||
"Try again later.")
|
||||
print("Connection reset for", f.download_url)
|
||||
if exists(filepath):
|
||||
remove(filepath)
|
||||
except ConnectionRefusedError:
|
||||
print("Connection refused for", f.download_url)
|
||||
if exists(filepath):
|
||||
remove(filepath)
|
||||
except ConnectionAbortedError:
|
||||
print("Connection aborted for", f.download_url)
|
||||
if exists(filepath):
|
||||
remove(filepath)
|
||||
|
Loading…
Reference in New Issue
Block a user