1
0

Compare commits

..

2 Commits

2 changed files with 111 additions and 91 deletions

View File

@ -4,8 +4,10 @@
### Added
- For 2ch.hk, a check for whether a file is a sticker was added;
- Encoding for `!op.txt` file was explicitly set to `utf-8`;
- Handling of HTTP errors and connection-reset errors was added, so the program
won't crash if a file doesn't exist or is not accessible for any other reason;
- Handling of connection errors was added, so the program won't crash if a file
doesn't exist or is not accessible for any other reason; any damaged files
that were created are removed;
- Added 3 retries if a file was damaged during downloading;
- The scraper now compares the hashes of two files that happen to share the
same name and size when the hash reported by an imageboard is not the same as
that of the file. This results in extra downloading and hash calculations. Hopefully,

View File

@ -97,15 +97,33 @@ class Scraper:
filepath = join(self._save_directory, \
self._same_filename(f.name, self._save_directory))
try:
retries = 3
while retries > 0:
self._url_opener.retrieve(f.download_url, filepath)
if not self._check_file(f, filepath):
print(filepath, f.size, f.hash_value)
remove(filepath)
retries -= 1
else:
break
if is_same_filename:
f1_hexdig, f1_dig = self._hash_file(orig_filepath, f.hash_algorithm)
f2_hexdig, f2_dig = self._hash_file(filepath, f.hash_algorithm)
assert filepath != orig_filepath, 'Filepaths are matching!'
if f1_hexdig == f2_hexdig or f1_dig == f2_dig:
remove(filepath)
except HTTPError as e:
print(e, f.download_url)
print("HTTP Error", e.code, e.reason, f.download_url)
if exists(filepath):
remove(filepath)
except ConnectionResetError:
print("Remote host reset connection for", f.download_url, \
"Try again later.")
print("Connection reset for", f.download_url)
if exists(filepath):
remove(filepath)
except ConnectionRefusedError:
print("Connection refused for", f.download_url)
if exists(filepath):
remove(filepath)
except ConnectionAbortedError:
print("Connection aborted for", f.download_url)
if exists(filepath):
remove(filepath)