Compare commits
2 Commits
21837c5335
...
2e6352cb13
Author | SHA1 | Date | |
---|---|---|---|
2e6352cb13 | |||
7b2fcf0899 |
|
@ -4,8 +4,10 @@
|
|||
### Added
|
||||
- For 2ch.hk, a check for whether a file is a sticker was added;
|
||||
- Encoding for `!op.txt` file was explicitly set to `utf-8`;
|
||||
- Handling of HTTP errors and connection reset errors was added, so now the program
|
||||
won't crash if a file doesn't exist or is not accessible for any other reason;
|
||||
- Handling of connection errors was added, so now the program won't crash if a file
|
||||
doesn't exist or is not accessible for any other reason, and if any damaged files
|
||||
were created, they will be removed;
|
||||
- Added 3 retries if a file was damaged during downloading;
|
||||
- Hash matching was added to the scraper for two files that happen to share
|
||||
the same name and size when the hash reported by an imageboard is not the same as that of
|
||||
the file. It results in excessive downloading and hash calculations. Hopefully,
|
||||
|
|
|
@ -97,15 +97,33 @@ class Scraper:
|
|||
filepath = join(self._save_directory, \
|
||||
self._same_filename(f.name, self._save_directory))
|
||||
try:
|
||||
retries = 3
|
||||
while retries > 0:
|
||||
self._url_opener.retrieve(f.download_url, filepath)
|
||||
if not self._check_file(f, filepath):
|
||||
print(filepath, f.size, f.hash_value)
|
||||
remove(filepath)
|
||||
retries -= 1
|
||||
else:
|
||||
break
|
||||
if is_same_filename:
|
||||
f1_hexdig, f1_dig = self._hash_file(orig_filepath, f.hash_algorithm)
|
||||
f2_hexdig, f2_dig = self._hash_file(filepath, f.hash_algorithm)
|
||||
assert filepath != orig_filepath, 'Filepaths are matching!'
|
||||
if f1_hexdig == f2_hexdig or f1_dig == f2_dig:
|
||||
remove(filepath)
|
||||
except HTTPError as e:
|
||||
print(e, f.download_url)
|
||||
print("HTTP Error", e.code, e.reason, f.download_url)
|
||||
if exists(filepath):
|
||||
remove(filepath)
|
||||
except ConnectionResetError:
|
||||
print("Remote host reset connection for", f.download_url, \
|
||||
"Try again later.")
|
||||
print("Connection reset for", f.download_url)
|
||||
if exists(filepath):
|
||||
remove(filepath)
|
||||
except ConnectionRefusedError:
|
||||
print("Connection refused for", f.download_url)
|
||||
if exists(filepath):
|
||||
remove(filepath)
|
||||
except ConnectionAbortedError:
|
||||
print("Connection aborted for", f.download_url)
|
||||
if exists(filepath):
|
||||
remove(filepath)
|
||||
|
|
Loading…
Reference in New Issue
Block a user