Compare commits
2 Commits
21837c5335
...
2e6352cb13
Author | SHA1 | Date | |
---|---|---|---|
2e6352cb13 | |||
7b2fcf0899 |
|
@ -4,8 +4,10 @@
|
||||||
### Added
|
### Added
|
||||||
- For 2ch.hk, a check for whether a file is a sticker was added;
|
- For 2ch.hk, a check for whether a file is a sticker was added;
|
||||||
- Encoding for `!op.txt` file was explicitly set to `utf-8`;
|
- Encoding for `!op.txt` file was explicitly set to `utf-8`;
|
||||||
- Handling of HTTP errors and connection reset errors was added, so now the program
|
- Handling of connection errors was added, so now the program won't crash if a file
|
||||||
won't crash if a file doesn't exist or is not accessible for any other reason;
|
doesn't exist or is not accessible for any other reason, and if any damaged files
|
||||||
|
were created, they will be removed;
|
||||||
|
- Added 3 retries if a file was damaged during downloading;
|
||||||
- Hash matching was added to the scraper for two files that happen to share
|
- Hash matching was added to the scraper for two files that happen to share
|
||||||
same name and size, but hash reported by an imageboard is not the same as of
|
same name and size, but hash reported by an imageboard is not the same as of
|
||||||
a file. It results in excessive downloading and hash calculations. Hopefully,
|
a file. It results in excessive downloading and hash calculations. Hopefully,
|
||||||
|
|
|
@ -97,15 +97,33 @@ class Scraper:
|
||||||
filepath = join(self._save_directory, \
|
filepath = join(self._save_directory, \
|
||||||
self._same_filename(f.name, self._save_directory))
|
self._same_filename(f.name, self._save_directory))
|
||||||
try:
|
try:
|
||||||
|
retries = 3
|
||||||
|
while retries > 0:
|
||||||
self._url_opener.retrieve(f.download_url, filepath)
|
self._url_opener.retrieve(f.download_url, filepath)
|
||||||
|
if not self._check_file(f, filepath):
|
||||||
|
print(filepath, f.size, f.hash_value)
|
||||||
|
remove(filepath)
|
||||||
|
retries -= 1
|
||||||
|
else:
|
||||||
|
break
|
||||||
if is_same_filename:
|
if is_same_filename:
|
||||||
f1_hexdig, f1_dig = self._hash_file(orig_filepath, f.hash_algorithm)
|
f1_hexdig, f1_dig = self._hash_file(orig_filepath, f.hash_algorithm)
|
||||||
f2_hexdig, f2_dig = self._hash_file(filepath, f.hash_algorithm)
|
f2_hexdig, f2_dig = self._hash_file(filepath, f.hash_algorithm)
|
||||||
assert filepath != orig_filepath, 'Filepaths are matching!'
|
|
||||||
if f1_hexdig == f2_hexdig or f1_dig == f2_dig:
|
if f1_hexdig == f2_hexdig or f1_dig == f2_dig:
|
||||||
remove(filepath)
|
remove(filepath)
|
||||||
except HTTPError as e:
|
except HTTPError as e:
|
||||||
print(e, f.download_url)
|
print("HTTP Error", e.code, e.reason, f.download_url)
|
||||||
|
if exists(filepath):
|
||||||
|
remove(filepath)
|
||||||
except ConnectionResetError:
|
except ConnectionResetError:
|
||||||
print("Remote host reset connection for", f.download_url, \
|
print("Connection reset for", f.download_url)
|
||||||
"Try again later.")
|
if exists(filepath):
|
||||||
|
remove(filepath)
|
||||||
|
except ConnectionRefusedError:
|
||||||
|
print("Connection refused for", f.download_url)
|
||||||
|
if exists(filepath):
|
||||||
|
remove(filepath)
|
||||||
|
except ConnectionAbortedError:
|
||||||
|
print("Connection aborted for", f.download_url)
|
||||||
|
if exists(filepath):
|
||||||
|
remove(filepath)
|
||||||
|
|
Loading…
Reference in New Issue
Block a user