Compare commits
No commits in common. "2e6352cb13bad749651ce5b41c34820621395d71" and "21837c5335cc57deb890e2aece23fb01fbbb3cd9" have entirely different histories.
2e6352cb13
...
21837c5335
@ -4,10 +4,8 @@
|
|||||||
### Added
|
### Added
|
||||||
- For 2ch.hk, a check for whether a file is a sticker was added;
|
- For 2ch.hk, a check for whether a file is a sticker was added;
|
||||||
- Encoding for `!op.txt` file was explicitly set to `utf-8`;
|
- Encoding for `!op.txt` file was explicitly set to `utf-8`;
|
||||||
- Handling of connection errors was added, so now the program won't crash if a file
|
- Handling of HTTP errors and connection reset errors was added, so now the program
|
||||||
doesn't exist or is not accessible for any other reason, and if any damaged files
|
won't crash if a file doesn't exist or is not accessible for any other reason;
|
||||||
were created, they will be removed;
|
|
||||||
- Added 3 retries if a file was damaged during downloading;
|
|
||||||
- Hash matching was added to the scraper for two files that happen to share
|
- Hash matching was added to the scraper for two files that happen to share
|
||||||
the same name and size, but the hash reported by an imageboard is not the same as that of
|
the same name and size, but the hash reported by an imageboard is not the same as that of
|
||||||
the file. This results in excessive downloading and hash calculations. Hopefully,
|
the file. This results in excessive downloading and hash calculations. Hopefully,
|
||||||
|
@ -97,33 +97,15 @@ class Scraper:
|
|||||||
filepath = join(self._save_directory, \
|
filepath = join(self._save_directory, \
|
||||||
self._same_filename(f.name, self._save_directory))
|
self._same_filename(f.name, self._save_directory))
|
||||||
try:
|
try:
|
||||||
retries = 3
|
|
||||||
while retries > 0:
|
|
||||||
self._url_opener.retrieve(f.download_url, filepath)
|
self._url_opener.retrieve(f.download_url, filepath)
|
||||||
if not self._check_file(f, filepath):
|
|
||||||
print(filepath, f.size, f.hash_value)
|
|
||||||
remove(filepath)
|
|
||||||
retries -= 1
|
|
||||||
else:
|
|
||||||
break
|
|
||||||
if is_same_filename:
|
if is_same_filename:
|
||||||
f1_hexdig, f1_dig = self._hash_file(orig_filepath, f.hash_algorithm)
|
f1_hexdig, f1_dig = self._hash_file(orig_filepath, f.hash_algorithm)
|
||||||
f2_hexdig, f2_dig = self._hash_file(filepath, f.hash_algorithm)
|
f2_hexdig, f2_dig = self._hash_file(filepath, f.hash_algorithm)
|
||||||
|
assert filepath != orig_filepath, 'Filepaths are matching!'
|
||||||
if f1_hexdig == f2_hexdig or f1_dig == f2_dig:
|
if f1_hexdig == f2_hexdig or f1_dig == f2_dig:
|
||||||
remove(filepath)
|
remove(filepath)
|
||||||
except HTTPError as e:
|
except HTTPError as e:
|
||||||
print("HTTP Error", e.code, e.reason, f.download_url)
|
print(e, f.download_url)
|
||||||
if exists(filepath):
|
|
||||||
remove(filepath)
|
|
||||||
except ConnectionResetError:
|
except ConnectionResetError:
|
||||||
print("Connection reset for", f.download_url)
|
print("Remote host reset connection for", f.download_url, \
|
||||||
if exists(filepath):
|
"Try again later.")
|
||||||
remove(filepath)
|
|
||||||
except ConnectionRefusedError:
|
|
||||||
print("Connection refused for", f.download_url)
|
|
||||||
if exists(filepath):
|
|
||||||
remove(filepath)
|
|
||||||
except ConnectionAbortedError:
|
|
||||||
print("Connection aborted for", f.download_url)
|
|
||||||
if exists(filepath):
|
|
||||||
remove(filepath)
|
|
||||||
|
Loading…
Reference in New Issue
Block a user