Added HTTP and URL exception handling.
parent f79abcc310
commit 6022c9929a
@@ -5,8 +5,9 @@ from os import remove, stat
 from os.path import exists, join, getsize
 import re
 from typing import List, Callable
-from urllib.request import urlretrieve, URLopener, HTTPError
+from urllib.request import urlretrieve, URLopener, HTTPError, URLError
 import hashlib
+from http.client import HTTPException
 
 from scrapthechan import USER_AGENT
 from scrapthechan.fileinfo import FileInfo
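Side note on the imports: HTTPError and URLError are defined in urllib.error and only re-exported by urllib.request, and http.client.HTTPException is the base class of the low-level protocol errors (BadStatusLine, IncompleteRead, ...) that can surface beneath urllib. The commit's imports work as written; a sketch of the canonical equivalents:

# Canonical homes of the exceptions handled below; urllib.request
# merely re-exports HTTPError and URLError from urllib.error.
from urllib.error import HTTPError, URLError  # raised by urlopen()/urlretrieve()
from http.client import HTTPException        # base of low-level protocol errors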
@@ -66,6 +67,8 @@ class Scraper:
     def _hash_file(self, filepath: str, hash_algorithm: str = "md5",
             blocksize: int = 1048576) -> (str, str):
         """Compute hash of a file."""
+        if hash_algorithm is None:
+            return None
         hash_func = hashlib.new(hash_algorithm)
         with open(filepath, 'rb') as f:
             buf = f.read(blocksize)
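For context, the surrounding method reads the file in fixed-size blocks and feeds them to hashlib; the new guard lets callers pass hash_algorithm=None when a board's API supplies no hash. A self-contained sketch of that pattern (hash_file is a hypothetical standalone name for the _hash_file method above; note hashlib's digest() is bytes, even though the original annotates (str, str)):

import hashlib
from typing import Optional, Tuple

def hash_file(filepath: str, hash_algorithm: Optional[str] = "md5",
        blocksize: int = 1048576) -> Optional[Tuple[str, bytes]]:
    """Blockwise hash of a file; None when no algorithm is given."""
    if hash_algorithm is None:   # mirrors the guard added above
        return None
    hash_func = hashlib.new(hash_algorithm)
    with open(filepath, "rb") as f:
        buf = f.read(blocksize)
        while buf:               # bounded memory even for large files
            hash_func.update(buf)
            buf = f.read(blocksize)
    return hash_func.hexdigest(), hash_func.digest()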
@@ -82,6 +85,7 @@ class Scraper:
         if not (f.size == computed_size \
                 or f.size == round(computed_size / 1024)):
             return False
-        hexdig, dig = self._hash_file(filepath, f.hash_algorithm)
-        return f.hash_value == hexdig or f.hash_value == dig
+        if not f.hash_algorithm is None:
+            hexdig, dig = self._hash_file(filepath, f.hash_algorithm)
+            return f.hash_value == hexdig or f.hash_value == dig
 
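On the size check above: it accepts the reported size either as an exact byte count or as that count rounded to kilobytes, presumably because some board APIs report sizes in KB (an assumption; the commit does not say). In isolation, with hypothetical names:

def size_matches(reported_size: int, computed_size: int) -> bool:
    # True when the API-reported size equals the on-disk byte count,
    # or that byte count expressed (rounded) in kilobytes.
    return (reported_size == computed_size
            or reported_size == round(computed_size / 1024))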
@@ -101,7 +105,6 @@ class Scraper:
         while retries > 0:
             self._url_opener.retrieve(f.download_url, filepath)
             if not self._check_file(f, filepath):
-                print(filepath, f.size, f.hash_value)
                 remove(filepath)
                 retries -= 1
             else:
@@ -115,6 +118,14 @@ class Scraper:
             print("HTTP Error", e.code, e.reason, f.download_url)
             if exists(filepath):
                 remove(filepath)
+        except HTTPException:
+            print("HTTP Exception for", f.download_url)
+            if exists(filepath):
+                remove(filepath)
+        except URLError as e:
+            print("URL Error for", f.download_url)
+            if exists(filepath):
+                remove(filepath)
         except ConnectionResetError:
             print("Connection reset for", f.download_url)
             if exists(filepath):
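Taken together, the download path now cleans up after four failure modes. A minimal sketch of the whole loop under illustrative assumptions: download_checked and check are hypothetical names, and plain urlretrieve stands in for the project's USER_AGENT-configured URLopener:

from os import remove
from os.path import exists
from http.client import HTTPException
from urllib.error import HTTPError, URLError
from urllib.request import urlretrieve

def download_checked(url: str, filepath: str, check, retries: int = 3) -> bool:
    """Retry a download until check(filepath) passes; never keep a bad file."""
    try:
        while retries > 0:
            urlretrieve(url, filepath)
            if not check(filepath):    # e.g. the size/hash verification above
                remove(filepath)       # drop the corrupt copy and retry
                retries -= 1
            else:
                return True
    except HTTPError as e:             # subclass of URLError, so it comes first
        print("HTTP Error", e.code, e.reason, url)
    except HTTPException:              # bad status line, truncated read, ...
        print("HTTP Exception for", url)
    except URLError:                   # DNS failure, refused connection, ...
        print("URL Error for", url)
    except ConnectionResetError:       # peer closed the socket mid-transfer
        print("Connection reset for", url)
    if exists(filepath):               # clean up any partial file
        remove(filepath)
    return False

Ordering matters here: HTTPError must precede URLError because it subclasses it, which the commit's except ladder also respects.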