Added HTTP and URL exception handling.
This commit is contained in:
parent
f79abcc310
commit
6022c9929a
@ -5,8 +5,9 @@ from os import remove, stat
|
||||
from os.path import exists, join, getsize
|
||||
import re
|
||||
from typing import List, Callable
|
||||
from urllib.request import urlretrieve, URLopener, HTTPError
|
||||
from urllib.request import urlretrieve, URLopener, HTTPError, URLError
|
||||
import hashlib
|
||||
from http.client import HTTPException
|
||||
|
||||
from scrapthechan import USER_AGENT
|
||||
from scrapthechan.fileinfo import FileInfo
|
||||
@ -66,6 +67,8 @@ class Scraper:
|
||||
def _hash_file(self, filepath: str, hash_algorithm: str = "md5",
|
||||
blocksize: int = 1048576) -> (str, str):
|
||||
"""Compute hash of a file."""
|
||||
if hash_algorithm is None:
|
||||
return None
|
||||
hash_func = hashlib.new(hash_algorithm)
|
||||
with open(filepath, 'rb') as f:
|
||||
buf = f.read(blocksize)
|
||||
@ -82,8 +85,9 @@ class Scraper:
|
||||
if not (f.size == computed_size \
|
||||
or f.size == round(computed_size / 1024)):
|
||||
return False
|
||||
hexdig, dig = self._hash_file(filepath, f.hash_algorithm)
|
||||
return f.hash_value == hexdig or f.hash_value == dig
|
||||
if not f.hash_algorithm is None:
|
||||
hexdig, dig = self._hash_file(filepath, f.hash_algorithm)
|
||||
return f.hash_value == hexdig or f.hash_value == dig
|
||||
|
||||
def _download_file(self, f: FileInfo):
|
||||
"""Download a single file."""
|
||||
@ -101,7 +105,6 @@ class Scraper:
|
||||
while retries > 0:
|
||||
self._url_opener.retrieve(f.download_url, filepath)
|
||||
if not self._check_file(f, filepath):
|
||||
print(filepath, f.size, f.hash_value)
|
||||
remove(filepath)
|
||||
retries -= 1
|
||||
else:
|
||||
@ -115,6 +118,14 @@ class Scraper:
|
||||
print("HTTP Error", e.code, e.reason, f.download_url)
|
||||
if exists(filepath):
|
||||
remove(filepath)
|
||||
except HTTPException:
|
||||
print("HTTP Exception for", f.download_url)
|
||||
if exists(filepath):
|
||||
remove(filepath)
|
||||
except URLError as e:
|
||||
print("URL Error for", f.download_url)
|
||||
if exists(filepath):
|
||||
remove(filepath)
|
||||
except ConnectionResetError:
|
||||
print("Connection reset for", f.download_url)
|
||||
if exists(filepath):
|
||||
|
Loading…
x
Reference in New Issue
Block a user