1
0

Added handling of HTTP and URL exceptions.

This commit is contained in:
Alexander Andreev 2021-04-28 02:47:41 +04:00
parent f79abcc310
commit 6022c9929a
Signed by: Arav
GPG Key ID: 610DF2574456329F

View File

@ -5,8 +5,9 @@ from os import remove, stat
from os.path import exists, join, getsize
import re
from typing import List, Callable
from urllib.request import urlretrieve, URLopener, HTTPError
from urllib.request import urlretrieve, URLopener, HTTPError, URLError
import hashlib
from http.client import HTTPException
from scrapthechan import USER_AGENT
from scrapthechan.fileinfo import FileInfo
@ -66,6 +67,8 @@ class Scraper:
def _hash_file(self, filepath: str, hash_algorithm: str = "md5",
blocksize: int = 1048576) -> (str, str):
"""Compute hash of a file."""
if hash_algorithm is None:
return None
hash_func = hashlib.new(hash_algorithm)
with open(filepath, 'rb') as f:
buf = f.read(blocksize)
@ -82,6 +85,7 @@ class Scraper:
if not (f.size == computed_size \
or f.size == round(computed_size / 1024)):
return False
if not f.hash_algorithm is None:
hexdig, dig = self._hash_file(filepath, f.hash_algorithm)
return f.hash_value == hexdig or f.hash_value == dig
@ -101,7 +105,6 @@ class Scraper:
while retries > 0:
self._url_opener.retrieve(f.download_url, filepath)
if not self._check_file(f, filepath):
print(filepath, f.size, f.hash_value)
remove(filepath)
retries -= 1
else:
@ -115,6 +118,14 @@ class Scraper:
print("HTTP Error", e.code, e.reason, f.download_url)
if exists(filepath):
remove(filepath)
except HTTPException:
print("HTTP Exception for", f.download_url)
if exists(filepath):
remove(filepath)
except URLError as e:
print("URL Error for", f.download_url)
if exists(filepath):
remove(filepath)
except ConnectionResetError:
print("Connection reset for", f.download_url)
if exists(filepath):