1
0
Fork 0

Added HTTP and URL exception handling.

This commit is contained in:
Alexander Andreev 2021-04-28 02:47:41 +04:00
parent f79abcc310
commit 6022c9929a
Signed by: Arav
GPG Key ID: 610DF2574456329F
1 changed file with 15 additions and 4 deletions

View File

@@ -5,8 +5,9 @@ from os import remove, stat
from os.path import exists, join, getsize
import re
from typing import List, Callable
from urllib.request import urlretrieve, URLopener, HTTPError
from urllib.request import urlretrieve, URLopener, HTTPError, URLError
import hashlib
from http.client import HTTPException
from scrapthechan import USER_AGENT
from scrapthechan.fileinfo import FileInfo
@@ -66,6 +67,8 @@ class Scraper:
def _hash_file(self, filepath: str, hash_algorithm: str = "md5",
blocksize: int = 1048576) -> (str, str):
"""Compute hash of a file."""
if hash_algorithm is None:
return None
hash_func = hashlib.new(hash_algorithm)
with open(filepath, 'rb') as f:
buf = f.read(blocksize)
@@ -82,8 +85,9 @@ class Scraper:
if not (f.size == computed_size \
or f.size == round(computed_size / 1024)):
return False
hexdig, dig = self._hash_file(filepath, f.hash_algorithm)
return f.hash_value == hexdig or f.hash_value == dig
if not f.hash_algorithm is None:
hexdig, dig = self._hash_file(filepath, f.hash_algorithm)
return f.hash_value == hexdig or f.hash_value == dig
def _download_file(self, f: FileInfo):
"""Download a single file."""
@@ -101,7 +105,6 @@ class Scraper:
while retries > 0:
self._url_opener.retrieve(f.download_url, filepath)
if not self._check_file(f, filepath):
print(filepath, f.size, f.hash_value)
remove(filepath)
retries -= 1
else:
@@ -115,6 +118,14 @@ class Scraper:
print("HTTP Error", e.code, e.reason, f.download_url)
if exists(filepath):
remove(filepath)
except HTTPException:
print("HTTP Exception for", f.download_url)
if exists(filepath):
remove(filepath)
except URLError as e:
print("URL Error for", f.download_url)
if exists(filepath):
remove(filepath)
except ConnectionResetError:
print("Connection reset for", f.download_url)
if exists(filepath):