Commit: Added HTTP and URL exceptions handling.

This commit is contained in:
Alexander Andreev 2021-04-28 02:47:41 +04:00
parent f79abcc310
commit 6022c9929a
Signed by: Arav
GPG Key ID: 610DF2574456329F

View File

@@ -5,8 +5,9 @@ from os import remove, stat
 from os.path import exists, join, getsize
 import re
 from typing import List, Callable
-from urllib.request import urlretrieve, URLopener, HTTPError
+from urllib.request import urlretrieve, URLopener, HTTPError, URLError
 import hashlib
+from http.client import HTTPException
 
 from scrapthechan import USER_AGENT
 from scrapthechan.fileinfo import FileInfo
@@ -66,6 +67,8 @@ class Scraper:
     def _hash_file(self, filepath: str, hash_algorithm: str = "md5",
             blocksize: int = 1048576) -> (str, str):
         """Compute hash of a file."""
+        if hash_algorithm is None:
+            return None
         hash_func = hashlib.new(hash_algorithm)
         with open(filepath, 'rb') as f:
             buf = f.read(blocksize)
@@ -82,6 +85,7 @@ class Scraper:
         if not (f.size == computed_size \
                 or f.size == round(computed_size / 1024)):
             return False
+        if not f.hash_algorithm is None:
             hexdig, dig = self._hash_file(filepath, f.hash_algorithm)
         return f.hash_value == hexdig or f.hash_value == dig
@@ -101,7 +105,6 @@ class Scraper:
         while retries > 0:
             self._url_opener.retrieve(f.download_url, filepath)
             if not self._check_file(f, filepath):
-                print(filepath, f.size, f.hash_value)
                 remove(filepath)
                 retries -= 1
             else:
@@ -115,6 +118,14 @@ class Scraper:
             print("HTTP Error", e.code, e.reason, f.download_url)
             if exists(filepath):
                 remove(filepath)
+        except HTTPException:
+            print("HTTP Exception for", f.download_url)
+            if exists(filepath):
+                remove(filepath)
+        except URLError as e:
+            print("URL Error for", f.download_url)
+            if exists(filepath):
+                remove(filepath)
         except ConnectionResetError:
             print("Connection reset for", f.download_url)
             if exists(filepath):
                 remove(filepath)