From 6022c9929adff8ba3937437891bfeb5e402e1690 Mon Sep 17 00:00:00 2001
From: "Alexander \"Arav\" Andreev"
Date: Wed, 28 Apr 2021 02:47:41 +0400
Subject: [PATCH] Added HTTP and URL exceptions handling.

---
 scrapthechan/scraper.py | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/scrapthechan/scraper.py b/scrapthechan/scraper.py
index 025f5c1..dfa1937 100644
--- a/scrapthechan/scraper.py
+++ b/scrapthechan/scraper.py
@@ -5,8 +5,9 @@ from os import remove, stat
 from os.path import exists, join, getsize
 import re
 from typing import List, Callable
-from urllib.request import urlretrieve, URLopener, HTTPError
+from urllib.request import urlretrieve, URLopener, HTTPError, URLError
 import hashlib
+from http.client import HTTPException
 
 from scrapthechan import USER_AGENT
 from scrapthechan.fileinfo import FileInfo
@@ -66,6 +67,8 @@ class Scraper:
 	def _hash_file(self, filepath: str, hash_algorithm: str = "md5",
 		blocksize: int = 1048576) -> (str, str):
 		"""Compute hash of a file."""
+		if hash_algorithm is None:
+			return None
 		hash_func = hashlib.new(hash_algorithm)
 		with open(filepath, 'rb') as f:
 			buf = f.read(blocksize)
@@ -82,8 +85,10 @@
 		if not (f.size == computed_size \
 			or f.size == round(computed_size / 1024)):
 			return False
-		hexdig, dig = self._hash_file(filepath, f.hash_algorithm)
-		return f.hash_value == hexdig or f.hash_value == dig
+		if f.hash_algorithm is not None:
+			hexdig, dig = self._hash_file(filepath, f.hash_algorithm)
+			return f.hash_value == hexdig or f.hash_value == dig
+		return True
 
 	def _download_file(self, f: FileInfo):
 		"""Download a single file."""
@@ -101,7 +106,6 @@
 		while retries > 0:
 			self._url_opener.retrieve(f.download_url, filepath)
 			if not self._check_file(f, filepath):
-				print(filepath, f.size, f.hash_value)
 				remove(filepath)
 				retries -= 1
 			else:
@@ -115,6 +119,14 @@
 			print("HTTP Error", e.code, e.reason, f.download_url)
 			if exists(filepath):
 				remove(filepath)
+		except HTTPException:
+			print("HTTP Exception for", f.download_url)
+			if exists(filepath):
+				remove(filepath)
+		except URLError as e:
+			print("URL Error", e.reason, "for", f.download_url)
+			if exists(filepath):
+				remove(filepath)
 		except ConnectionResetError:
 			print("Connection reset for", f.download_url)
 			if exists(filepath):