Did a minor refactoring. Also combined the first two lines that are printed for a thread into one.

Moved the User-Agent string into its own variable in __init__.
2020-07-20 04:32:30 +04:00 · 2020-07-20 04:31:27 +04:00
4 changed files with 22 additions and 21 deletions
--- a/scrapthechan/init.py
+++ b/scrapthechan/init.py
@ -8,6 +8,9 @@ __license__ = \
 For a copy see COPYING file in a directory of the program, or 
 see <https://opensource.org/licenses/MIT>"""

+
+USER_AGENT = f"ScrapTheChan/{__version__}"
+
 VERSION = \
    f"ScrapTheChan ver. {__version__} ({__date__})\n{__copyright__}\n"\
          f"\n{__license__}"
--- a/scrapthechan/cli/scraper.py
+++ b/scrapthechan/cli/scraper.py
@ -6,10 +6,9 @@ from sys import argv
 from typing import List

 from scrapthechan import VERSION
-from scrapthechan.parser import Parser, ParserThreadNotFoundError
+from scrapthechan.parser import Parser, ThreadNotFoundError
 from scrapthechan.parsers import get_parser_by_url, get_parser_by_site, \
 								 SUPPORTED_IMAGEBOARDS
-#from scrapthechan.scrapers.basicscraper import BasicScraper
 from scrapthechan.scrapers.threadedscraper import ThreadedScraper


@ -40,12 +39,12 @@ Supported imageboards: {', '.join(SUPPORTED_IMAGEBOARDS)}.

 def parse_common_arguments(args: str) -> dict:
    r = r"(?P<help>-h|--help)|(?P<version>-v|--version)"
-    argd = search(r, args)
-    if not argd is None:
-        argd = argd.groupdict()
+    args = search(r, args)
+    if not args is None:
+        args = args.groupdict()
        return {
-            "help": not argd["help"] is None,
-            "version": not argd["version"] is None }
+            "help": not args["help"] is None,
+            "version": not args["version"] is None }
    return None

 def parse_arguments(args: str) -> dict:
@ -84,16 +83,13 @@ def main() -> None:
 		print(f"{str(ex)}.")
 		print(f"Supported image boards are {', '.join(SUPPORTED_IMAGEBOARDS)}")
 		exit()
-	except ParserThreadNotFoundError:
+	except ThreadNotFoundError:
 		print(f"Thread {args['site']}/{args['board']}/{args['thread']} " \
 			   "is no longer exist.")
 		exit()

-	flen = len(parser.files)

-
-	print(f"There are {flen} files in " \
-		  f"{args['site']}/{args['board']}/{args['thread']}.")
+	files_count = len(parser.files)

 	if not args["output-dir"] is None:
 		save_dir = args["output-dir"]
@ -101,7 +97,9 @@ def main() -> None:
 		save_dir = join(parser.imageboard, parser.board,
 						parser.thread)

-	print(f"They will be saved in {save_dir}.")
+	print(f"There are {files_count} files in " \
+		  f"{args['site']}/{args['board']}/{args['thread']}." \
+		  f"They will be saved in {save_dir}.")

 	makedirs(save_dir, exist_ok=True)

@ -119,7 +117,7 @@ def main() -> None:


 	scraper = ThreadedScraper(save_dir, parser.files, \
-		lambda i: print(f"{i}/{flen}", end="\r"))
+		lambda i: print(f"{i}/{files_count}", end="\r"))
 	scraper.run()


--- a/scrapthechan/parser.py
+++ b/scrapthechan/parser.py
@ -9,10 +9,10 @@ from urllib.request import urlopen, urlretrieve
 from scrapthechan.fileinfo import FileInfo


-__all__ = ["Parser", "ParserThreadNotFoundError"]
+__all__ = ["Parser", "ThreadNotFoundError"]


-class ParserThreadNotFoundError(Exception):
+class ThreadNotFoundError(Exception):
 	pass


@ -74,7 +74,7 @@ class Parser:
 			with urlopen(thread_url) as url:
 				return loads(url.read().decode('utf-8'))
 		except:
-			raise ParserThreadNotFoundError
+			raise ThreadNotFoundError

 	def _parse_post(self, post: dict) -> List[FileInfo]:
 		"""Parses a single post and extracts files into `FileInfo` object."""
--- a/scrapthechan/scraper.py
+++ b/scrapthechan/scraper.py
@ -1,4 +1,4 @@
-"""Base Scraper implementation."""
+"""Base class for all scrapers that will actually do the job."""

 from base64 import b64encode
 from os import remove, stat
@ -8,14 +8,14 @@ from typing import List, Callable
 from urllib.request import urlretrieve, URLopener
 import hashlib

-from scrapthechan import __version__
+from scrapthechan import USER_AGENT
 from scrapthechan.fileinfo import FileInfo

 __all__ = ["Scraper"]


 class Scraper:
-    """Base scraper implementation.
+    """Base class for all scrapers that will actually do the job.
    
    Arguments:
        save_directory             -- a path to a directory where file will be
@ -29,7 +29,7 @@ class Scraper:
        self._save_directory = save_directory
        self._files = files
        self._url_opener = URLopener()
-        self._url_opener.version = f"ScrapTheChan/{__version__}"
+        self._url_opener.version = USER_AGENT
        self._progress_callback = download_progress_callback

    def run(self):
Author	SHA1	Message	Date
Alexander Andreev	7825b53121	Did a minor refactoring. Also combined the first two lines that are printed for a thread into one.	2020-07-20 04:32:30 +04:00
Alexander Andreev	b26152f3ca	Moved the User-Agent string into its own variable in __init__.	2020-07-20 04:31:27 +04:00