1
0

Did a minor refactoring. Also combined the first two lines printed for a thread into one.

This commit is contained in:
Alexander Andreev 2020-07-20 04:32:30 +04:00
parent b26152f3ca
commit 7825b53121
2 changed files with 15 additions and 17 deletions

View File

@ -6,10 +6,9 @@ from sys import argv
from typing import List from typing import List
from scrapthechan import VERSION from scrapthechan import VERSION
from scrapthechan.parser import Parser, ParserThreadNotFoundError from scrapthechan.parser import Parser, ThreadNotFoundError
from scrapthechan.parsers import get_parser_by_url, get_parser_by_site, \ from scrapthechan.parsers import get_parser_by_url, get_parser_by_site, \
SUPPORTED_IMAGEBOARDS SUPPORTED_IMAGEBOARDS
#from scrapthechan.scrapers.basicscraper import BasicScraper
from scrapthechan.scrapers.threadedscraper import ThreadedScraper from scrapthechan.scrapers.threadedscraper import ThreadedScraper
@ -40,12 +39,12 @@ Supported imageboards: {', '.join(SUPPORTED_IMAGEBOARDS)}.
def parse_common_arguments(args: str) -> dict: def parse_common_arguments(args: str) -> dict:
r = r"(?P<help>-h|--help)|(?P<version>-v|--version)" r = r"(?P<help>-h|--help)|(?P<version>-v|--version)"
argd = search(r, args) args = search(r, args)
if not argd is None: if not args is None:
argd = argd.groupdict() args = args.groupdict()
return { return {
"help": not argd["help"] is None, "help": not args["help"] is None,
"version": not argd["version"] is None } "version": not args["version"] is None }
return None return None
def parse_arguments(args: str) -> dict: def parse_arguments(args: str) -> dict:
@ -84,16 +83,13 @@ def main() -> None:
print(f"{str(ex)}.") print(f"{str(ex)}.")
print(f"Supported image boards are {', '.join(SUPPORTED_IMAGEBOARDS)}") print(f"Supported image boards are {', '.join(SUPPORTED_IMAGEBOARDS)}")
exit() exit()
except ParserThreadNotFoundError: except ThreadNotFoundError:
print(f"Thread {args['site']}/{args['board']}/{args['thread']} " \ print(f"Thread {args['site']}/{args['board']}/{args['thread']} " \
"is no longer exist.") "is no longer exist.")
exit() exit()
flen = len(parser.files)
files_count = len(parser.files)
print(f"There are {flen} files in " \
f"{args['site']}/{args['board']}/{args['thread']}.")
if not args["output-dir"] is None: if not args["output-dir"] is None:
save_dir = args["output-dir"] save_dir = args["output-dir"]
@ -101,7 +97,9 @@ def main() -> None:
save_dir = join(parser.imageboard, parser.board, save_dir = join(parser.imageboard, parser.board,
parser.thread) parser.thread)
print(f"They will be saved in {save_dir}.") print(f"There are {files_count} files in " \
f"{args['site']}/{args['board']}/{args['thread']}." \
f"They will be saved in {save_dir}.")
makedirs(save_dir, exist_ok=True) makedirs(save_dir, exist_ok=True)
@ -119,7 +117,7 @@ def main() -> None:
scraper = ThreadedScraper(save_dir, parser.files, \ scraper = ThreadedScraper(save_dir, parser.files, \
lambda i: print(f"{i}/{flen}", end="\r")) lambda i: print(f"{i}/{files_count}", end="\r"))
scraper.run() scraper.run()

View File

@ -9,10 +9,10 @@ from urllib.request import urlopen, urlretrieve
from scrapthechan.fileinfo import FileInfo from scrapthechan.fileinfo import FileInfo
__all__ = ["Parser", "ParserThreadNotFoundError"] __all__ = ["Parser", "ThreadNotFoundError"]
class ParserThreadNotFoundError(Exception): class ThreadNotFoundError(Exception):
pass pass
@ -74,7 +74,7 @@ class Parser:
with urlopen(thread_url) as url: with urlopen(thread_url) as url:
return loads(url.read().decode('utf-8')) return loads(url.read().decode('utf-8'))
except: except:
raise ParserThreadNotFoundError raise ThreadNotFoundError
def _parse_post(self, post: dict) -> List[FileInfo]: def _parse_post(self, post: dict) -> List[FileInfo]:
"""Parses a single post and extracts files into `FileInfo` object.""" """Parses a single post and extracts files into `FileInfo` object."""