1
0
Fork 0

Did a minor refactoring. Also combined the first two lines printed for a thread into one.

This commit is contained in:
Alexander Andreev 2020-07-20 04:32:30 +04:00
parent b26152f3ca
commit 7825b53121
2 changed files with 15 additions and 17 deletions

View File

@ -6,10 +6,9 @@ from sys import argv
from typing import List
from scrapthechan import VERSION
from scrapthechan.parser import Parser, ParserThreadNotFoundError
from scrapthechan.parser import Parser, ThreadNotFoundError
from scrapthechan.parsers import get_parser_by_url, get_parser_by_site, \
SUPPORTED_IMAGEBOARDS
#from scrapthechan.scrapers.basicscraper import BasicScraper
from scrapthechan.scrapers.threadedscraper import ThreadedScraper
@ -40,12 +39,12 @@ Supported imageboards: {', '.join(SUPPORTED_IMAGEBOARDS)}.
def parse_common_arguments(args: str) -> dict:
r = r"(?P<help>-h|--help)|(?P<version>-v|--version)"
argd = search(r, args)
if not argd is None:
argd = argd.groupdict()
args = search(r, args)
if not args is None:
args = args.groupdict()
return {
"help": not argd["help"] is None,
"version": not argd["version"] is None }
"help": not args["help"] is None,
"version": not args["version"] is None }
return None
def parse_arguments(args: str) -> dict:
@ -84,16 +83,13 @@ def main() -> None:
print(f"{str(ex)}.")
print(f"Supported image boards are {', '.join(SUPPORTED_IMAGEBOARDS)}")
exit()
except ParserThreadNotFoundError:
except ThreadNotFoundError:
print(f"Thread {args['site']}/{args['board']}/{args['thread']} " \
"is no longer exist.")
exit()
flen = len(parser.files)
print(f"There are {flen} files in " \
f"{args['site']}/{args['board']}/{args['thread']}.")
files_count = len(parser.files)
if not args["output-dir"] is None:
save_dir = args["output-dir"]
@ -101,7 +97,9 @@ def main() -> None:
save_dir = join(parser.imageboard, parser.board,
parser.thread)
print(f"They will be saved in {save_dir}.")
print(f"There are {files_count} files in " \
f"{args['site']}/{args['board']}/{args['thread']}." \
f"They will be saved in {save_dir}.")
makedirs(save_dir, exist_ok=True)
@ -119,7 +117,7 @@ def main() -> None:
scraper = ThreadedScraper(save_dir, parser.files, \
lambda i: print(f"{i}/{flen}", end="\r"))
lambda i: print(f"{i}/{files_count}", end="\r"))
scraper.run()

View File

@ -9,10 +9,10 @@ from urllib.request import urlopen, urlretrieve
from scrapthechan.fileinfo import FileInfo
__all__ = ["Parser", "ParserThreadNotFoundError"]
__all__ = ["Parser", "ThreadNotFoundError"]
class ParserThreadNotFoundError(Exception):
class ThreadNotFoundError(Exception):
pass
@ -74,7 +74,7 @@ class Parser:
with urlopen(thread_url) as url:
return loads(url.read().decode('utf-8'))
except:
raise ParserThreadNotFoundError
raise ThreadNotFoundError
def _parse_post(self, post: dict) -> List[FileInfo]:
"""Parses a single post and extracts files into `FileInfo` object."""