From caf18a1bf0ea8bc3b5d4f8c1cfcfedc41596ccf2 Mon Sep 17 00:00:00 2001 From: "Alexander \"Arav\" Andreev" Date: Mon, 3 May 2021 02:35:31 +0400 Subject: [PATCH] Added option --skip-posts and messages are now takes just one line. --- scrapthechan/cli/scraper.py | 64 ++++++++++++++++++++----------------- 1 file changed, 35 insertions(+), 29 deletions(-) diff --git a/scrapthechan/cli/scraper.py b/scrapthechan/cli/scraper.py index 2f1d44a..97a2cc4 100644 --- a/scrapthechan/cli/scraper.py +++ b/scrapthechan/cli/scraper.py @@ -3,7 +3,7 @@ from os import makedirs from os.path import join, exists from re import search from sys import argv -from typing import List +from typing import List, Optional from scrapthechan import VERSION from scrapthechan.parser import Parser, ThreadNotFoundError @@ -15,17 +15,18 @@ from scrapthechan.scrapers.threadedscraper import ThreadedScraper __all__ = ["main"] -USAGE = \ +USAGE: str = \ f"""Usage: scrapthechan [OPTIONS] (URL | IMAGEBOARD BOARD THREAD) Options: -\t-h,--help -- print this help and exit; -\t-v,--version -- print program's version and exit; -\t-o,--output-dir -- directory where to place scraped files. By default -\t following structure will be created in current directory: -\t //; -\t-N,--no-op -- by default OP's post will be written in !op.txt file. This -\t option disables this behaviour; +\t-h,--help -- print this help and exit; +\t-v,--version -- print program's version and exit; +\t-o,--output-dir -- directory where to place scraped files. By default +\t following structure will be created in current directory: +\t //; +\t-N,--no-op -- by default OP's post will be written in !op.txt file. This +\t option disables this behaviour; +\t-S,--skip-posts -- skip given number of posts. Arguments: \tURL -- URL of a thread; @@ -37,15 +38,15 @@ Supported imageboards: {', '.join(SUPPORTED_IMAGEBOARDS)}. """ -def parse_common_arguments(args: str) -> dict: - r = r"(?P-h|--help)|(?P-v|--version)" - args = search(r, args) - if not args is None: - args = args.groupdict() - return { - "help": not args["help"] is None, - "version": not args["version"] is None } - return None +def parse_common_arguments(args: str) -> Optional[dict]: + r = r"(?P-h|--help)|(?P-v|--version)" + args = search(r, args) + if not args is None: + args = args.groupdict() + return { + "help": not args["help"] is None, + "version": not args["version"] is None } + return None def parse_arguments(args: str) -> dict: rlink = r"^(https?:\/\/)?(?P[\w.-]+)[ \/](?P\w+)(\S+)?[ \/](?P\w+)" @@ -53,10 +54,12 @@ def parse_arguments(args: str) -> dict: if not link is None: link = link.groupdict() out_dir = search(r"(?=(-o|--output-dir) (?P\S+))", args) + skip_posts = search(r"(?=(-S|--skip-posts) (?P\d+))", args) return { "site": None if link is None else link["site"], "board": None if link is None else link["board"], "thread": None if link is None else link["thread"], + "skip-posts": None if skip_posts is None else int(skip_posts.group('skip')), "no-op": not search(r"-N|--no-op", args) is None, "output-dir": None if out_dir is None \ else out_dir.groupdict()["outdir"] } @@ -82,17 +85,21 @@ def main() -> None: exit() try: - parser = get_parser_by_site(args["site"], args["board"], args["thread"]) + if not args["skip-posts"] is None: + parser = get_parser_by_site(args["site"], args["board"], + args["thread"], args["skip-posts"]) + else: + parser = get_parser_by_site(args["site"], args["board"], + args["thread"]) except NotImplementedError as ex: print(f"{str(ex)}.") print(f"Supported image boards are {', '.join(SUPPORTED_IMAGEBOARDS)}") exit() - except ThreadNotFoundError: + except ThreadNotFoundError as e: print(f"Thread {args['site']}/{args['board']}/{args['thread']} " \ - "is no longer exist.") + "not found. Reason: {e.reason}") exit() - files_count = len(parser.files) if not args["output-dir"] is None: @@ -101,23 +108,22 @@ def main() -> None: save_dir = join(parser.imageboard, parser.board, parser.thread) - print(f"There are {files_count} files in " \ - f"{args['site']}/{args['board']}/{args['thread']}." \ - f"They will be saved in {save_dir}.") + print(f"{files_count} files in " \ + f"{args['site']}/{args['board']}/{args['thread']}. " \ + f"They're going to {save_dir}. ", end="") makedirs(save_dir, exist_ok=True) if not args["no-op"]: - print("Writing OP... ", end='') if parser.op is None: - print("No text's there.") + print("OP's empty.") elif not exists(join(save_dir, "!op.txt")): with open(join(save_dir, "!op.txt"), 'w', encoding='utf-8') as opf: opf.write(f"{parser.op}\n") - print("Done.") + print("OP's written.") else: - print("Exists.") + print("OP exists.") scraper = ThreadedScraper(save_dir, parser.files, \