1
0
Fork 0

Added option --skip-posts; messages now take just one line.

This commit is contained in:
Alexander Andreev 2021-05-03 02:35:31 +04:00
parent 751549f575
commit caf18a1bf0
Signed by: Arav
GPG Key ID: 610DF2574456329F
1 changed file with 35 additions and 29 deletions

View File

@ -3,7 +3,7 @@ from os import makedirs
from os.path import join, exists from os.path import join, exists
from re import search from re import search
from sys import argv from sys import argv
from typing import List from typing import List, Optional
from scrapthechan import VERSION from scrapthechan import VERSION
from scrapthechan.parser import Parser, ThreadNotFoundError from scrapthechan.parser import Parser, ThreadNotFoundError
@ -15,17 +15,18 @@ from scrapthechan.scrapers.threadedscraper import ThreadedScraper
__all__ = ["main"] __all__ = ["main"]
USAGE = \ USAGE: str = \
f"""Usage: scrapthechan [OPTIONS] (URL | IMAGEBOARD BOARD THREAD) f"""Usage: scrapthechan [OPTIONS] (URL | IMAGEBOARD BOARD THREAD)
Options: Options:
\t-h,--help -- print this help and exit; \t-h,--help -- print this help and exit;
\t-v,--version -- print program's version and exit; \t-v,--version -- print program's version and exit;
\t-o,--output-dir -- directory where to place scraped files. By default \t-o,--output-dir -- directory where to place scraped files. By default
\t following structure will be created in current directory: \t following structure will be created in current directory:
\t <imageboard>/<board>/<thread>; \t <imageboard>/<board>/<thread>;
\t-N,--no-op -- by default OP's post will be written in !op.txt file. This \t-N,--no-op -- by default OP's post will be written in !op.txt file. This
\t option disables this behaviour; \t option disables this behaviour;
\t-S,--skip-posts <num> -- skip given number of posts.
Arguments: Arguments:
\tURL -- URL of a thread; \tURL -- URL of a thread;
@ -37,15 +38,15 @@ Supported imageboards: {', '.join(SUPPORTED_IMAGEBOARDS)}.
""" """
def parse_common_arguments(args: str) -> dict: def parse_common_arguments(args: str) -> Optional[dict]:
r = r"(?P<help>-h|--help)|(?P<version>-v|--version)" r = r"(?P<help>-h|--help)|(?P<version>-v|--version)"
args = search(r, args) args = search(r, args)
if not args is None: if not args is None:
args = args.groupdict() args = args.groupdict()
return { return {
"help": not args["help"] is None, "help": not args["help"] is None,
"version": not args["version"] is None } "version": not args["version"] is None }
return None return None
def parse_arguments(args: str) -> dict: def parse_arguments(args: str) -> dict:
rlink = r"^(https?:\/\/)?(?P<site>[\w.-]+)[ \/](?P<board>\w+)(\S+)?[ \/](?P<thread>\w+)" rlink = r"^(https?:\/\/)?(?P<site>[\w.-]+)[ \/](?P<board>\w+)(\S+)?[ \/](?P<thread>\w+)"
@ -53,10 +54,12 @@ def parse_arguments(args: str) -> dict:
if not link is None: if not link is None:
link = link.groupdict() link = link.groupdict()
out_dir = search(r"(?=(-o|--output-dir) (?P<outdir>\S+))", args) out_dir = search(r"(?=(-o|--output-dir) (?P<outdir>\S+))", args)
skip_posts = search(r"(?=(-S|--skip-posts) (?P<skip>\d+))", args)
return { return {
"site": None if link is None else link["site"], "site": None if link is None else link["site"],
"board": None if link is None else link["board"], "board": None if link is None else link["board"],
"thread": None if link is None else link["thread"], "thread": None if link is None else link["thread"],
"skip-posts": None if skip_posts is None else int(skip_posts.group('skip')),
"no-op": not search(r"-N|--no-op", args) is None, "no-op": not search(r"-N|--no-op", args) is None,
"output-dir": None if out_dir is None \ "output-dir": None if out_dir is None \
else out_dir.groupdict()["outdir"] } else out_dir.groupdict()["outdir"] }
@ -82,17 +85,21 @@ def main() -> None:
exit() exit()
try: try:
parser = get_parser_by_site(args["site"], args["board"], args["thread"]) if not args["skip-posts"] is None:
parser = get_parser_by_site(args["site"], args["board"],
args["thread"], args["skip-posts"])
else:
parser = get_parser_by_site(args["site"], args["board"],
args["thread"])
except NotImplementedError as ex: except NotImplementedError as ex:
print(f"{str(ex)}.") print(f"{str(ex)}.")
print(f"Supported image boards are {', '.join(SUPPORTED_IMAGEBOARDS)}") print(f"Supported image boards are {', '.join(SUPPORTED_IMAGEBOARDS)}")
exit() exit()
except ThreadNotFoundError: except ThreadNotFoundError as e:
print(f"Thread {args['site']}/{args['board']}/{args['thread']} " \ print(f"Thread {args['site']}/{args['board']}/{args['thread']} " \
"is no longer exist.") "not found. Reason: {e.reason}")
exit() exit()
files_count = len(parser.files) files_count = len(parser.files)
if not args["output-dir"] is None: if not args["output-dir"] is None:
@ -101,23 +108,22 @@ def main() -> None:
save_dir = join(parser.imageboard, parser.board, save_dir = join(parser.imageboard, parser.board,
parser.thread) parser.thread)
print(f"There are {files_count} files in " \ print(f"{files_count} files in " \
f"{args['site']}/{args['board']}/{args['thread']}." \ f"{args['site']}/{args['board']}/{args['thread']}. " \
f"They will be saved in {save_dir}.") f"They're going to {save_dir}. ", end="")
makedirs(save_dir, exist_ok=True) makedirs(save_dir, exist_ok=True)
if not args["no-op"]: if not args["no-op"]:
print("Writing OP... ", end='')
if parser.op is None: if parser.op is None:
print("No text's there.") print("OP's empty.")
elif not exists(join(save_dir, "!op.txt")): elif not exists(join(save_dir, "!op.txt")):
with open(join(save_dir, "!op.txt"), 'w', encoding='utf-8') as opf: with open(join(save_dir, "!op.txt"), 'w', encoding='utf-8') as opf:
opf.write(f"{parser.op}\n") opf.write(f"{parser.op}\n")
print("Done.") print("OP's written.")
else: else:
print("Exists.") print("OP exists.")
scraper = ThreadedScraper(save_dir, parser.files, \ scraper = ThreadedScraper(save_dir, parser.files, \