Added option --skip-posts; messages now take just one line.
This commit is contained in:
parent
751549f575
commit
caf18a1bf0
@ -3,7 +3,7 @@ from os import makedirs
|
||||
from os.path import join, exists
|
||||
from re import search
|
||||
from sys import argv
|
||||
from typing import List
|
||||
from typing import List, Optional
|
||||
|
||||
from scrapthechan import VERSION
|
||||
from scrapthechan.parser import Parser, ThreadNotFoundError
|
||||
@ -15,17 +15,18 @@ from scrapthechan.scrapers.threadedscraper import ThreadedScraper
|
||||
__all__ = ["main"]
|
||||
|
||||
|
||||
USAGE = \
|
||||
USAGE: str = \
|
||||
f"""Usage: scrapthechan [OPTIONS] (URL | IMAGEBOARD BOARD THREAD)
|
||||
|
||||
Options:
|
||||
\t-h,--help -- print this help and exit;
|
||||
\t-v,--version -- print program's version and exit;
|
||||
\t-o,--output-dir -- directory where to place scraped files. By default
|
||||
\t following structure will be created in current directory:
|
||||
\t <imageboard>/<board>/<thread>;
|
||||
\t-N,--no-op -- by default OP's post will be written in !op.txt file. This
|
||||
\t option disables this behaviour;
|
||||
\t-h,--help -- print this help and exit;
|
||||
\t-v,--version -- print program's version and exit;
|
||||
\t-o,--output-dir -- directory where to place scraped files. By default
|
||||
\t following structure will be created in current directory:
|
||||
\t <imageboard>/<board>/<thread>;
|
||||
\t-N,--no-op -- by default OP's post will be written in !op.txt file. This
|
||||
\t option disables this behaviour;
|
||||
\t-S,--skip-posts <num> -- skip given number of posts.
|
||||
|
||||
Arguments:
|
||||
\tURL -- URL of a thread;
|
||||
@ -37,15 +38,15 @@ Supported imageboards: {', '.join(SUPPORTED_IMAGEBOARDS)}.
|
||||
"""
|
||||
|
||||
|
||||
def parse_common_arguments(args: str) -> Optional[dict]:
    """Detect the ``-h/--help`` and ``-v/--version`` flags in a command line.

    Args:
        args: the command-line arguments as a single string; flags are
            expected to be separated from other tokens by whitespace.

    Returns:
        ``None`` when neither flag is present. Otherwise a dict with the
        boolean keys ``"help"`` and ``"version"``; only the first flag found
        in the string is reported as ``True``.
    """
    # A flag must be a whole whitespace-delimited token. Without the
    # boundaries, "-h" or "-v" embedded in another argument (for example the
    # URL ".../thread-hello" or the directory "my-videos") was taken for a
    # request to print help or the version.
    flag = search(
        r"(?:^|\s)(?:(?P<help>-h|--help)|(?P<version>-v|--version))(?=\s|$)",
        args)
    if flag is None:
        return None
    found = flag.groupdict()
    return {
        "help": found["help"] is not None,
        "version": found["version"] is not None,
    }
|
||||
def parse_common_arguments(args: str) -> Optional[dict]:
    """Look for the ``-h/--help`` or ``-v/--version`` flag in ``args``.

    Args:
        args: the command-line arguments as a single string; flags are
            expected to be separated from other tokens by whitespace.

    Returns:
        ``None`` if neither flag occurs. Otherwise a dict with the boolean
        keys ``"help"`` and ``"version"``, where only the first flag found
        in the string is set to ``True``.
    """
    # Anchor the flag on whitespace (or the string edges) so that it only
    # matches as a standalone token. An unanchored search also fired on
    # "-h"/"-v" inside other arguments, e.g. a thread URL ending in
    # "-hello" or an output directory named "my-videos".
    pattern = (
        r"(?:^|\s)"
        r"(?:(?P<help>-h|--help)|(?P<version>-v|--version))"
        r"(?=\s|$)"
    )
    match = search(pattern, args)
    if match is None:
        return None
    groups = match.groupdict()
    return {
        "help": groups["help"] is not None,
        "version": groups["version"] is not None,
    }
|
||||
|
||||
def parse_arguments(args: str) -> dict:
|
||||
rlink = r"^(https?:\/\/)?(?P<site>[\w.-]+)[ \/](?P<board>\w+)(\S+)?[ \/](?P<thread>\w+)"
|
||||
@ -53,10 +54,12 @@ def parse_arguments(args: str) -> dict:
|
||||
if not link is None:
|
||||
link = link.groupdict()
|
||||
out_dir = search(r"(?=(-o|--output-dir) (?P<outdir>\S+))", args)
|
||||
skip_posts = search(r"(?=(-S|--skip-posts) (?P<skip>\d+))", args)
|
||||
return {
|
||||
"site": None if link is None else link["site"],
|
||||
"board": None if link is None else link["board"],
|
||||
"thread": None if link is None else link["thread"],
|
||||
"skip-posts": None if skip_posts is None else int(skip_posts.group('skip')),
|
||||
"no-op": not search(r"-N|--no-op", args) is None,
|
||||
"output-dir": None if out_dir is None \
|
||||
else out_dir.groupdict()["outdir"] }
|
||||
@ -82,17 +85,21 @@ def main() -> None:
|
||||
exit()
|
||||
|
||||
try:
|
||||
parser = get_parser_by_site(args["site"], args["board"], args["thread"])
|
||||
if not args["skip-posts"] is None:
|
||||
parser = get_parser_by_site(args["site"], args["board"],
|
||||
args["thread"], args["skip-posts"])
|
||||
else:
|
||||
parser = get_parser_by_site(args["site"], args["board"],
|
||||
args["thread"])
|
||||
except NotImplementedError as ex:
|
||||
print(f"{str(ex)}.")
|
||||
print(f"Supported image boards are {', '.join(SUPPORTED_IMAGEBOARDS)}")
|
||||
exit()
|
||||
except ThreadNotFoundError:
|
||||
except ThreadNotFoundError as e:
|
||||
print(f"Thread {args['site']}/{args['board']}/{args['thread']} " \
|
||||
"is no longer exist.")
|
||||
"not found. Reason: {e.reason}")
|
||||
exit()
|
||||
|
||||
|
||||
files_count = len(parser.files)
|
||||
|
||||
if not args["output-dir"] is None:
|
||||
@ -101,23 +108,22 @@ def main() -> None:
|
||||
save_dir = join(parser.imageboard, parser.board,
|
||||
parser.thread)
|
||||
|
||||
print(f"There are {files_count} files in " \
|
||||
f"{args['site']}/{args['board']}/{args['thread']}." \
|
||||
f"They will be saved in {save_dir}.")
|
||||
print(f"{files_count} files in " \
|
||||
f"{args['site']}/{args['board']}/{args['thread']}. " \
|
||||
f"They're going to {save_dir}. ", end="")
|
||||
|
||||
makedirs(save_dir, exist_ok=True)
|
||||
|
||||
|
||||
if not args["no-op"]:
|
||||
print("Writing OP... ", end='')
|
||||
if parser.op is None:
|
||||
print("No text's there.")
|
||||
print("OP's empty.")
|
||||
elif not exists(join(save_dir, "!op.txt")):
|
||||
with open(join(save_dir, "!op.txt"), 'w', encoding='utf-8') as opf:
|
||||
opf.write(f"{parser.op}\n")
|
||||
print("Done.")
|
||||
print("OP's written.")
|
||||
else:
|
||||
print("Exists.")
|
||||
print("OP exists.")
|
||||
|
||||
|
||||
scraper = ThreadedScraper(save_dir, parser.files, \
|
||||
|
Loading…
Reference in New Issue
Block a user