1
0
Fork 0

Added option --skip-posts; messages now take just one line.

This commit is contained in:
Alexander Andreev 2021-05-03 02:35:31 +04:00
parent 751549f575
commit caf18a1bf0
Signed by: Arav
GPG Key ID: 610DF2574456329F
1 changed file with 35 additions and 29 deletions

View File

@ -3,7 +3,7 @@ from os import makedirs
from os.path import join, exists
from re import search
from sys import argv
from typing import List
from typing import List, Optional
from scrapthechan import VERSION
from scrapthechan.parser import Parser, ThreadNotFoundError
@ -15,17 +15,18 @@ from scrapthechan.scrapers.threadedscraper import ThreadedScraper
__all__ = ["main"]
USAGE = \
USAGE: str = \
f"""Usage: scrapthechan [OPTIONS] (URL | IMAGEBOARD BOARD THREAD)
Options:
\t-h,--help -- print this help and exit;
\t-v,--version -- print program's version and exit;
\t-o,--output-dir -- directory where to place scraped files. By default
\t following structure will be created in current directory:
\t <imageboard>/<board>/<thread>;
\t-N,--no-op -- by default OP's post will be written in !op.txt file. This
\t option disables this behaviour;
\t-h,--help -- print this help and exit;
\t-v,--version -- print program's version and exit;
\t-o,--output-dir -- directory where to place scraped files. By default
\t following structure will be created in current directory:
\t <imageboard>/<board>/<thread>;
\t-N,--no-op -- by default OP's post will be written in !op.txt file. This
\t option disables this behaviour;
\t-S,--skip-posts <num> -- skip given number of posts.
Arguments:
\tURL -- URL of a thread;
@ -37,15 +38,15 @@ Supported imageboards: {', '.join(SUPPORTED_IMAGEBOARDS)}.
"""
def parse_common_arguments(args: str) -> Optional[dict]:
    """Parse the program-wide -h/--help and -v/--version flags.

    Args:
        args: The command line joined into a single string.

    Returns:
        A dict with boolean keys "help" and "version" if either flag
        is present, otherwise None.
    """
    pattern = r"(?P<help>-h|--help)|(?P<version>-v|--version)"
    # Keep the match in its own name instead of rebinding the parameter.
    match = search(pattern, args)
    if match is None:
        # Annotation is Optional[dict]: no flag found means no result.
        return None
    groups = match.groupdict()
    return {
        "help": groups["help"] is not None,
        "version": groups["version"] is not None}
def parse_common_arguments(args: str) -> Optional[dict]:
    """Look for the -h/--help and -v/--version flags in *args*.

    Args:
        args: Raw command-line arguments as one string.

    Returns:
        {"help": bool, "version": bool} when one of the flags occurs
        in the string; None when neither is present.
    """
    flags_re = r"(?P<help>-h|--help)|(?P<version>-v|--version)"
    found = search(flags_re, args)
    # Guard clause: bail out early rather than nesting the happy path.
    if found is None:
        return None
    captured = found.groupdict()
    # `is not None` (not `not ... is None`) per PEP 8 for None tests.
    return {
        "help": captured["help"] is not None,
        "version": captured["version"] is not None}
def parse_arguments(args: str) -> dict:
rlink = r"^(https?:\/\/)?(?P<site>[\w.-]+)[ \/](?P<board>\w+)(\S+)?[ \/](?P<thread>\w+)"
@ -53,10 +54,12 @@ def parse_arguments(args: str) -> dict:
if not link is None:
link = link.groupdict()
out_dir = search(r"(?=(-o|--output-dir) (?P<outdir>\S+))", args)
skip_posts = search(r"(?=(-S|--skip-posts) (?P<skip>\d+))", args)
return {
"site": None if link is None else link["site"],
"board": None if link is None else link["board"],
"thread": None if link is None else link["thread"],
"skip-posts": None if skip_posts is None else int(skip_posts.group('skip')),
"no-op": not search(r"-N|--no-op", args) is None,
"output-dir": None if out_dir is None \
else out_dir.groupdict()["outdir"] }
@ -82,17 +85,21 @@ def main() -> None:
exit()
try:
parser = get_parser_by_site(args["site"], args["board"], args["thread"])
if not args["skip-posts"] is None:
parser = get_parser_by_site(args["site"], args["board"],
args["thread"], args["skip-posts"])
else:
parser = get_parser_by_site(args["site"], args["board"],
args["thread"])
except NotImplementedError as ex:
print(f"{str(ex)}.")
print(f"Supported image boards are {', '.join(SUPPORTED_IMAGEBOARDS)}")
exit()
except ThreadNotFoundError:
except ThreadNotFoundError as e:
print(f"Thread {args['site']}/{args['board']}/{args['thread']} " \
"is no longer exist.")
"not found. Reason: {e.reason}")
exit()
files_count = len(parser.files)
if not args["output-dir"] is None:
@ -101,23 +108,22 @@ def main() -> None:
save_dir = join(parser.imageboard, parser.board,
parser.thread)
print(f"There are {files_count} files in " \
f"{args['site']}/{args['board']}/{args['thread']}." \
f"They will be saved in {save_dir}.")
print(f"{files_count} files in " \
f"{args['site']}/{args['board']}/{args['thread']}. " \
f"They're going to {save_dir}. ", end="")
makedirs(save_dir, exist_ok=True)
if not args["no-op"]:
print("Writing OP... ", end='')
if parser.op is None:
print("No text's there.")
print("OP's empty.")
elif not exists(join(save_dir, "!op.txt")):
with open(join(save_dir, "!op.txt"), 'w', encoding='utf-8') as opf:
opf.write(f"{parser.op}\n")
print("Done.")
print("OP's written.")
else:
print("Exists.")
print("OP exists.")
scraper = ThreadedScraper(save_dir, parser.files, \