"""Parser base class for imageboards exposing a Tinyboard-like JSON API."""

from re import match
from typing import List, Optional

from scrapthechan.parser import Parser
from scrapthechan.fileinfo import FileInfo


__all__ = ["TinyboardLikeParser"]


class TinyboardLikeParser(Parser):
    """Base parser for imageboards that are based on Tinyboard, or have similar
    JSON API."""

    def __init__(self, board: str, thread: str,
                 skip_posts: Optional[int] = None) -> None:
        """Forward ``board``, ``thread`` and ``skip_posts`` to ``Parser``."""
        super().__init__(board, thread, skip_posts)

    def _extract_posts_list(self, lst: List) -> List[dict]:
        """Return the value stored under the ``'posts'`` key of ``lst``.

        NOTE(review): ``lst`` is annotated as ``List`` but is indexed with a
        string key, so it is effectively a mapping. The annotation is left
        untouched because the base class signature is not visible here.
        """
        return lst['posts']

    def _file_info(self, entry: dict) -> FileInfo:
        """Build a ``FileInfo`` for one attachment dictionary.

        ``entry`` is either a post itself or one item of its ``extra_files``
        list; it must carry the ``tim``, ``ext``, ``fsize`` and ``md5`` keys.
        ``self.file_base_url`` and ``self.board`` are read to build the
        download URL (both are expected to be set outside of this class).
        """
        # Name under which the file is addressed in the download URL.
        dlfname = f"{entry['tim']}{entry['ext']}"

        # Fall back to the download name so that an entry without
        # a "filename" key never leaves the name undefined.
        filename = dlfname
        # NOTE(review): the original name is used only when it matches
        # "image.<word>"; every other name is replaced by the download name.
        # This looks inverted, but it is the existing behaviour - confirm.
        original = entry.get("filename")
        if original is not None \
                and match(r"^image\.\w+$", original) is not None:
            filename = f"{original}{entry['ext']}"

        dlurl = self.file_base_url.format(board=self.board, filename=dlfname)
        return FileInfo(filename, entry['fsize'], dlurl, entry['md5'], 'md5')

    def _parse_post(self, post: dict) -> Optional[List[FileInfo]]:
        """Collect the files attached to ``post``.

        Returns ``None`` when the post has no ``tim`` key. Otherwise returns
        a list whose first item describes the post's own file, followed by
        one item per entry of ``post['extra_files']`` when that key exists.
        Each extra file is described by its own ``filename``/``ext`` fields
        rather than by the ones of the enclosing post.
        """
        if 'tim' not in post:
            return None

        files = [self._file_info(post)]
        if "extra_files" in post:
            files.extend(self._file_info(extra)
                         for extra in post["extra_files"])
        return files