from re import match
from typing import List, Optional

from scrapthechan.parser import Parser
from scrapthechan.fileinfo import FileInfo


__all__ = ["TinyboardLikeParser"]


class TinyboardLikeParser(Parser):
    """Base parser for imageboards that are based on Tinyboard, or have
    similar JSON API."""

    def __init__(self, board: str, thread: str,
                 skip_posts: Optional[int] = None) -> None:
        """Forward *board*, *thread* and *skip_posts* to the base parser."""
        super().__init__(board, thread, skip_posts)

    def _extract_posts_list(self, lst: List) -> List[dict]:
        """Return the value stored under the ``'posts'`` key of *lst*."""
        return lst['posts']

    def _entry_to_fileinfo(self, entry: dict) -> FileInfo:
        """Build a FileInfo from one file entry.

        *entry* is either the post itself (its primary file) or one element
        of the post's ``extra_files`` list. It must provide the ``tim``,
        ``ext``, ``fsize`` and ``md5`` keys; ``filename`` is optional.
        """
        # Name under which the file is requested from the server.
        dlfname = f"{entry['tim']}{entry['ext']}"

        # Default to the server-side name so the local name is always
        # defined, even when the entry carries no "filename" key.
        filename = dlfname
        # NOTE(review): the original name is used only when it matches
        # "image.<word>"; any other name falls back to the server-side one.
        # This mirrors the previous behaviour -- confirm that this is the
        # intended direction of the check.
        if "filename" in entry \
                and match(r"^image\.\w+$", entry['filename']) is not None:
            filename = f"{entry['filename']}{entry['ext']}"

        dlurl = self.file_base_url.format(board=self.board, filename=dlfname)
        # The trailing 'md5' literal presumably names the hash algorithm of
        # entry['md5'] -- verify against FileInfo.
        return FileInfo(filename, entry['fsize'], dlurl, entry['md5'], 'md5')

    def _parse_post(self, post: dict) -> Optional[List[FileInfo]]:
        """Collect every file attached to *post*.

        Returns None when the post has no ``tim`` key (no attachment),
        otherwise a list with the primary file first, followed by one item
        per element of ``extra_files`` when that key is present.
        """
        if 'tim' not in post:
            return None

        files = [self._entry_to_fileinfo(post)]
        # Each extra file is described by its own fields, not by the ones of
        # the post it belongs to.
        files.extend(self._entry_to_fileinfo(extra)
                     for extra in post.get("extra_files", ()))
        return files