from re import match
from typing import List, Optional

from scrapthechan.parser import Parser
from scrapthechan.fileinfo import FileInfo


__all__ = ["LainchanParser"]


class LainchanParser(Parser):
    """JSON parser for the lainchan.org image board.

    The thread JSON is fetched from lainchan.org and handed to the base
    ``Parser``; only the lainchan-specific pieces (URLs, OP text and the
    per-post file extraction) are defined here. Posts are expected to use
    4chan-style keys ('tim', 'ext', 'filename', 'fsize', 'md5') plus an
    optional 'extra_files' list for posts carrying several attachments.
    """

    __url_thread_json = "https://lainchan.org/{board}/res/{thread}.json"
    __url_file_link = "https://lainchan.org/{board}/src/{filename}"

    def __init__(self, board: str, thread: str,
                 skip_posts: Optional[int] = None) -> None:
        """Download the thread JSON and initialise the base parser.

        Args:
            board: Board name, substituted into the thread URL.
            thread: Thread identifier, substituted into the thread URL.
            skip_posts: Forwarded unchanged to ``Parser.__init__``.
        """
        thread_url = self.__url_thread_json.format(board=board, thread=thread)
        posts = self._get_json(thread_url)['posts']
        super().__init__(board, thread, posts, skip_posts)

    @property
    def imageboard(self) -> str:
        """Name of the image board this parser handles."""
        return "lainchan.org"

    @property
    def op(self) -> Optional[str]:
        """Text of the opening post: subject line followed by the comment.

        Returns ``None`` when the opening post has neither a subject ('sub')
        nor a comment ('com'), or when the combined text is empty.
        """
        op = ""
        if 'sub' in self._op_post:
            op = f"{self._op_post['sub']}\n"
        if 'com' in self._op_post:
            op += self._op_post['com']
        return op or None

    @staticmethod
    def __pick_filename(entry: dict, dlfname: str) -> str:
        """Choose the local file name for one file entry.

        The entry's own 'filename' (plus its extension) is used only when it
        matches ``image.<word characters>``; in every other case, including
        a missing 'filename', the server-side name ``dlfname`` is used.
        """
        # NOTE(review): this keeps the original condition as-is. It looks
        # like it may be inverted (generic "image.*" names are the ones that
        # get preserved) -- confirm the intent before changing it.
        original = entry.get('filename')
        if original is None or match(r"^image\.\w+$", original) is None:
            return dlfname
        return f"{original}{entry['ext']}"

    def __file_info_from(self, entry: dict) -> FileInfo:
        """Build a ``FileInfo`` from a single file entry (post or extra file)."""
        dlfname = f"{entry['tim']}{entry['ext']}"
        dlurl = self.__url_file_link.format(board=self.board, filename=dlfname)
        return FileInfo(self.__pick_filename(entry, dlfname), entry['fsize'],
                        dlurl, entry['md5'], 'md5')

    def _parse_post(self, post) -> Optional[List[FileInfo]]:
        """Extract every file attached to ``post``.

        Args:
            post: One post dict from the thread JSON.

        Returns:
            ``None`` when the post has no attachment (no 'tim' key);
            otherwise a list with the primary file first, followed by one
            entry per item of 'extra_files'.
        """
        if 'tim' not in post:
            return None

        # Each extra file is described by its own dict, so its name and
        # extension must come from that dict rather than from the post.
        entries = [post, *(post.get("extra_files") or [])]
        return [self.__file_info_from(entry) for entry in entries]