1
0
ScrapTheChan/scrapthechan/parsers/tinyboardlike.py

52 lines
1.4 KiB
Python
Raw Normal View History

from re import match
from typing import List, Optional
from scrapthechan.parser import Parser
from scrapthechan.fileinfo import FileInfo
# Public API of this module: only the parser class is exported by
# ``from ... import *``.
__all__ = ["TinyboardLikeParser"]
class TinyboardLikeParser(Parser):
    """Base parser for imageboards that are based on Tinyboard, or have a
    similar JSON API."""

    def __init__(self, board: str, thread: str,
                 skip_posts: Optional[int] = None) -> None:
        """Pass *board*, *thread* and *skip_posts* unchanged to ``Parser``."""
        super().__init__(board, thread, skip_posts)

    def _extract_posts_list(self, lst: List) -> List[dict]:
        """Return the value stored under the ``'posts'`` key of *lst*.

        NOTE(review): *lst* is annotated as ``List`` but is indexed with a
        string key, so it is presumably the decoded JSON object of a thread;
        the annotation is kept as-is for interface compatibility.
        """
        return lst['posts']

    @staticmethod
    def _pick_filename(entry: dict, dlfname: str) -> str:
        """Choose the local file name for a single file entry.

        *entry* is either the post itself (primary file) or one element of
        its ``extra_files`` list. *dlfname* is the server-side name
        (``tim`` + ``ext``) and is used as the fallback.

        NOTE(review): the uploaded name is kept only when it looks like
        ``image.<word>``; any other name is replaced by *dlfname*. This
        preserves the previous behaviour, but the condition may be inverted
        relative to the intent -- confirm before relying on it.
        """
        original = entry.get('filename')
        # A missing name falls back to the server-side name instead of
        # leaving the result undefined.
        if original is None or match(r"^image\.\w+$", original) is None:
            return dlfname
        return f"{original}{entry['ext']}"

    def _parse_post(self, post: dict) -> Optional[List[FileInfo]]:
        """Build the list of files attached to *post*.

        Returns ``None`` when the post has no ``'tim'`` key (treated as
        "no attachment"). Otherwise returns one ``FileInfo`` for the primary
        file followed by one for every entry of ``post['extra_files']``,
        if that key is present.

        Each entry must provide ``tim``, ``ext``, ``fsize`` and ``md5``;
        a missing key raises ``KeyError``. The download URL is produced
        from ``self.file_base_url`` and ``self.board``, which are not
        defined in this class (presumably supplied by the base class or
        by subclasses).
        """
        if 'tim' not in post:
            return None

        # The post dict itself describes the first file; additional files,
        # if any, are described by their own dicts with the same fields.
        entries = [post, *(post.get("extra_files") or [])]

        files = []
        for entry in entries:
            dlfname = f"{entry['tim']}{entry['ext']}"
            dlurl = self.file_base_url.format(board=self.board,
                                              filename=dlfname)
            # The name is derived from this entry's own fields so extra
            # files do not inherit the primary file's name or extension.
            filename = self._pick_filename(entry, dlfname)
            files.append(FileInfo(filename, entry['fsize'], dlurl,
                                  entry['md5'], 'md5'))
        return files