1
0
ScrapTheChan/scrapthechan/parsers/lolifox.py

66 lines
1.8 KiB
Python

from re import match
from typing import List, Optional
from scrapthechan.parser import Parser
from scrapthechan.fileinfo import FileInfo
__all__ = ["LolifoxParser"]
class LolifoxParser(Parser):
    """JSON parser for the lolifox.cc image board.

    The thread JSON structure is identical to lainchan.org's.
    """

    __url_thread_json = "https://lolifox.cc/{board}/res/{thread}.json"
    __url_file_link = "https://lolifox.cc/{board}/src/{filename}"

    def __init__(self, board: str, thread: str,
                 skip_posts: Optional[int] = None) -> None:
        """Fetch the thread's JSON and pass its posts to the base parser.

        Args:
            board: Board name, substituted into the thread URL.
            thread: Thread identifier, substituted into the thread URL.
            skip_posts: Forwarded unchanged to the base class constructor.
        """
        url = self.__url_thread_json.format(board=board, thread=thread)
        posts = self._get_json(url)['posts']
        super().__init__(board, thread, posts, skip_posts)

    @property
    def imageboard(self) -> str:
        """Name of the image board this parser handles."""
        return "lolifox.cc"

    @property
    def op(self) -> Optional[str]:
        """Text of the opening post, or None when it has no text.

        The subject ('sub'), when present, is followed by a newline and
        then by the comment ('com'), when present.
        """
        text = ""
        if 'sub' in self._op_post:
            text = f"{self._op_post['sub']}\n"
        if 'com' in self._op_post:
            text += self._op_post['com']
        return text if text != "" else None

    @staticmethod
    def __choose_filename(entry, dlfname: str, pattern: str) -> str:
        """Pick the name under which one attachment is reported.

        Returns the entry's original 'filename' with its 'ext' appended
        when that filename matches `pattern`; otherwise (including when the
        entry has no 'filename' key) returns the server-side `dlfname`.
        """
        # NOTE(review): the branch direction is kept from the original code
        # (original name is used only when it matches ``image.<ext>``);
        # confirm this is the intended policy.
        original = entry.get('filename')
        # re.match takes (pattern, string): the fixed pattern goes first so
        # a user-supplied filename is never compiled as a regex.
        if original is None or match(pattern, original) is None:
            return dlfname
        return f"{original}{entry['ext']}"

    def _parse_post(self, post) -> Optional[List[FileInfo]]:
        """Collect every file attached to a single post.

        Args:
            post: One post mapping taken from the thread JSON.

        Returns:
            A list with one FileInfo per attachment (the main file first,
            then each entry of 'extra_files'), or None when the post has
            no 'tim' key, i.e. no attachment.
        """
        if 'tim' not in post:
            return None

        dlfname = f"{post['tim']}{post['ext']}"
        filename = self.__choose_filename(
            post, dlfname, r"^image\.\w{1,4}$")
        files = [
            FileInfo(
                filename, post['fsize'],
                self.__url_file_link.format(
                    board=self.board, filename=dlfname),
                post['md5'], 'md5'),
        ]

        # Each extra file is described by its own fields, not by the fields
        # of the post's first attachment.
        # NOTE(review): the extension pattern here (\w+) is looser than the
        # one used for the main file (\w{1,4}); both are kept as they were.
        for extra in post.get("extra_files", ()):
            dlfname = f"{extra['tim']}{extra['ext']}"
            filename = self.__choose_filename(
                extra, dlfname, r"^image\.\w+$")
            dlurl = self.__url_file_link.format(
                board=self.board, filename=dlfname)
            files.append(
                FileInfo(filename, extra['fsize'], dlurl,
                         extra['md5'], 'md5'))
        return files