
Initial commit with all the files.

Alexander Andreev 2020-07-08 22:53:39 +04:00
commit a5028162d8
19 changed files with 655 additions and 0 deletions

.gitignore vendored Normal file

@@ -0,0 +1,5 @@
.vscode/
build/
dist/
*.egg-info/
__pycache__

CHANGELOG.md Normal file

@@ -0,0 +1,11 @@
# Changelog
## 0.1.0 - 2020-07-08
### Added
- JSON parsers for 4chan.org, lainchan.org and 2ch.hk.
- Basic straightforward scraper that downloads files one by one.
### Issues
- 2ch.hk: I can't figure out what exactly it reports as a file's size and
hash. Example: a file may have a size of 127798 bytes (125K), but 2ch
reports 150, and the reported hash doesn't match the computed one.

COPYING Normal file

@@ -0,0 +1,21 @@
The MIT License

Copyright (c) 2020 Alexander "Arav" Andreev <me@arav.top>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

Makefile Normal file

@@ -0,0 +1,13 @@
build: scrapthechan README.md setup.cfg
	python setup.py sdist bdist_wheel

install:
	python -m pip install --upgrade dist/scrapthechan-0.1.0-py3-none-any.whl --user

# We change to the home directory first, otherwise pip refuses to uninstall
# a package whose sources are in the current directory. The two commands must
# run in the same shell, hence the &&.
uninstall:
	@cd ~/ && python -m pip uninstall scrapthechan

clean:
	rm -rf __pycache__ scrapthechan/__pycache__ scrapthechan/parsers/__pycache__ \
		scrapthechan.egg-info build

.PHONY: build install uninstall clean

README.md Normal file

@@ -0,0 +1,33 @@
This is a tool for scraping files from imageboards' threads.

It extracts the files from a JSON version of a thread and downloads them into
a specified output directory; if none is specified, it creates the following
directory hierarchy in the working directory:

    <imageboard name>
    |- <board name>
       |- <thread>
          |- [!op.txt]
          |- ...

# Usage

```bash
scrapthechan [<url> | <imageboard> <board> <thread>] [-o,--output-dir] [--no-op]
    [-v,--version] [-h,--help]
```

There are two ways to specify a thread. One is to pass the full URL of a thread
(the `<url>` argument); the other is to pass the thread in three components:
`<imageboard>` is the name of a website (e.g. 4chan), `<board>` is the name of a
board (e.g. wg), and `<thread>` is the number of a thread on that board.

`-o`, `--output-dir` -- output directory where all the files will be dumped.

`--no-op` -- by default OP's post is saved in a `!op.txt` file; this flag
disables that behaviour. I decided to put a `!` in the name so this file
appears at the top of a directory listing.

`-v`, `--version` prints the version of the program, and `-h`, `--help` prints
its help.
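
For example, two equivalent ways to scrape a hypothetical thread (the thread
number here is made up):

```bash
# By full URL, into ./wallpapers, without saving !op.txt:
scrapthechan https://boards.4chan.org/wg/thread/1234567 -o wallpapers --no-op

# The same thread passed as three components:
scrapthechan 4chan wg 1234567 -o wallpapers --no-op
```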

scrapthechan/__init__.py Normal file

@@ -0,0 +1,13 @@
__date__ = "8 July 2020"
__version__ = "0.1.0"
__author__ = "Alexander \"Arav\" Andreev"
__email__ = "me@arav.top"
__copyright__ = f"Copyright (c) 2020 {__author__} <{__email__}>"
__license__ = \
"""This program is licensed under the terms of the MIT license.
For a copy see COPYING file in a directory of the program, or
see <https://opensource.org/licenses/MIT>"""
VERSION = \
f"ScrapTheChan ver. {__version__} ({__date__})\n\n{__copyright__}\n"\
f"\n{__license__}"

scrapthechan/cli/__init__.py Normal file

scrapthechan/cli/scraper.py Normal file

@@ -0,0 +1,116 @@
from os import makedirs
from os.path import join, exists
from re import search
from sys import argv, exit
from typing import Optional

from scrapthechan import VERSION
from scrapthechan.parser import ParserThreadNotFoundError
from scrapthechan.parsers import get_parser_by_site, SUPPORTED_IMAGEBOARDS
from scrapthechan.scrapers.basicscraper import BasicScraper

__all__ = ["main"]

USAGE = \
"""Usage: scrapthechan [OPTIONS] (URL | SITE BOARD THREAD)

Options:
\t-h,--help -- print this help and exit;
\t-v,--version -- print program's version and exit;
\t-o,--output-dir -- directory where to place scraped files. By default
\t    the following structure is created in the current directory:
\t    <imageboard>/<board>/<thread>;
\t-N,--no-op -- by default OP's post is written into a !op.txt file. This
\t    option disables this behaviour.

Supported imageboards: 4chan.org, 2ch.hk, lainchan.org
"""

def parse_common_arguments(args: str) -> Optional[dict]:
    r = r"(?P<help>-h|--help)|(?P<version>-v|--version)"
    argd = search(r, args)
    if argd is not None:
        argd = argd.groupdict()
        return {
            "help": argd["help"] is not None,
            "version": argd["version"] is not None }
    return None

def parse_arguments(args: str) -> dict:
    rlink = r"^(https?:\/\/)?(?P<site>[\w.-]+)[ \/](?P<board>\w+)(\S+)?[ \/](?P<thread>\w+)"
    link = search(rlink, args)
    if link is not None:
        link = link.groupdict()
    out_dir = search(r"(?=(-o|--output-dir) (?P<outdir>\S+))", args)
    return {
        "site": None if link is None else link["site"],
        "board": None if link is None else link["board"],
        "thread": None if link is None else link["thread"],
        "no-op": search(r"-N|--no-op", args) is not None,
        "output-dir": None if out_dir is None \
            else out_dir.groupdict()["outdir"] }

def main() -> None:
    cargs = parse_common_arguments(' '.join(argv[1:]))
    if cargs is not None:
        if cargs["help"]:
            print(USAGE)
            exit()
        elif cargs["version"]:
            print(VERSION)
            exit()
    args = parse_arguments(' '.join(argv[1:]))
    # parse_arguments always returns a dict, so check the values, not the keys.
    if args is None or args["site"] is None \
            or args["board"] is None or args["thread"] is None:
        print(USAGE)
        exit()
    try:
        parser = get_parser_by_site(args["site"], args["board"], args["thread"])
    except NotImplementedError as ex:
        print(f"{str(ex)}.")
        print(f"Supported image boards are {', '.join(SUPPORTED_IMAGEBOARDS)}")
        exit()
    except ParserThreadNotFoundError:
        print("Thread doesn't exist anymore.")
        exit()
    flen = len(parser.files)
    print(f"There are {flen} files in this thread.")
    if args["output-dir"] is not None:
        save_dir = args["output-dir"]
    else:
        save_dir = join(parser.imageboard, parser.board, parser.thread)
    print(f"They will be saved in {save_dir}.")
    makedirs(save_dir, exist_ok=True)
    if not args["no-op"]:
        print("Writing OP... ", end='')
        if not exists(join(save_dir, "!op.txt")):
            with open(join(save_dir, "!op.txt"), 'w') as opf:
                opf.write(f"{parser.op}\n")
            print("Done.")
        else:
            print("Exists.")
    scraper = BasicScraper(save_dir, parser.files, \
        lambda i: print(f"{i}/{flen}", end="\r"))
    scraper.run()

if __name__ == "__main__":
    main()
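
As a sanity check on the regex-based parsing above, here is what
`parse_arguments` would return for a made-up command line (the expected output
is derived by hand from the regexes, not from a recorded run):

```python
from scrapthechan.cli.scraper import parse_arguments

# Hypothetical command line, joined the same way main() joins argv[1:].
args = parse_arguments(
    "https://boards.4chan.org/wg/thread/1234567 -o wallpapers --no-op")
print(args)
# {'site': 'boards.4chan.org', 'board': 'wg', 'thread': '1234567',
#  'no-op': True, 'output-dir': 'wallpapers'}
```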

scrapthechan/fileinfo.py Normal file

@@ -0,0 +1,23 @@
"""FileInfo object stores all needed information about a file."""
__all__ = ["FileInfo"]
class FileInfo:
"""Stores all needed information about a file.
Arguments:
- `name` -- name of a file;
- `size` -- size of a file;
- `dlurl` -- full download URL for a file;
- `hash_value` -- hash sum of a file;
- `hash_algo` -- hash algorithm used (e.g. md5).
"""
def __init__(self, name: str, size: int, dlurl: str,
hash_value: str, hash_algo: str) -> None:
self.name = name
self.size = size
self.dlurl = dlurl
self.hash_value = hash_value
self.hash_algo = hash_algo
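
For illustration, a hedged sketch of constructing a `FileInfo` by hand; every
value below is made up:

```python
from scrapthechan.fileinfo import FileInfo

# All values are hypothetical, for illustration only.
f = FileInfo(
    name="1594230000000.png",          # name to save the file under
    size=127798,                       # size as the imageboard reports it
    dlurl="https://i.4cdn.org/wg/1594230000000.png",
    hash_value="0123456789abcdef0123456789abcdef",  # made-up md5 hex digest
    hash_algo="md5")
print(f"{f.name}: {f.size} bytes from {f.dlurl}")
```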

scrapthechan/parser.py Normal file

@@ -0,0 +1,81 @@
"""Base `Parser` class for JSON parsers to inherit."""
from itertools import chain
from json import loads
from re import findall, match
from typing import List, Optional
from urllib.request import urlopen, urlretrieve
from scrapthechan.fileinfo import FileInfo
__all__ = ["Parser", "ParserThreadNotFoundError"]
class ParserThreadNotFoundError(Exception):
pass
class Parser:
"""Base class for all parsers.
It fetches JSON of a specified thread and collects all the files from it
into a list of the `FileInfo` objects.
Also it extracts OP's post, that may come handy if you do bulk scraping.
Arguments:
board -- is a name of a board on an image board;
thread -- is a name of a thread inside a board;
posts -- is a list of posts in form of dictionaries exported from a JSON;
skip_posts -- number of posts to skip.
All the extracted files will be stored as the `FileInfo` objects."""
__url_thread_json: str = "https://example.org/{board}/{thread}.json"
__url_file_link: str = None
def __init__(self, board: str, thread: str, posts: List[dict],
skip_posts: Optional[int] = None) -> None:
self._board = board
self._thread = thread
self._op_post = posts[0]
if not skip_posts is None:
posts = posts[skip_posts:]
self._files = list(chain.from_iterable(filter(None, \
map(self._parse_post, posts))))
@property
def imageboard(self) -> str:
"""Returns image board's name."""
return NotImplementedError
@property
def board(self) -> str:
"""Returns a name of a board of image board."""
return self._board
@property
def thread(self) -> str:
"""Returns a name of thread from a board."""
return self._thread
@property
def op(self) -> str:
"""Returns OP's post as combination of subject and comment separated
by a new line."""
raise NotImplementedError
@property
def files(self) -> List[FileInfo]:
"""Returns a list of retrieved files as `FileInfo` objects."""
return self._files
def _get_json(self, thread_url: str) -> dict:
"""Gets JSON version of a thread and converts it in a dictionary."""
try:
with urlopen(thread_url) as url:
return loads(url.read().decode('utf-8'))
except:
raise ParserThreadNotFoundError
def _parse_post(self, post: dict) -> List[FileInfo]:
"""Parses a single post and extracts files into `FileInfo` object."""
raise NotImplementedError
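
To make the subclassing contract concrete, here is a minimal hypothetical
parser; the site, URLs and JSON keys are all invented (the project's real
parsers follow below):

```python
from typing import List, Optional

from scrapthechan.fileinfo import FileInfo
from scrapthechan.parser import Parser

class ExampleChanParser(Parser):
    """Hypothetical parser for an imaginary 4chan-like JSON API."""
    __url_thread_json = "https://example.org/{board}/res/{thread}.json"
    __url_file_link = "https://example.org/{board}/src/{filename}"

    def __init__(self, board: str, thread: str,
            skip_posts: Optional[int] = None) -> None:
        posts = self._get_json(self.__url_thread_json.format(
            board=board, thread=thread))['posts']
        super().__init__(board, thread, posts, skip_posts)

    @property
    def imageboard(self) -> str:
        return "example.org"

    @property
    def op(self) -> str:
        return self._op_post.get('com', '')

    def _parse_post(self, post: dict) -> Optional[List[FileInfo]]:
        if 'file' not in post:
            return None
        f = post['file']  # invented JSON layout
        return [FileInfo(f['name'], f['size'],
            self.__url_file_link.format(board=self.board, filename=f['name']),
            f['md5'], 'md5')]
```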

scrapthechan/parsers/__init__.py Normal file

@@ -0,0 +1,34 @@
"""Here are defined the JSON parsers for imageboards."""
from re import search
from typing import List
from scrapthechan.parser import Parser
__all__ = ["SUPPORTED_IMAGEBOARDS", "get_parser_by_url", "get_parser_by_site"]
SUPPORTED_IMAGEBOARDS: List[str] = ["4chan.org", "lainchan.org", "2ch.hk"]
def get_parser_by_url(url: str) -> Parser:
"""Parses URL and extracts from it site name, board and thread.
And then returns initialised Parser object for detected imageboard."""
URLRX = r"https?:\/\/(?P<s>[\w\.]+)\/(?P<b>\w+)\/(?:\w+)?\/(?P<t>\w+)"
site, board, thread = search(URLRX, url).groups()
return get_parser_by_site(site, board, thread)
def get_parser_by_site(site: str, board: str, thread: str) -> Parser:
"""Returns an initialised parser for `site` with `board` and `thread`."""
if site in ['boards.4chan.org', 'boards.4channel.org',
'4chan', '4chan.org']:
from .fourchan import FourChanParser
return FourChanParser(board, thread)
elif site in ['lainchan.org', 'lainchan']:
from .lainchan import LainchanParser
return LainchanParser(board, thread)
elif site in ['2ch.hk', '2ch']:
from .dvach import DvachParser
return DvachParser(board, thread)
else:
raise NotImplementedError(f"Parser for {site} is not implemented")
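
A short usage sketch; the thread URL is made up, so running it as-is would
raise `ParserThreadNotFoundError` unless pointed at a live thread:

```python
from scrapthechan.parsers import get_parser_by_url

parser = get_parser_by_url("https://boards.4chan.org/wg/thread/1234567")
print(parser.imageboard, parser.board, parser.thread)
print(f"{len(parser.files)} files found")
```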

scrapthechan/parsers/dvach.py Normal file

@@ -0,0 +1,43 @@
from re import match
from typing import List, Optional

from scrapthechan.fileinfo import FileInfo
from scrapthechan.parser import Parser

__all__ = ["DvachParser"]

class DvachParser(Parser):
    """JSON parser for the 2ch.hk imageboard."""

    __url_thread_json = "https://2ch.hk/{board}/res/{thread}.json"
    __url_file_link = "https://2ch.hk"

    def __init__(self, board: str, thread: str,
            skip_posts: Optional[int] = None) -> None:
        posts = self._get_json(self.__url_thread_json.format(board=board, \
            thread=thread))['threads'][0]['posts']
        super(DvachParser, self).__init__(board, thread, posts, skip_posts)

    @property
    def imageboard(self) -> str:
        return "2ch.hk"

    @property
    def op(self) -> str:
        return f"{self._op_post['subject']}\n{self._op_post['comment']}"

    def _parse_post(self, post) -> Optional[List[FileInfo]]:
        if 'files' not in post: return None
        files = []
        for f in post['files']:
            # `match` takes the pattern first, then the string. Keep the
            # original name unless it is a generic image.* one.
            if match(r"^image\.\w+$", f['fullname']) is None:
                fullname = f['fullname']
            else:
                fullname = f['name']
            # Same thing as 4chan here: 2ch.hk also has an md5 field, so it
            # is completely fine to hardcode `hash_algo`.
            files.append(FileInfo(fullname, f['size'],
                f"{self.__url_file_link}{f['path']}",
                f['md5'], 'md5'))
        return files

scrapthechan/parsers/fourchan.py Normal file

@@ -0,0 +1,49 @@
from re import match
from typing import List, Optional

from scrapthechan.fileinfo import FileInfo
from scrapthechan.parser import Parser

__all__ = ["FourChanParser"]

class FourChanParser(Parser):
    """JSON parser for the 4chan.org imageboard."""

    __url_thread_json = "https://a.4cdn.org/{board}/thread/{thread}.json"
    __url_file_link = "https://i.4cdn.org/{board}/{filename}"

    def __init__(self, board: str, thread: str,
            skip_posts: Optional[int] = None) -> None:
        posts = self._get_json(self.__url_thread_json.format(board=board, \
            thread=thread))['posts']
        super(FourChanParser, self).__init__(board, thread, posts, skip_posts)

    @property
    def imageboard(self) -> str:
        return "4chan.org"

    @property
    def op(self) -> str:
        if 'sub' in self._op_post:
            return f"{self._op_post['sub']}\n{self._op_post['com']}"
        else:
            return self._op_post['com']

    def _parse_post(self, post: dict) -> Optional[List[FileInfo]]:
        if 'tim' not in post: return None
        dlfname = f"{post['tim']}{post['ext']}"
        # `match` takes the pattern first. Keep the user's original filename
        # unless it is a generic image.* one or missing, in which case fall
        # back to the server-side (tim-based) name.
        if "filename" in post \
                and match(r"^image\.\w+$", post['filename']) is None:
            filename = f"{post['filename']}{post['ext']}"
        else:
            filename = dlfname
        # Hash algorithm is hardcoded since it is highly unlikely to change
        # in the foreseeable future. And if it does change, this line will
        # necessarily be updated anyway.
        return [FileInfo(filename, post['fsize'],
            self.__url_file_link.format(board=self.board, filename=dlfname),
            post['md5'], 'md5')]
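
One nuance worth calling out: 4chan's API reports `md5` as a base64-encoded
digest rather than hex, which is why `Scraper._is_file_ok` below compares
both forms. A minimal sketch of that comparison in isolation (the function
name and path are mine, not the project's):

```python
from base64 import b64encode
from hashlib import md5

def matches_reported_md5(path: str, reported: str) -> bool:
    """True if a local file matches a base64-encoded md5 as 4chan reports it."""
    with open(path, 'rb') as f:
        digest = md5(f.read()).digest()
    return b64encode(digest).decode() == reported
```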

scrapthechan/parsers/lainchan.py Normal file

@@ -0,0 +1,57 @@
from re import match
from typing import List, Optional

from scrapthechan.fileinfo import FileInfo
from scrapthechan.parser import Parser

__all__ = ["LainchanParser"]

class LainchanParser(Parser):
    """JSON parser for the lainchan.org imageboard.

    The JSON structure is almost identical to 4chan.org's, so this parser
    closely mirrors the 4chan.org one and redefines only what is needed.
    """

    __url_thread_json = "https://lainchan.org/{board}/res/{thread}.json"
    __url_file_link = "https://lainchan.org/{board}/src/{filename}"

    def __init__(self, board: str, thread: str,
            skip_posts: Optional[int] = None) -> None:
        posts = self._get_json(self.__url_thread_json.format(board=board, \
            thread=thread))['posts']
        super(LainchanParser, self).__init__(board, thread, posts, skip_posts)

    @property
    def imageboard(self) -> str:
        return "lainchan.org"

    def _parse_post(self, post) -> Optional[List[FileInfo]]:
        if 'tim' not in post: return None
        dlfname = f"{post['tim']}{post['ext']}"
        # Keep the original filename unless it is a generic image.* one or
        # missing; `match` takes the pattern first, then the string.
        if "filename" in post \
                and match(r"^image\.\w+$", post['filename']) is None:
            filename = f"{post['filename']}{post['ext']}"
        else:
            filename = dlfname
        files = []
        files.append(FileInfo(filename, post['fsize'],
            self.__url_file_link.format(board=self.board, filename=dlfname),
            post['md5'], 'md5'))
        if "extra_files" in post:
            for f in post["extra_files"]:
                dlfname = f"{f['tim']}{f['ext']}"
                # Same rule for extra files, using the extra file's own fields.
                if "filename" in f \
                        and match(r"^image\.\w+$", f['filename']) is None:
                    filename = f"{f['filename']}{f['ext']}"
                else:
                    filename = dlfname
                dlurl = self.__url_file_link.format(board=self.board, \
                    filename=dlfname)
                files.append(FileInfo(filename, f['fsize'], \
                    dlurl, f['md5'], 'md5'))
        return files

scrapthechan/scraper.py Normal file

@@ -0,0 +1,96 @@
"""Base Scraper implementation."""
from base64 import b64encode
from os import remove, stat
from os.path import exists, join, getsize
import re
from typing import List, Callable
from urllib.request import urlretrieve, URLopener
import hashlib
from scrapthechan import __version__
from scrapthechan.fileinfo import FileInfo
__all__ = ["Scraper"]
class Scraper:
"""Base scraper implementation.
Arguments:
save_directory -- a path to a directory where file will be
saved;
files -- a list of FileInfo objects;
download_progress_callback -- a callback function that will be called
for each file started downloading.
"""
def __init__(self, save_directory: str, files: List[FileInfo],
download_progress_callback: Callable[[int], None] = None) -> None:
self._save_directory = save_directory
self._files = files
self._url_opener = URLopener()
self._url_opener.version = f"ScrapTheChan/{__version__}"
self._progress_callback = download_progress_callback
def run(self):
raise NotImplementedError
def _same_filename(self, filename: str, path: str) -> str:
"""Check if there is a file with same name. If so then add incremental
number enclosed in brackets to a name of a new one."""
newname = filename
while exists(join(path, newname)):
has_extension = newname.rfind(".") != -1
if has_extension:
l, r = newname.rsplit(".", 1)
lbracket = l.rfind("(")
if lbracket == -1:
newname = f"{l}(1).{r}"
else:
num = l[lbracket+1:-1]
if num.isnumeric():
newname = f"{l[:lbracket]}({int(num)+1}).{r}"
else:
newname = f"{l}(1).{r}"
else:
lbracket = l.rfind("(")
if lbracket == -1:
newname = f"{newname}(1)"
else:
num = newname[lbracket+1:-1]
if num.isnumeric():
newname = f"{newname[:lbracket]}({int(num)+1})"
return newname
def _hash_file(self, filename: str, hash_algo: str = "md5",
blocksize: int = 1048576) -> (str, str):
"""Compute hash of a file."""
hash_func = hashlib.new(hash_algo)
with open(filename, 'rb') as f:
buf = f.read(blocksize)
while len(buf) > 0:
hash_func.update(buf)
buf = f.read(blocksize)
return hash_func.hexdigest(), hash_func.digest()
def _is_file_ok(self, f: FileInfo, filepath: str) -> bool:
"""Check if a file exist and isn't broken."""
if not exists(filepath):
return False
computed_size = getsize(filepath)
is_size_match = f.size == computed_size \
or f.size == round(computed_size / 1024)
hexdig, dig = self._hash_file(filepath, f.hash_algo)
is_hash_match = f.hash_value == hexdig \
or f.hash_value == b64encode(dig).decode()
return is_size_match and is_hash_match
def _download_file(self, f: FileInfo):
"""Download a single file."""
filepath = join(self._save_directory, f.name)
if self._is_file_ok(f, filepath):
return True
elif exists(filepath):
filepath = join(self._save_directory, \
self._same_filename(f.name, self._save_directory))
self._url_opener.retrieve(f.dlurl, filepath)
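
A quick sketch of the `_same_filename` collision handling; the directory and
file names here are made up:

```python
import os

from scrapthechan.scraper import Scraper

# Prepare a directory that already contains colliding names.
os.makedirs("out", exist_ok=True)
for name in ("wall.jpg", "wall(1).jpg"):
    open(os.path.join("out", name), "w").close()

s = Scraper("out", [])
print(s._same_filename("wall.jpg", "out"))  # -> wall(2).jpg
```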

scrapthechan/scrapers/__init__.py Normal file

scrapthechan/scrapers/basicscraper.py Normal file

@@ -0,0 +1,15 @@
"""Implementation of basic sequential one-threaded scraper that downloads
files one by one."""
from scrapthechan.scraper import Scraper
__all__ = ["BasicScraper"]
class BasicScraper(Scraper):
def run(self):
"""Download files one by one."""
for i, f in enumerate(self._files, start=1):
if not self._progress_callback is None:
self._progress_callback(i)
self._download_file(f)
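
Putting the pieces together, a hedged end-to-end sketch; the thread URL is
made up and must be replaced with a live one:

```python
from os import makedirs

from scrapthechan.parsers import get_parser_by_url
from scrapthechan.scrapers.basicscraper import BasicScraper

# Hypothetical thread; substitute a real one.
parser = get_parser_by_url("https://lainchan.org/art/res/1234.html")
makedirs("art-dump", exist_ok=True)
scraper = BasicScraper("art-dump", parser.files,
    lambda i: print(f"{i}/{len(parser.files)}", end="\r"))
scraper.run()
```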

setup.cfg Normal file

@@ -0,0 +1,42 @@
[metadata]
name = scrapthechan
version = attr: scrapthechan.__version__
description =
    Scrap the files posted in a thread on an imageboard. Currently supports
    4chan.org, lainchan.org and 2ch.hk.
long_description = file: README.md
long_description_content_type = text/markdown
author = Alexander "Arav" Andreev
author_email = me@arav.top
url = https://arav.top
keywords =
    scraper
    imageboard
    4chan
    2ch
    lainchan
license = MIT
license_file = COPYING
classifiers =
    Development Status :: 2 - Pre-Alpha
    Environment :: Console
    Intended Audience :: End Users/Desktop
    License :: OSI Approved :: MIT License
    Natural Language :: English
    Operating System :: OS Independent
    Programming Language :: Python :: 3.7
    Programming Language :: Python :: 3.8
    Topic :: Utilities

[options]
zip_safe = False
python_requires = >=3.7
include_package_data = True
packages = find:

[options.package_data]
* = COPYING, README.md

[options.entry_points]
console_scripts =
    scrapthechan = scrapthechan.cli.scraper:main

setup.py Normal file

@@ -0,0 +1,3 @@
from setuptools import setup
setup()