initial commit
This commit is contained in:
193
wookiee_dl/cli.py
Normal file
193
wookiee_dl/cli.py
Normal file
@@ -0,0 +1,193 @@
|
||||
import os
|
||||
|
||||
import click
|
||||
import mediawiki
|
||||
from pywikibot import FilePage, Page, Site
|
||||
|
||||
# Shared pywikibot site handle, created once at import time with the arguments
# "en" and "wookieepedia"; every Page object in this module is built against it.
site = Site("en", "wookieepedia")
|
||||
# Shared MediaWiki API client pointed at the Fandom api.php endpoint; used in
# this module for title search and for fetching a page's rendered HTML.
site_mw = mediawiki.MediaWiki("https://starwars.fandom.com/api.php")
|
||||
|
||||
|
||||
def search_for_page_obj(query: str, num_results=10, top_result=False) -> Page:
    """Resolve a search query or a Wookieepedia article URL to a ``Page``.

    Args:
        query: Either free text to search for, or a string containing the
            ``https://starwars.fandom.com/wiki/`` prefix, in which case the
            remainder of the string is used directly as the page title.
        num_results: Maximum number of search hits to request.
        top_result: When true, take the first search hit without prompting.

    Returns:
        A pywikibot ``Page`` on the module-level ``site`` for the chosen title.

    Raises:
        click.ClickException: If the search returns no results.
    """
    base_url = "https://starwars.fandom.com/wiki/"
    if base_url in query:
        # A direct article link needs no search: strip the prefix and use
        # whatever is left as the title.
        return Page(site, query.replace(base_url, ""))

    search_results = list(site_mw.search(query, results=num_results))
    if not search_results:
        # Without this guard, indexing [0] below would surface as a bare
        # IndexError traceback instead of a readable CLI error.
        raise click.ClickException(f"No Wookieepedia pages found for {query!r}")

    if top_result:
        return Page(site, search_results[0])

    # Let the user choose among the hits; the first one is the default.
    title = click.prompt(
        "Which page do you want?",
        type=click.Choice(search_results, case_sensitive=False),
        show_choices=True,
        default=search_results[0],
    )
    return Page(site, title)
|
||||
|
||||
|
||||
# Reusable decorator: required positional "query" argument shared by the
# sub-commands below (delivered to the command function as ``query``).
query_argument = click.argument("query", required=True)
|
||||
# Reusable decorator: boolean flag (--top_result / -y, off by default) that the
# sub-commands forward to search_for_page_obj to skip the interactive prompt.
top_result_argument = click.option(
    "--top_result",
    "-y",
    is_flag=True,
    default=False,
    help="Automatically use the top search result",
)
|
||||
# Reusable decorator: -o / --output_directory option, defaulting to the current
# directory and validated as a writable directory (not a file).
# NOTE(review): exists=True makes click reject a directory that does not exist
# yet, so the os.makedirs(output_directory, ...) calls in the commands can never
# actually create it — confirm which behaviour is intended.
output_directory_option = click.option(
    "-o",
    "--output_directory",
    type=click.Path(file_okay=False, writable=True, exists=True),
    default=".",
)
|
||||
|
||||
|
||||
# Root command group; the sub-commands in this module register themselves on
# it via @cli.command(...).
@click.group(invoke_without_command=True)
@click.version_option()
@click.pass_context
def cli(
    ctx: click.Context,
) -> None:
    """Command line interface of convenience utilities for use with Wookieepedia (via pywikibot and mediawiki)"""
    # The body is intentionally empty apart from the docstring, which click
    # shows as the group's help text.
    # NOTE(review): with invoke_without_command=True and no body, running the
    # program without a sub-command does nothing — confirm this is intended
    # (ctx is accepted but unused).
|
||||
|
||||
|
||||
@cli.command(name="image")
@click.pass_context
@query_argument
@top_result_argument
@output_directory_option
@click.argument(
    "template",
    type=click.Choice(
        ["weapon", "character", "species", "starship_class", "family", "Ship_series"]
    ),
    required=True,
)
def image(ctx, query, template, top_result, output_directory):
    """Download the main image for a page

    Requires to know the template of the main info block on the page.
    Currently not implemented
    """
    # click passes parameters by keyword using the declared names ("query",
    # "top_result", "output_directory", "template"); the signature has to
    # accept exactly those, otherwise invoking the command dies with a
    # TypeError before reaching the intended NotImplementedError.
    #
    # TODO: once the draft in _download_infobox_image has been verified against
    # the live wiki, replace the raise with:
    #     page = search_for_page_obj(query, top_result=top_result)
    #     click.echo(_download_infobox_image(page, template, output_directory))
    raise NotImplementedError("the 'image' command is not implemented yet")


def _download_infobox_image(page, template, output_directory="."):
    """Draft: download the image named in a page's info-block template.

    Not called by any command yet. It preserves the work-in-progress logic
    that used to sit (unreachable) after the ``raise`` in ``image``, with its
    obvious defects fixed.

    Args:
        page: pywikibot ``Page`` to inspect.
        template: Template name as offered by the ``image`` command
            (e.g. ``"starship_class"``).
        output_directory: Directory the file is written into.

    Returns:
        The path of the downloaded file, or ``None`` when no image was found.

    Raises:
        click.ClickException: If the page does not use the requested template.
    """
    # Compare case-insensitively with underscores treated as spaces, so that
    # "starship_class" can match a template title rendered as "Starship class".
    wanted = template.replace("_", " ").casefold()
    blocks = [
        parameters_list
        for template_page, parameters_list in page.templatesWithParams()
        if template_page.title(with_ns=False).replace("_", " ").casefold() == wanted
    ]
    if not blocks:
        raise click.ClickException(
            f"Template {template!r} is not used on {page.title()!r}"
        )

    # Entries are expected to look like "name=value"; partition() keeps any
    # further "=" inside the value and skips entries without a separator.
    params = {}
    for entry in blocks[0]:
        key, sep, value = entry.partition("=")
        if sep:
            params[key.strip()] = value.strip()

    # Strip wiki-link brackets and any "|options" suffix from the image value.
    image_title = (
        params.get("image", "").replace("[[", "").replace("]]", "").partition("|")[0]
    ).strip()
    if not image_title:
        print("no image found")
        return None

    image_page = Page(site, image_title)
    if not image_page.is_filepage():
        print("no image found")
        return None

    os.makedirs(output_directory, exist_ok=True)
    filename = os.path.join(
        output_directory, image_page.title(as_filename=True, with_ns=False)
    )
    FilePage(image_page).download(filename)
    return filename
|
||||
|
||||
|
||||
@cli.command(name="html")
@click.pass_context
@query_argument
@top_result_argument
@output_directory_option
@click.option("-n", "--output_filename", type=str)
def html_dump(
    ctx: click.Context,
    query: str,
    top_result: bool,
    output_filename: str,
    output_directory: str,
) -> None:
    """Outputs the Wiki page as an HTML document"""
    # Args (the docstring above is the command's help text, so details live here):
    #   query            -- search text or article URL, resolved to a page.
    #   top_result       -- skip the interactive choice and take the first hit.
    #   output_filename  -- file name to use; None when -n was not given, in
    #                       which case a name is derived from the page title.
    #   output_directory -- directory the file is written into.
    page = search_for_page_obj(query, top_result=top_result)
    page_mw = site_mw.page(page.title())

    # Always end up with a filename: previously it was only assigned when
    # -n was omitted, so passing -n crashed with an unbound local.
    filename = output_filename or f"{page.title(as_filename=True)}.html"

    os.makedirs(output_directory, exist_ok=True)
    filename = os.path.join(output_directory, filename)

    # Explicit UTF-8 so that non-ASCII page content does not depend on the
    # platform's default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(page_mw.html)

    # Report where the document was written.
    click.echo(filename)
|
||||
|
||||
|
||||
@cli.command(name="images")
@click.pass_context
@query_argument
@top_result_argument
@output_directory_option
def dump_all_images(
    ctx: click.Context, query: str, top_result, output_directory: str
) -> None:
    """Dump all images from a given page."""
    # Files are written to <output_directory>/<page title>/, one per image
    # linked from the page; that directory is echoed at the end.
    page = search_for_page_obj(query, top_result=top_result)
    destination = os.path.join(output_directory, page.title(as_filename=True))
    os.makedirs(destination, exist_ok=True)

    # "file_page" rather than "image": the latter would shadow the
    # module-level ``image`` command inside this function.
    for file_page in page.imagelinks():
        filename = os.path.join(
            destination, file_page.title(as_filename=True, with_ns=False)
        )
        # download() reports failure through its boolean result; surface it on
        # stderr instead of silently dropping the file, and keep going.
        if not file_page.download(filename):
            click.echo(f"Failed to download {file_page.title()}", err=True)

    click.echo(destination)
|
||||
|
||||
|
||||
@cli.command(name="text")
@click.pass_context
@query_argument
@top_result_argument
@click.option(
    "-f",
    "--format",
    type=click.Choice(["html", "markdown", "url"], case_sensitive=False),
    prompt=True,
    default="url",
)
@click.option(
    "-o",
    "--output",
    "output_file",
    type=click.Path(exists=False, dir_okay=False, allow_dash=True, writable=True),
    default="-",
)
def output_text(
    ctx: click.Context, query: str, top_result, format, output_file
) -> None:
    """Return page link in url formats"""
    # ``format`` shadows the builtin, but the name is dictated by the --format
    # option and is kept so the command line interface stays unchanged.
    import html  # local import: only needed for the HTML output form

    page = search_for_page_obj(query, top_result=top_result)
    # Use the page title verbatim; str.title() would re-case it
    # (e.g. "TIE fighter" -> "Tie Fighter").
    title = page.title()
    url = page.full_url()

    if format == "markdown":
        text = f"[{title} on Wookieepedia]({url})"
    elif format == "html":
        # The closing quote of href belongs before ">", and both values are
        # escaped so that quotes or "&" cannot break the markup.
        text = (
            f'<a href="{html.escape(url, quote=True)}">'
            f"{html.escape(title)} on Wookieepedia</a>"
        )
    elif format == "url":
        text = url
    else:
        text = ""

    # "-" (the default) makes click.open_file write to stdout.
    with click.open_file(output_file, "w") as f:
        f.write(text)
|
||||
|
||||
|
||||
# Run the click command group when this module is executed as a script.
if __name__ == "__main__":
    cli()
|
||||
Reference in New Issue
Block a user