# wookiee-dl/wookiee_dl/cli.py

import os

import click
import mediawiki
from pywikibot import FilePage, Page, Site

# Both clients are created once, when this module is imported.
# pywikibot site handle; every Page object in this module is built against it.
site = Site("en", "wookieepedia")
# pymediawiki client pointed at the wiki's api.php; used below for title search
# and for fetching a page's rendered HTML.
site_mw = mediawiki.MediaWiki("https://starwars.fandom.com/api.php")


def search_for_page_obj(query: str, num_results=10, top_result=False) -> Page:
    """Resolve a search query or a wiki URL to a pywikibot ``Page``.

    Args:
        query: Free-text search terms, or a URL containing
            ``https://starwars.fandom.com/wiki/``. For a URL, the remainder
            after that prefix is used as the page title and no search is done.
        num_results: Maximum number of search hits to request.
        top_result: When true, pick the first search hit without prompting.

    Returns:
        The ``Page`` for the chosen title on the module-level ``site``.

    Raises:
        click.ClickException: If the search returns no results.
    """
    base_url = "https://starwars.fandom.com/wiki/"
    if base_url in query:
        # A pasted article URL already names the page; skip the search.
        return Page(site, query.replace(base_url, ""))

    search_results = site_mw.search(query, results=num_results)
    if not search_results:
        # Without this guard, indexing search_results[0] below would raise a
        # bare IndexError; report the empty search as a normal CLI error.
        raise click.ClickException(f"No pages found for {query!r}")

    if top_result:
        return Page(site, search_results[0])

    chosen_title = click.prompt(
        "Which page do you want?",
        type=click.Choice(list(search_results), case_sensitive=False),
        show_choices=True,
        default=search_results[0],
    )
    return Page(site, chosen_title)


# Reusable click decorators shared by the subcommands below.

# Positional search text (or page URL) identifying the wiki page.
query_argument = click.argument("query", required=True)
# Flag that makes the command take the first search hit instead of prompting.
top_result_argument = click.option(
    "--top_result",
    "-y",
    is_flag=True,
    default=False,
    help="Automatically use the top search result",
)
# Directory that output is written into; defaults to the current directory.
# exists=True means click rejects a directory that does not already exist.
output_directory_option = click.option(
    "-o",
    "--output_directory",
    type=click.Path(file_okay=False, writable=True, exists=True),
    default=".",
)


# Root command group; the subcommands in this module attach to it through
# @cli.command. The docstring below is what click shows as the help text.
# invoke_without_command=True lets click run this callback even when no
# subcommand is given; the callback has no body, so that case does nothing.
@click.group(invoke_without_command=True)
@click.version_option()
@click.pass_context
def cli(
    ctx: click.Context,
):
    """Command line interface of convenience utilities for use with Wookieepedia (via pywikibot and mediawiki)"""


@cli.command(name="image")
@click.pass_context
@query_argument
@top_result_argument
@output_directory_option
@click.argument(
    "template",
    type=click.Choice(
        ["weapon", "character", "species", "starship_class", "family", "Ship_series"]
    ),
    required=True,
)
def image(ctx, query, top_result, output_directory, template):
    """Download the main image for a page
    Requires to know the template of the main info block on the page.
    Currently not implemented
    """
    # The parameter names must match what the decorators above declare
    # (query, top_result, output_directory, template); click passes them as
    # keyword arguments, so any other name fails with TypeError before the
    # body runs.
    raise NotImplementedError

    # TODO: unreachable draft implementation, kept for when the command is
    # enabled. It has not been exercised against the live wiki.
    page = search_for_page_obj(query, top_result=top_result)

    # pywikibot titles use spaces, while the template choices use underscores.
    template_title = f"Template:{template.capitalize()}".replace("_", " ")
    blocks = [
        parameters_list
        for template_page, parameters_list in page.templatesWithParams()
        if template_page.title() == template_title
    ]
    if not blocks:
        click.echo("no image found")
        return

    # Parameters arrive as "name=value" strings. partition() keeps any "="
    # inside the value and lets positional parameters (no "=") be skipped.
    params = {}
    for raw_param in blocks[0]:
        key, separator, value = raw_param.partition("=")
        if separator:
            params[key.strip()] = value.strip()

    image_title = params.get("image", "").replace("[[", "").replace("]]", "")
    if not image_title:
        click.echo("no image found")
        return

    image_page = Page(site, image_title)
    if image_page.is_filepage():
        filename = os.path.join(
            output_directory, image_page.title(as_filename=True, with_ns=False)
        )
        FilePage(image_page).download(filename)
    else:
        click.echo("no image found")


@cli.command(name="html")
@click.pass_context
@query_argument
@top_result_argument
@output_directory_option
@click.option("-n", "--output_filename", type=str)
def html_dump(
    ctx: click.Context,
    query: str,
    top_result: bool,
    output_filename: str,
    output_directory: str,
) -> None:
    """Outputs the Wiki page as an HTML document"""
    # ctx: click context injected by pass_context (unused here).
    # query: search text or page URL; top_result: skip the selection prompt.
    # output_filename: file name to write, or None to derive it from the title.
    # output_directory: directory the file is written into.
    page = search_for_page_obj(query, top_result=top_result)
    page_mw = site_mw.page(page.title())

    # Always bind a file name: use the one the user supplied, otherwise derive
    # "<page title>.html". Previously a supplied name left it unbound.
    filename = output_filename or f"{page.title(as_filename=True)}.html"
    os.makedirs(output_directory, exist_ok=True)
    filename = os.path.join(output_directory, filename)

    # Explicit UTF-8 so non-ASCII page content does not depend on the
    # platform's default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(page_mw.html)

    # Print the written path so callers and scripts can pick it up.
    click.echo(filename)


@cli.command(name="images")
@click.pass_context
@query_argument
@top_result_argument
@output_directory_option
def dump_all_images(
    ctx: click.Context, query: str, top_result, output_directory: click.Path
) -> None:
    """Dump all images from a given page."""
    # Resolve the query first; all files go into a folder named after the page
    # inside the requested output directory.
    wiki_page = search_for_page_obj(query, top_result=top_result)
    folder_name = wiki_page.title(as_filename=True)
    destination = os.path.join(output_directory, folder_name)
    os.makedirs(destination, exist_ok=True)

    # Each linked file is saved under its title without the namespace prefix.
    for file_page in wiki_page.imagelinks():
        local_name = file_page.title(as_filename=True, with_ns=False)
        file_page.download(os.path.join(destination, local_name))

    # Report where the images were written.
    click.echo(destination)


@cli.command(name="text")
@click.pass_context
@query_argument
@top_result_argument
@click.option(
    "-f",
    "--format",
    type=click.Choice(["html", "markdown", "url"], case_sensitive=False),
    prompt=True,
    default="url",
)
@click.option(
    "-o",
    "--output",
    "output_file",
    type=click.Path(exists=False, dir_okay=False, allow_dash=True, writable=True),
    default="-",
)
def output_text(
    ctx: click.Context, query: str, top_result, format, output_file
) -> None:
    """Return page link in url formats"""
    # Local import: only the HTML branch needs escaping.
    from html import escape

    # ctx: click context injected by pass_context (unused here).
    # format: one of "html", "markdown" or "url"; the name is fixed by the
    #   --format option, which is why it shadows the builtin inside this body.
    # output_file: destination path, or "-" for standard output.
    page = search_for_page_obj(query, top_result=top_result)
    title = page.title().title()
    url = page.full_url()

    if format == "markdown":
        text = f"[{title} on Wookieepedia]({url})"
    elif format == "html":
        # The href's closing quote belongs right after the URL. Both values
        # are escaped so "&" or quotes cannot break the markup.
        text = f'<a href="{escape(url, quote=True)}">{escape(title)} on Wookieepedia</a>'
    elif format == "url":
        text = url
    else:
        text = ""

    with click.open_file(output_file, "w") as f:
        f.write(text)


# Run the command group when this file is executed as a script.
if __name__ == "__main__":
    cli()