"""Click-based command line helpers for Wookieepedia.

Pages are located with the ``mediawiki`` search API and then wrapped in
pywikibot ``Page`` objects for title, URL and image handling.
"""
import os

import click
import mediawiki
from pywikibot import FilePage, Page, Site

from wookiee_dl.wookieepedia_family import Family as WookieepediaFamily

# Shared handles used by every command: pywikibot for page/file objects,
# mediawiki for search and rendered HTML.
site = Site("en", fam=WookieepediaFamily())
site_mw = mediawiki.MediaWiki("https://starwars.fandom.com/api.php")


def search_for_page_obj(query: str, num_results=10, top_result=False) -> Page:
    """Resolve *query* to a pywikibot ``Page`` on the configured site.

    Args:
        query: Either a full ``https://starwars.fandom.com/wiki/...`` URL or
            free-text search terms.
        num_results: Maximum number of search hits requested from the
            mediawiki search API.
        top_result: When true, take the first search hit without prompting.

    Returns:
        The ``Page`` for the chosen title.

    Raises:
        click.ClickException: If the search returns no results.
    """
    base_url = "https://starwars.fandom.com/wiki/"
    if base_url in query:
        # A page URL was given directly; whatever follows the prefix is used
        # as the title.
        return Page(site, query.replace(base_url, ""))

    search_results = site_mw.search(query, results=num_results)
    if not search_results:
        # Previously surfaced as a bare IndexError on search_results[0].
        raise click.ClickException(f'No pages found for "{query}"')

    if top_result:
        title = search_results[0]
    else:
        choices = click.Choice(list(search_results), case_sensitive=False)
        title = click.prompt(
            "Which page do you want?",
            type=choices,
            show_choices=True,
            default=search_results[0],
        )
    return Page(site, title)


# Reusable click parameter decorators shared by the sub-commands below.
query_argument = click.argument("query", required=True)
top_result_argument = click.option(
    "--top_result",
    "-y",
    is_flag=True,
    default=False,
    help="Automatically use the top search result",
)
output_directory_option = click.option(
    "-o",
    "--output_directory",
    type=click.Path(file_okay=False, writable=True, exists=True),
    default=".",
)


@click.group(invoke_without_command=True)
@click.version_option()
@click.pass_context
def cli(
    ctx: click.Context,
):
    """Command line interface of convenience utilities for use with Wookieepedia (via pywikibot and mediawiki)"""


@cli.command(name="image")
@click.pass_context
@query_argument
@top_result_argument
@output_directory_option
@click.argument(
    "template",
    type=click.Choice(
        ["weapon", "character", "species", "starship_class", "family", "Ship_series"]
    ),
    required=True,
)
def image(ctx, query, top_result, output_directory, template):
    """Download the main image for a page

    Requires to know the template of the main info block on the page.

    Currently not implemented
    """
    # The callback parameters must match the click parameter names (query,
    # top_result, output_directory, template); click passes them by keyword.
    raise NotImplementedError

    # NOTE: unreachable draft, parked here until the command is enabled.
    page = search_for_page_obj(query, top_result=top_result)
    blocks = [
        parameters_list
        for template_page, parameters_list in page.templatesWithParams()
        if template_page.title() == f"Template:{template.capitalize()}"
    ]
    # Each template parameter is a "key=value" string; split on the first "="
    # only so values that themselves contain "=" stay intact.
    params = {
        key: value
        for key, _, value in (param.partition("=") for param in blocks[0])
    }
    image_page = Page(site, params["image"].replace("[[", "").replace("]]", ""))
    if image_page.is_filepage():
        filename = image_page.title(as_filename=True, with_ns=False)
        os.makedirs(output_directory, exist_ok=True)
        filename = os.path.join(output_directory, filename)
        FilePage(image_page).download(filename)
    else:
        print("no image found")


@cli.command(name="html")
@click.pass_context
@query_argument
@top_result_argument
@output_directory_option
@click.option("-n", "--output_filename", type=str)
def html_dump(
    ctx: click.Context,
    query: str,
    top_result: bool,
    output_filename: str,
    output_directory: click.Path,
) -> None:
    """Outputs the Wiki page as an HTML document"""
    page = search_for_page_obj(query, top_result=top_result)
    page_mw = site_mw.page(page.title())

    # Honour -n/--output_filename when given; otherwise derive the name from
    # the page title. Either way the file is placed in output_directory.
    filename = output_filename or f"{page.title(as_filename=True)}.html"
    os.makedirs(output_directory, exist_ok=True)
    filename = os.path.join(output_directory, filename)

    # Explicit UTF-8 so non-ASCII page content does not depend on the
    # platform's default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        f.write(page_mw.html)
    click.echo(filename)


@cli.command(name="images")
@click.pass_context
@query_argument
@top_result_argument
@output_directory_option
def dump_all_images(
    ctx: click.Context, query: str, top_result, output_directory: click.Path
) -> None:
    """Dump all images from a given page."""
    page = search_for_page_obj(query, top_result=top_result)

    # Images are collected in a sub-directory named after the page.
    destination = os.path.join(output_directory, page.title(as_filename=True))
    os.makedirs(destination, exist_ok=True)

    for image_page in page.imagelinks():
        filename = os.path.join(
            destination, image_page.title(as_filename=True, with_ns=False)
        )
        image_page.download(filename)
    click.echo(destination)


@cli.command(name="text")
@click.pass_context
@query_argument
@top_result_argument
@click.option(
    "-f",
    "--format",
    type=click.Choice(["html", "markdown", "url"], case_sensitive=False),
    prompt=True,
    default="url",
)
@click.option(
    "-o",
    "--output",
    "output_file",
    type=click.Path(exists=False, dir_okay=False, allow_dash=True, writable=True),
    default="-",
)
def output_text(
    ctx: click.Context, query: str, top_result, format, output_file
) -> None:
    """Return page link in url formats"""
    page = search_for_page_obj(query, top_result=top_result)

    # One template per --format choice. The html entry was an empty string,
    # which made "-f html" write nothing.
    # NOTE(review): anchor markup reconstructed to mirror the markdown form -
    # confirm the intended wording.
    templates = {
        "markdown": "[{title} on Wookieepedia]({url})",
        "html": '<a href="{url}">{title} on Wookieepedia</a>',
        "url": "{url}",
    }
    string_format = templates.get(format, "")

    # "-" (the default) makes click.open_file write to stdout.
    with click.open_file(output_file, "w") as f:
        f.write(string_format.format(title=page.title().title(), url=page.full_url()))


if __name__ == "__main__":
    cli()