Malline:Hakusana/kokoaminen
Mallineella merkityt hakusanat voi koota hakemistoksi seuraavalla Python-ohjelmalla.
Paketit mwparserfromhell ja pywikibot pitää asentaa ensin.
#!/usr/bin/env python3
import html
import re
import sys
import mwparserfromhell
import pywikibot
# Usage message (in Finnish) passed to sys.exit() when the command line is
# wrong or "-h" is given; the program name is taken from sys.argv[0].
usage = f"Käyttö: {sys.argv[0]} <kirjan nimi>"
class Link:
    """A wiki link whose label may contain simple HTML-style tags.

    Attributes:
        text: The label exactly as it was passed in.
        target: The label with plain lowercase tags such as ``<sub>`` or
            ``</sub>`` removed, for use as a page or anchor name.

    Instances order by ``text``.  No ``__eq__`` is defined, so equality
    stays the default identity comparison; only ordering is provided.
    """

    def __init__(self, target):
        """Store *target* as the label and derive the tag-free link target."""
        self.text = target
        # Only attribute-less lowercase tags are stripped; any other "<"
        # is left in the target untouched.
        if "<" in target:
            self.target = re.sub(r'</?[a-z]+>', '', target)
        else:
            self.target = target

    def __repr__(self):
        """Return a debugging representation showing the original label."""
        return f"{type(self).__name__}({self.text!r})"

    def __str__(self):
        """Return the ``target|text`` form used inside ``[[...]]``."""
        return f"{self.target}|{self.text}"

    def __lt__(self, other):
        """Order links by label; defer to Python for non-Link operands."""
        if not isinstance(other, Link):
            # Let Python raise its normal TypeError instead of failing with
            # an AttributeError on ``other.text``.
            return NotImplemented
        return self.text < other.text
def extract_keyword(template):
    """Return the index keyword carried by one ``{{hakusana}}`` call.

    The keyword is the value of the first parameter with ``<nowiki>`` tags
    removed and HTML character references decoded.

    Args:
        template: A mwparserfromhell template node.

    Returns:
        The cleaned keyword string, or ``None`` when the template has no
        first parameter.
    """
    if not template.has(1):
        return None
    param = template.get(1)
    # Use the value only, so an explicit "1=foo" does not leak the "1=" key.
    keyword = str(param.value)
    if param.showkey:
        # MediaWiki trims whitespace around explicitly named parameters.
        keyword = keyword.strip()
    keyword = re.sub(r"</?nowiki>", r"", keyword)
    return html.unescape(keyword)


def collect_index(site, book_prefix):
    """Gather index entries from every page of one book.

    Args:
        site: The pywikibot site to query.
        book_prefix: Book title followed by "/"; only main-namespace pages
            whose title starts with it are scanned.

    Returns:
        An unsorted list of ``(keyword, book, page_link, section_link)``
        tuples, where ``section_link`` is ``None`` for keywords found
        before the first level-2 heading.
    """
    marker = pywikibot.Page(site, "Malline:hakusana")
    index = []
    for page in site.page_embeddedin(marker, namespaces=[0], content=True):
        title = page.title()
        if not title.startswith(book_prefix):
            continue
        # book_prefix ends in "/", so the split always yields two parts.
        book, book_page = title.split("/", maxsplit=1)
        page_link = Link(book_page)
        wikicode = mwparserfromhell.parse(page.text)
        sections = wikicode.get_sections(
            levels=[2], include_headings=True, include_lead=True
        )
        for section in sections:
            # The lead section may be empty (e.g. the page opens with a
            # heading), so do not index into it unconditionally.
            first = section.nodes[0] if section.nodes else None
            if isinstance(first, mwparserfromhell.nodes.heading.Heading):
                sect_head = Link(first.title.strip())
            else:
                sect_head = None
            for template in section.ifilter_templates():
                # matches() ignores first-letter case and surrounding
                # whitespace, so {{Hakusana|...}} is recognised as well.
                if not template.name.matches("hakusana"):
                    continue
                keyword = extract_keyword(template)
                if keyword is None:
                    continue
                index.append((keyword, book, page_link, sect_head))
    return index


def format_index(index):
    """Yield the wikitext lines of a definition list for sorted *index*.

    Each distinct keyword produces one ``;keyword`` line followed by one
    ``:`` line per occurrence, linking to the page and, when known, to the
    section.  Keywords that start with list markup or apostrophe markup
    are wrapped in ``<nowiki>``.

    Args:
        index: Iterable of ``(keyword, book, page_link, section_link)``
            tuples, already in output order.
    """
    prev = None
    for keyword, book, book_page, sect_head in index:
        if re.match(r"[*:#]|('''*)", keyword):
            keyword = f"<nowiki>{keyword}</nowiki>"
        if keyword != prev:
            yield f";{keyword}"
        if sect_head is not None:
            yield f":[[{book}/{book_page}]] » [[{book}/{book_page.target}#{sect_head}]]"
        else:
            yield f":[[{book}/{book_page}]]"
        prev = keyword


def main():
    """Print the keyword index of the book named on the command line."""
    if len(sys.argv) != 2 or sys.argv[1] == '-h':
        sys.exit(usage)
    book_prefix = sys.argv[1] + "/"
    site = pywikibot.Site('fi', 'wikibooks')
    index = collect_index(site, book_prefix)
    # Sort on plain strings only, so the optional section link never takes
    # part in the comparison; ties keep their order of discovery.
    index.sort(key=lambda entry: (entry[0], entry[1], entry[2].text))
    for line in format_index(index):
        print(line)


if __name__ == "__main__":
    main()