from typing import List, Tuple, Dict, Any
import re
from wdp.models import Word
from jinja2 import Template
# TODO: add IPA handling e.g. {{IPA|en|foo|bar}}
from wdp.validate import validate_word
# Jinja2 template for one Wikitext entry: a language heading followed by the
# sections of every item in ``words``.
#
# Variables the template reads:
#   lang_name, lang_code -- used in the top heading and inside wiki templates
#   words                -- sequence of per-word mappings, looked up by the keys
#                           'alternative_forms', 'description', 'etymology',
#                           'pronunciations', 'grouped_definitions',
#                           'usage_notes', 'conjugation', 'declension',
#                           'inflection' and 'references'
#   section(depth, text) -- callable whose result is emitted as a section header
#   LL, RR               -- emitted wherever a wiki template opens/closes;
#                           format_entry passes "{{" and "}}" for them, because
#                           bare double braces are Jinja expression delimiters
#
# When ``words`` holds more than one item, each word is introduced by an
# "Etymology N" header (N is the 1-based loop index). Optional sections are
# emitted only when the corresponding value is truthy. Pronunciations whose
# notation is "ipa" (case-insensitive) are wrapped in an IPA wiki template;
# all others are emitted as plain text.
#
# The blank lines this template produces are cleaned up afterwards by the
# regex passes in format_entry, so the whitespace inside the string matters.
ENTRY_TEMPLATE = Template(
"""=={{lang_name}}==
{% for word in words %}
{% if words|length > 1 %}
{{ section(2, "Etymology " ~ loop.index) }}
{% endif %}
{% if word['alternative_forms'] %}
{{ section(3, "Alternative forms") }}
{% for form in word['alternative_forms'] %}
* {{LL}}alter|{{lang_code}}|{{form.alternative_form}}||{{form.description_of_use}}{{RR}}
{% endfor %}
{% endif %}
{% if word['description'] %}
{{ section(3, "Description") }}
{{ word['description'] }}
{% endif %}
{% if word['etymology'] %}
{{ section(3, "Etymology") }}
{{ word['etymology'] }}
{% endif %}
{% if word['pronunciations'] %}
{{ section(3, "Pronunciation") }}
{% for pronunciation in word['pronunciations'] %}
{% if pronunciation.notation|lower == "ipa" %}
* {% if pronunciation.accent %}{{LL}}a|{{pronunciation.accent}}{{RR}} {% endif %}{{LL}}IPA|{{lang_code}}|{{pronunciation.pronunciation}}{{RR}}
{% else %}
* {% if pronunciation.accent %}{{LL}}a|{{pronunciation.accent}}{{RR}} {% endif %}{{pronunciation.pronunciation}}
{% endif %}
{% endfor %}
{% endif %}
{% for pos in word["grouped_definitions"] %}
{{ section(3, pos.capitalize()) }}
{{LL}}head|{{lang_code}}|{{pos}}{{RR}}
{% for definition in word["grouped_definitions"][pos] %}
# {{definition.definition}}
{% for usage_example in definition.usage_examples %}
#: {{LL}}uxi|{{lang_code}}|{{usage_example.text}}|{{usage_example.translation}}{{RR}}
{% endfor %}
{% endfor %}
{% endfor %}
{% if word['usage_notes'] %}
{{ section(3, "Usage notes") }}
{{ word['usage_notes'] }}
{% endif %}
{% if word['conjugation'] %}
{{ section(3, "Conjugation") }}
{{ word['conjugation'] }}
{% endif %}
{% if word['declension'] %}
{{ section(3, "Declension") }}
{{ word['declension'] }}
{% endif %}
{% if word['inflection'] %}
{{ section(3, "Inflection") }}
{{ word['inflection'] }}
{% endif %}
{% if word['references'] %}
{{ section(3, "References") }}
{{ word['references'] }}
{% endif %}
{% endfor %}
"""
)
def group_definitions_by_pos(context: dict) -> Dict[str, List[Any]]:
    """
    Return a dict mapping from part of speech to a list of all the definitions with that part of speech.

    Parts of speech are compared case-insensitively: the keys of the result are
    lowercased, and definitions whose part of speech differs only by case
    (e.g. "Noun" and "noun") end up in the same list instead of overwriting
    each other.

    Args:
        context: a mapping whose "definitions" entry is an iterable of
            definitions, each subscriptable by "part_of_speech" (a string)
    Returns: Dict keyed by lowercased part of speech. Keys appear in order of
        first occurrence and each list keeps the original definition order, so
        the result is the same on every run.
    """
    grouped: Dict[str, List[Any]] = {}
    for definition in context["definitions"]:
        # Single pass; dicts preserve insertion order, which keeps the
        # rendered section order stable (a set of strings would not).
        pos_key = definition["part_of_speech"].lower()
        grouped.setdefault(pos_key, []).append(definition)
    return grouped
def format_entry(word_group: List[Word], lang_code: str, lang_name: str) -> Tuple[str, str]:
    """
    Render a group of Word objects as one Wikitext entry.

    Args:
        word_group: the Word objects to render together; the ``word_form`` of
            the first one is returned alongside the text
        lang_code: passed to the template as ``lang_code``
        lang_name: passed to the template as ``lang_name``
    Returns: A ``(word_form, wikitext)`` tuple
    """

    def section(depth, content):
        """
        Build a section header, nested one level deeper when the group holds
        more than one word.
        Args:
            depth: the level the section has in an entry with a single etymology
            content: a string to be displayed in a header
        Returns: Formatted section header
        """
        extra_level = 1 if len(word_group) > 1 else 0
        marker = "=" * (depth + extra_level)
        return marker + content + marker

    # Build one template context per Word; a Word generally carries several
    # definitions with different parts of speech, so pre-group them here.
    per_word_contexts = []
    for item in word_group:
        item_context = item.to_dict()
        item_context["grouped_definitions"] = group_definitions_by_pos(item_context)
        per_word_contexts.append(item_context)

    template_vars: Dict[str, Any] = {
        "lang_code": lang_code,
        "lang_name": lang_name,
        "words": per_word_contexts,
    }
    wikitext = ENTRY_TEMPLATE.render(section=section, LL="{{", RR="}}", **template_vars)

    # Undo the layout that only exists to keep the jinja template readable:
    # first drop leading spaces on every line...
    wikitext = "\n".join(line.lstrip(" ") for line in wikitext.split("\n"))

    # ...then normalise blank lines. The passes are order-dependent.
    cleanup_passes = (
        # no blank lines in front of a line starting with '#'
        (r"\n\n+#", "\n#"),
        # squeeze newline runs between two '=' characters down to one
        (r"=\n+=", "=\n="),
        # at most one blank line anywhere
        (r"\n\n+", "\n\n"),
        # no blank line directly after a '===' header ending
        (r"===\n\n", "===\n"),
        # typography: an empty section should have an extra newline
        (r"=\n=", "=\n\n="),
        # the headword template should be followed by an extra newline
        (r"({{head\|[^\n]*})\n#", r"\1\n\n#"),
    )
    for pattern, replacement in cleanup_passes:
        wikitext = re.sub(pattern, replacement, wikitext)

    return word_group[0].word_form, wikitext
def group_words(words: List[Word]) -> List[List[Word]]:
    """
    Group words based on their word_form attribute.

    Args:
        words: the words to partition; each must expose a hashable
            ``word_form`` attribute
    Returns: One list per distinct ``word_form``. Groups appear in order of
        first occurrence and each group keeps the input order, so the result
        is the same on every run.
    """
    groups: Dict[Any, List[Word]] = {}
    for word in words:
        # Single pass; dict insertion order gives a stable group order
        # (iterating a set of word forms would not).
        groups.setdefault(word.word_form, []).append(word)
    return list(groups.values())