#!/usr/bin/env python3
"""
build-reference-docx.py
=======================
Baut die templates/reference.docx fuer die Pandoc-DOCX-Pipeline aus der
Pandoc-Default-Reference, mit gezielten Anpassungen.
Iteration B1 + B1.5 + B2 + B3 + B4 (aktuell):
B1 - Theme-Schriften (majorFont und minorFont) beide auf Calibri.
B1 - Direkte Schriftnamen-Referenzen in styles.xml auf Calibri
(Code-Schriften wie Consolas bleiben).
B1 - Tabellen-Default-Stil "Table" mit tblBorders=none.
B1.5 - Body-DocDefault 11pt, Heading 1/2/3 auf 15/13/12 pt.
B2 - Header (Name links, "Lebenslauf" rechts) ab Seite 2; Seite 1 mit
leerem Header (titlePg-Mechanik). Footer (rechts: Seite n / m) auf
allen Seiten inkl. Seite 1. Page-Setup explizit: A4, Raender
analog PDF (top/bottom 2.2 cm, left/right 2.5 cm).
B3 - DocDefault widowControl. Heading 1/2/3 mit keepNext + keepLines.
Zusaetzlich 'FirstParagraph' (Pandoc-Stil fuer den ersten Absatz
nach einem Heading) - deckt die fett formatierten Kenntnisse-
Subsection-Labels ab. Hinweis: Listen-Bullet-Schutz (3-3-Regel)
passiert nicht hier, sondern im Post-Processing
(build/post-process-docx.py), das auf das fertige DOCX angewendet
wird - ein Stil kann keine Per-Bullet-Logik abbilden.
B4 - Heading 1/2/3 in destengsblue (0B5394) gefaerbt (themeColor
entfernt, damit die Farbe nicht aus dem Word-Theme kommt).
Hinweis (S08): die zwischenzeitlich eingebauten Heading-
Trennlinien (Bottom-Border + Indent-Trick) wurden zurueck-
gerollt, weil sie in Word linksbuendig statt zentriert
gerendert wurden (Word-Border folgt bei hanging-Indent der
visuellen Absatz-Position, nicht den Indent-Werten).
Geplant in Folge-Iterationen:
C - Foto-Einbindung
D - Hyphenation-Feintuning fuer PDF
"""
from __future__ import annotations
import re
import subprocess
import sys
import tempfile
import zipfile
from pathlib import Path
from xml.etree import ElementTree as ET
# Project layout, resolved relative to the location of this script.
SCRIPT_DIR = Path(__file__).resolve().parent
BASE_DIR = SCRIPT_DIR.parent
TEMPLATES_DIR = BASE_DIR / "templates"
OUTPUT_FILE = TEMPLATES_DIR / "reference.docx"
# XML namespace URIs, keyed by the prefix this script uses for them.
NS = {
    "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
    "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
    "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
    "rel": "http://schemas.openxmlformats.org/package/2006/relationships",
    "ct": "http://schemas.openxmlformats.org/package/2006/content-types",
}
# Register every prefix with ElementTree so serialised XML uses these
# prefixes for the namespaces above.
for prefix, uri in NS.items():
    ET.register_namespace(prefix, uri)
# Register the package-relationships namespace once more with an empty
# prefix; this replaces the "rel" prefix registered by the loop above.
ET.register_namespace("", NS["rel"])
# "{uri}" prefixes for building namespace-qualified tag and attribute names.
W = "{%s}" % NS["w"]
A = "{%s}" % NS["a"]
# Lower-case names of code (monospace) fonts that must keep their typeface;
# is_code_font() compares against this set.
CODE_FONTS = {"consolas", "courier", "courier new", "liberation mono",
              "monaco", "menlo", "fira mono", "fira code"}
# Typeface that replaces all non-code fonts.
TARGET_FONT = "Calibri"
# Font sizes in half-points (the log output prints value / 2 as "pt"):
# 22 -> 11 pt body, 30/26/24 -> 15/13/12 pt headings.
SIZE_BODY = 22
SIZE_HEADING1 = 30
SIZE_HEADING2 = 26
SIZE_HEADING3 = 24
# Style id -> heading size, consumed by set_heading_sizes().
HEADING_SIZES = {"Heading1": SIZE_HEADING1,
                 "Heading2": SIZE_HEADING2,
                 "Heading3": SIZE_HEADING3}
# "Compact" is no longer part of this list - protecting list bullets is done
# per bullet by the post-processing script.
KEEP_STYLES = ("Heading1", "Heading2", "Heading3", "FirstParagraph")
# B4 - heading colours (the separator lines were rolled back in S08, see the
# module docstring). What remains: Heading 1/2/3 in destengsblue, themeColor
# removed.
HEADING_COLOR = "0B5394"  # destengsblue (same colour as in template.tex)
HEADING_COLOR_STYLES = ("Heading1", "Heading2", "Heading3")
# Page geometry. The values match A4 (210 x 297 mm) and margins of 2.2 cm
# (top/bottom) and 2.5 cm (left/right) when read as twentieths of a point.
PAGE_W = 11906
PAGE_H = 16838
MARGIN_TOP = 1247
MARGIN_BOT = 1247
MARGIN_LEFT = 1417
MARGIN_RIGHT = 1417
# NOTE(review): presumably the header/footer distance from the page edge in
# the same unit; their use is not visible in this part of the file - confirm.
HEADER_POS = 720
FOOTER_POS = 720
# Width of the text area (page width minus left and right margin); the name
# suggests it is the position of the right-aligned tab stop in the header.
HEADER_RIGHT_TAB = PAGE_W - MARGIN_LEFT - MARGIN_RIGHT
# Header texts embedded by header_default_xml().
HEADER_LEFT = "Dr.-Ing. Thomas Langer"
HEADER_RIGHT = "Lebenslauf"
def log(msg):
    """Print *msg* on stdout behind the script tag and flush immediately."""
    line = "[build-reference-docx] {}".format(msg)
    print(line, flush=True)
# Byte prefix that write_xml() and write_xml_bytes() put in front of every
# XML part they write.
# NOTE(review): as visible here the literal holds only a newline, while the
# name suggests it should carry the XML declaration - confirm that the
# literal was not truncated.
XML_DECL = b'\n'
def write_xml(tree, dest):
    """Serialise the root of *tree* as UTF-8 and write it to *dest*.

    The serialised bytes are prefixed with XML_DECL. *dest* is a
    ``pathlib.Path`` and is overwritten if it already exists.
    """
    serialized = ET.tostring(tree.getroot(), encoding="utf-8")
    dest.write_bytes(b"".join((XML_DECL, serialized)))
def write_xml_bytes(content, dest):
    """Write the bytes *content* to the path *dest*, prefixed with XML_DECL."""
    payload = b"".join((XML_DECL, content))
    dest.write_bytes(payload)
def fetch_pandoc_default(dest):
    """Write Pandoc's built-in ``reference.docx`` to *dest*.

    Runs ``pandoc --print-default-data-file reference.docx`` and stores the
    captured standard output verbatim.

    Args:
        dest: ``pathlib.Path`` of the file to create or overwrite.

    Raises:
        SystemExit: If pandoc cannot be started, exits with a non-zero
            status (its stderr is forwarded first), or produces no output.
    """
    log("Pandoc-Default-Reference extrahieren ...")
    try:
        result = subprocess.run(
            ["pandoc", "--print-default-data-file", "reference.docx"],
            capture_output=True, check=False,
        )
    except OSError as exc:
        # Typically FileNotFoundError: pandoc is not installed or not on
        # PATH. Report it like the other failures instead of a traceback.
        raise SystemExit(
            f"pandoc konnte nicht gestartet werden (installiert und im PATH?): {exc}"
        ) from exc
    if result.returncode != 0:
        sys.stderr.write(result.stderr.decode("utf-8", errors="replace"))
        raise SystemExit(f"pandoc liefert Exit-Code {result.returncode}")
    if not result.stdout:
        # An empty file would only fail later, when it is opened as a ZIP.
        raise SystemExit("pandoc lieferte keine Daten fuer reference.docx")
    dest.write_bytes(result.stdout)
    log(f" -> {dest} ({dest.stat().st_size} Bytes)")
def unpack_docx(src, dest_dir):
    """Extract every member of the ZIP/DOCX archive *src* into *dest_dir*."""
    with zipfile.ZipFile(src) as archive:
        archive.extractall(path=dest_dir)
def repack_docx(src_dir, dest):
    """Zip all files below *src_dir* into the archive *dest*.

    Member names are POSIX-style paths relative to *src_dir*.
    ``[Content_Types].xml`` is stored first; all other members follow in
    ascending name order. Members are deflate-compressed.
    """
    def member_order(entry):
        _, member_name = entry
        # False sorts before True, so the content-types part comes first.
        return (member_name != "[Content_Types].xml", member_name)

    members = sorted(
        ((candidate, candidate.relative_to(src_dir).as_posix())
         for candidate in src_dir.rglob("*") if candidate.is_file()),
        key=member_order,
    )
    with zipfile.ZipFile(dest, "w", zipfile.ZIP_DEFLATED) as archive:
        for file_path, member_name in members:
            archive.write(file_path, member_name)
def is_code_font(name):
    """Return True if *name* is one of the CODE_FONTS.

    The comparison ignores case and surrounding whitespace; ``None`` or an
    empty name is treated as an empty string.
    """
    normalized = (name or "").strip().lower()
    return normalized in CODE_FONTS
# --- B1: Schriften ---------------------------------------------------------
def set_theme_fonts_to_calibri(theme_xml):
    """Set the latin typeface of the theme's major and minor font to TARGET_FONT.

    The file *theme_xml* is parsed, modified and rewritten in place.

    Raises:
        RuntimeError: If a ``majorFont``/``minorFont`` element or its
            ``latin`` child is missing.
    """
    document = ET.parse(theme_xml)
    theme_root = document.getroot()
    for scheme in ("majorFont", "minorFont"):
        font_node = theme_root.find(f".//{A}{scheme}")
        if font_node is None:
            raise RuntimeError(f"{scheme}-Element nicht im Theme")
        latin_node = font_node.find(f"{A}latin")
        if latin_node is None:
            raise RuntimeError(f"{scheme}/latin-Element nicht gefunden")
        previous = latin_node.get("typeface")
        latin_node.set("typeface", TARGET_FONT)
        log(f" Theme {scheme}/latin: {previous!r} -> {TARGET_FONT!r}")
    write_xml(document, theme_xml)
def replace_direct_fonts_in_styles(styles_xml):
    """Point every direct font reference in *styles_xml* to TARGET_FONT.

    Looks at the ``ascii``, ``hAnsi``, ``cs`` and ``eastAsia`` attributes of
    all ``rFonts`` elements. Values recognised by is_code_font() are left
    alone; everything else that is not already TARGET_FONT is replaced. The
    file is rewritten in place and both counts are logged.
    """
    document = ET.parse(styles_xml)
    font_attrs = tuple(
        f"{W}{name}" for name in ("ascii", "hAnsi", "cs", "eastAsia")
    )
    replaced = 0
    kept = 0
    for node in document.getroot().iter(f"{W}rFonts"):
        for key in font_attrs:
            current = node.get(key)
            if current is None:
                continue
            if is_code_font(current):
                kept += 1
            elif current != TARGET_FONT:
                node.set(key, TARGET_FONT)
                replaced += 1
    log(f" styles.xml: {replaced} direkte Font-Attribute auf {TARGET_FONT!r}"
        f" gesetzt (Code-Fonts unangetastet: {kept})")
    write_xml(document, styles_xml)
def set_table_borders_none(styles_xml):
    """Give the style with id ``Table`` an explicit border set of ``none``.

    Any existing ``tblBorders`` element of the style is removed and a new one
    with all six sides set to ``none`` is appended to ``tblPr`` (which is
    created if missing). The file is rewritten in place.

    Raises:
        RuntimeError: If *styles_xml* contains no style with id ``Table``.
    """
    document = ET.parse(styles_xml)
    table_style = None
    for candidate in document.getroot().findall(f"{W}style"):
        if candidate.get(f"{W}styleId") == "Table":
            table_style = candidate
            break
    if table_style is None:
        raise RuntimeError("Style 'Table' nicht in styles.xml")
    table_props = table_style.find(f"{W}tblPr")
    if table_props is None:
        table_props = ET.SubElement(table_style, f"{W}tblPr")
    previous = table_props.find(f"{W}tblBorders")
    if previous is not None:
        table_props.remove(previous)
    border_set = ET.SubElement(table_props, f"{W}tblBorders")
    for side in ("top", "left", "bottom", "right", "insideH", "insideV"):
        ET.SubElement(border_set, f"{W}{side}", {
            f"{W}val": "none",
            f"{W}sz": "0",
            f"{W}space": "0",
            f"{W}color": "auto",
        })
    log(" Style 'Table': tblBorders=none auf allen Sides")
    write_xml(document, styles_xml)
def set_default_body_size(styles_xml):
    """Set the document-default font size in *styles_xml* to SIZE_BODY.

    Walks ``docDefaults/rPrDefault/rPr`` (creating missing levels by
    appending them) and sets ``sz`` and ``szCs`` to SIZE_BODY. The file is
    rewritten in place.
    """
    def child_of(parent, tag):
        # Return the existing child or append a new one. Compare with None
        # explicitly: an Element without children is falsy.
        found = parent.find(tag)
        return ET.SubElement(parent, tag) if found is None else found

    document = ET.parse(styles_xml)
    run_props = document.getroot()
    for level in ("docDefaults", "rPrDefault", "rPr"):
        run_props = child_of(run_props, f"{W}{level}")
    for size_tag in ("sz", "szCs"):
        child_of(run_props, f"{W}{size_tag}").set(f"{W}val", str(SIZE_BODY))
    log(f" DocDefault Body-Schriftgroesse: {SIZE_BODY/2} pt")
    write_xml(document, styles_xml)
def set_heading_sizes(styles_xml):
    """Apply the sizes from HEADING_SIZES to the matching styles.

    For every style whose id is a key of HEADING_SIZES, ``sz`` and ``szCs``
    inside the style's ``rPr`` are set to the mapped value (missing elements
    are appended). The file is rewritten in place.
    """
    document = ET.parse(styles_xml)
    for style_node in document.getroot().findall(f"{W}style"):
        style_id = style_node.get(f"{W}styleId")
        if style_id in HEADING_SIZES:
            half_points = HEADING_SIZES[style_id]
            run_props = style_node.find(f"{W}rPr")
            if run_props is None:
                run_props = ET.SubElement(style_node, f"{W}rPr")
            for size_tag in (f"{W}sz", f"{W}szCs"):
                size_node = run_props.find(size_tag)
                if size_node is None:
                    size_node = ET.SubElement(run_props, size_tag)
                size_node.set(f"{W}val", str(half_points))
            log(f" Stil {style_id!r}: Schriftgroesse {half_points/2} pt")
    write_xml(document, styles_xml)
def set_widow_control_default(styles_xml):
    """Enable widow/orphan control in the document-default paragraph properties.

    Ensures ``docDefaults/pPrDefault/pPr/widowControl`` exists in
    *styles_xml* and is switched on, then rewrites the file in place.

    New elements are inserted at the position the WordprocessingML schema
    expects instead of being appended: ``docDefaults`` goes first in
    ``styles``, and ``widowControl`` goes before any later ``pPr`` sibling
    such as ``spacing``.
    """
    tree = ET.parse(styles_xml)
    root = tree.getroot()
    doc_defaults = root.find(f"{W}docDefaults")
    if doc_defaults is None:
        # docDefaults is the first child of w:styles in the schema.
        doc_defaults = ET.Element(f"{W}docDefaults")
        root.insert(0, doc_defaults)
    p_pr_default = doc_defaults.find(f"{W}pPrDefault")
    if p_pr_default is None:
        # pPrDefault follows rPrDefault, so appending keeps the order.
        p_pr_default = ET.SubElement(doc_defaults, f"{W}pPrDefault")
    p_pr = p_pr_default.find(f"{W}pPr")
    if p_pr is None:
        p_pr = ET.SubElement(p_pr_default, f"{W}pPr")
    widow = p_pr.find(f"{W}widowControl")
    if widow is None:
        # Only these elements may precede widowControl inside pPr.
        may_precede = {
            f"{W}{name}"
            for name in ("pStyle", "keepNext", "keepLines",
                         "pageBreakBefore", "framePr")
        }
        position = next(
            (index for index, child in enumerate(p_pr)
             if child.tag not in may_precede),
            len(p_pr),
        )
        p_pr.insert(position, ET.Element(f"{W}widowControl"))
    elif (widow.get(f"{W}val") or "").lower() in ("0", "false", "off"):
        # An explicit "off" value would contradict the log line below;
        # without w:val the element means "on".
        del widow.attrib[f"{W}val"]
    log(" pPrDefault: widowControl aktiviert")
    write_xml(tree, styles_xml)
def set_keep_next_styles(styles_xml):
    """Add ``keepNext`` and ``keepLines`` to every style listed in KEEP_STYLES.

    Existing elements are left untouched. Missing ones are inserted at the
    position the WordprocessingML schema expects instead of being appended:
    a new ``pPr`` goes before ``rPr`` and the table property elements, and
    ``keepNext``/``keepLines`` go directly after an optional ``pStyle``.
    Styles from KEEP_STYLES that do not occur in *styles_xml* are reported
    in the log and skipped. The file is rewritten in place.
    """
    tree = ET.parse(styles_xml)
    root = tree.getroot()
    # Leading part of the child sequence of pPr, in schema order.
    ppr_order = (f"{W}pStyle", f"{W}keepNext", f"{W}keepLines")
    # Children of w:style that have to come after pPr.
    after_ppr = {
        f"{W}{name}"
        for name in ("rPr", "tblPr", "trPr", "tcPr", "tblStylePr")
    }
    seen = set()
    for style in root.findall(f"{W}style"):
        sid = style.get(f"{W}styleId")
        if sid not in KEEP_STYLES:
            continue
        p_pr = style.find(f"{W}pPr")
        if p_pr is None:
            p_pr = ET.Element(f"{W}pPr")
            position = next(
                (index for index, child in enumerate(style)
                 if child.tag in after_ppr),
                len(style),
            )
            style.insert(position, p_pr)
        for rank in (1, 2):
            tag = ppr_order[rank]
            if p_pr.find(tag) is not None:
                continue
            may_precede = set(ppr_order[:rank])
            position = next(
                (index for index, child in enumerate(p_pr)
                 if child.tag not in may_precede),
                len(p_pr),
            )
            p_pr.insert(position, ET.Element(tag))
        log(f" Stil {sid!r}: keepNext + keepLines")
        seen.add(sid)
    missing = set(KEEP_STYLES) - seen
    if missing:
        log(f" Hinweis: Stil(e) {sorted(missing)!r} nicht gefunden, uebersprungen")
    write_xml(tree, styles_xml)
# --- B4: Heading-Farben ----------------------------------------------------
def set_heading_colors(styles_xml):
    """Colour the styles listed in HEADING_COLOR_STYLES with HEADING_COLOR.

    For each matching style the ``color`` element inside ``rPr`` is created
    if necessary, stripped of its theme attributes and given the fixed value
    HEADING_COLOR. The file is rewritten in place.
    """
    document = ET.parse(styles_xml)
    theme_attrs = (f"{W}themeColor", f"{W}themeTint", f"{W}themeShade")
    for style_node in document.getroot().findall(f"{W}style"):
        style_id = style_node.get(f"{W}styleId")
        if style_id in HEADING_COLOR_STYLES:
            run_props = style_node.find(f"{W}rPr")
            if run_props is None:
                run_props = ET.SubElement(style_node, f"{W}rPr")
            color_node = run_props.find(f"{W}color")
            if color_node is None:
                color_node = ET.SubElement(run_props, f"{W}color")
            # Drop the theme colour attributes so the colour is not derived
            # from the Word theme (Pandoc default: themeColor accent1).
            for theme_attr in theme_attrs:
                color_node.attrib.pop(theme_attr, None)
            color_node.set(f"{W}val", HEADING_COLOR)
            log(f" Stil {style_id!r}: color={HEADING_COLOR} (themeColor entfernt)")
    write_xml(document, styles_xml)
def header_default_xml():
    """Return the bytes of the default header part.

    The result embeds HEADER_LEFT and HEADER_RIGHT (encoded with
    ``str.encode()``) and is written to ``word/header1.xml`` by
    add_header_footer().
    """
    # NOTE(review): in this view the byte literals contain only whitespace
    # around the two constants; the XML markup appears to be missing -
    # confirm against the repository before relying on this function.
    return (
        b'\n'
        b' \n'
        b' \n'
        b' \n'
        b' \n'
        b' \n'
        b' \n'
        b' ' + HEADER_LEFT.encode() + b'\n'
        b' ' + HEADER_RIGHT.encode() + b'\n'
        b' \n'
        b'\n'
    )
def header_first_blank_xml():
    """Return the bytes of the first-page header part.

    add_header_footer() writes the result to ``word/header2.xml`` and logs
    it as the blank first-page header.
    """
    # NOTE(review): the literals hold only whitespace in this view; the XML
    # markup appears to be missing - confirm against the repository.
    return (
        b'\n'
        b' \n'
        b'\n'
    )
def footer_default_xml():
    """Return the bytes of the footer part.

    add_header_footer() writes the result to ``word/footer1.xml``. According
    to the module docstring the footer shows "Seite n / m"; the visible
    literals contain the texts "Seite", "/" and two "1" placeholders.
    """
    # NOTE(review): the XML markup (presumably the page-number fields around
    # the "1" placeholders) appears to be missing from the literals in this
    # view - confirm against the repository.
    return (
        b'\n'
        b' \n'
        b' \n'
        b' \n'
        b' \n'
        b' \n'
        b' \n'
        b' Seite \n'
        b' \n'
        b' 1\n'
        b' \n'
        b' / \n'
        b' \n'
        b' 1\n'
        b' \n'
        b' \n'
        b'\n'
    )
# Relationship type URIs for header and footer parts; passed to
# add_relationship() by add_header_footer().
REL_HEADER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
REL_FOOTER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
# Content types for header and footer parts; passed to
# add_content_type_override() by add_header_footer().
CT_HEADER = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"
CT_FOOTER = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"
def next_free_rel_id(rels_xml):
    """Return the smallest number above every ``rId<n>`` used in *rels_xml*.

    The file is scanned as text for ``Id="rId<n>"`` attributes. Returns 1 if
    no such id is present.
    """
    content = rels_xml.read_text(encoding="utf-8")
    used_numbers = (int(digits) for digits in re.findall(r'Id="rId(\d+)"', content))
    return max(used_numbers, default=0) + 1
def add_relationship(rels_xml, rid, rtype, target):
    """Add a relationship entry to the relationships file *rels_xml*.

    Works on the file as plain text: the new entry is built as a string, the
    function returns early if that exact string is already contained in the
    file, and otherwise the entry is inserted via ``str.replace`` and the
    file is written back as UTF-8.

    Args:
        rels_xml: ``pathlib.Path`` of the ``.rels`` file.
        rid: Relationship id, e.g. ``"rId9"``.
        rtype: Relationship type URI.
        target: Target part name, relative to the ``.rels`` owner.
    """
    text = rels_xml.read_text(encoding="utf-8")
    # NOTE(review): the f-string and both replace() arguments are empty in
    # this view, so rid/rtype/target are unused and the membership test below
    # is always true. The markup (presumably the relationship element and the
    # closing root tag) appears to be missing - confirm against the
    # repository.
    new_rel = f''
    if new_rel in text:
        return
    text = text.replace("", new_rel + "")
    rels_xml.write_text(text, encoding="utf-8")
def add_content_type_override(ct_xml, part_name, ct):
    """Add a content-type override entry for *part_name* to *ct_xml*.

    Works on the file as plain text. Nothing is changed if *part_name*
    already occurs anywhere in the file; otherwise the new entry is inserted
    via ``str.replace`` and the file is written back as UTF-8.

    Args:
        ct_xml: ``pathlib.Path`` of ``[Content_Types].xml``.
        part_name: Absolute part name, e.g. ``"/word/header1.xml"``.
        ct: Content type to register for the part.
    """
    text = ct_xml.read_text(encoding="utf-8")
    # NOTE(review): the f-string and both replace() arguments are empty in
    # this view, so *ct* is unused. The markup (presumably the override
    # element and the closing root tag) appears to be missing - confirm
    # against the repository.
    new_override = f''
    if part_name in text:
        return
    text = text.replace("", new_override + "")
    ct_xml.write_text(text, encoding="utf-8")
def update_sectpr_with_headers(document_xml, header_default_rid, header_first_rid, footer_default_rid):
    """Replace the section properties in *document_xml* with a new block.

    Works on the file as plain text: every match of the regular expression is
    replaced by the new block (``re.subn`` with ``re.DOTALL``). If nothing
    matched, the block is inserted via ``str.replace`` instead. The result is
    written back as UTF-8 and a summary is logged.

    Args:
        document_xml: ``pathlib.Path`` of ``word/document.xml``.
        header_default_rid: Relationship id of the default header.
        header_first_rid: Relationship id of the first-page header.
        footer_default_rid: Relationship id of the footer.
    """
    text = document_xml.read_text(encoding="utf-8")
    # NOTE(review): all nine f-string fragments, the regular expression and
    # the replace() marker are empty in this view, so the three rid
    # parameters are unused. According to the log message below the block
    # should carry pgSz/pgMar, header and footer references and titlePg -
    # confirm against the repository.
    new_sectpr = (
        f''
        f''
        f''
        f''
        f''
        f''
        f''
        f''
        f''
    )
    new_text, n = re.subn(
        r'|.*?',
        new_sectpr, text, flags=re.DOTALL,
    )
    # No existing section properties matched: insert the block instead.
    if n == 0:
        new_text = text.replace("", new_sectpr + "")
    document_xml.write_text(new_text, encoding="utf-8")
    log(f" document.xml sectPr: pgSz/pgMar (A4, 2.2/2.5cm Raender), Header"
        f" default+first, Footer default+first auf gleicher rId, titlePg")
def add_header_footer(unpacked):
    """Add the header and footer parts to the unpacked DOCX tree *unpacked*.

    Writes ``header1.xml``, ``header2.xml`` and ``footer1.xml`` into
    ``word/``, registers a relationship and a content-type override for each
    of them and finally updates the section properties of
    ``word/document.xml`` with the three new relationship ids.
    """
    word_dir = unpacked / "word"
    rels_path = word_dir / "_rels" / "document.xml.rels"
    content_types_path = unpacked / "[Content_Types].xml"
    document_path = word_dir / "document.xml"

    # (file name, content builder, relationship type, content type)
    parts = (
        ("header1.xml", header_default_xml, REL_HEADER, CT_HEADER),
        ("header2.xml", header_first_blank_xml, REL_HEADER, CT_HEADER),
        ("footer1.xml", footer_default_xml, REL_FOOTER, CT_FOOTER),
    )

    for file_name, build, _, _ in parts:
        write_xml_bytes(build(), word_dir / file_name)
    log(" word/header1.xml (default), header2.xml (first blank),"
        " footer1.xml geschrieben")

    first_number = next_free_rel_id(rels_path)
    rel_ids = [f"rId{first_number + offset}" for offset in range(len(parts))]
    for rel_id, (file_name, _, rel_type, _) in zip(rel_ids, parts):
        add_relationship(rels_path, rel_id, rel_type, file_name)
    header_rid, first_header_rid, footer_rid = rel_ids
    log(f" Beziehungen: {header_rid}=header1, {first_header_rid}=header2,"
        f" {footer_rid}=footer1")

    for file_name, _, _, content_type in parts:
        add_content_type_override(
            content_types_path, f"/word/{file_name}", content_type
        )
    log(" [Content_Types].xml: Override-Eintraege fuer header1/2 und footer1")

    update_sectpr_with_headers(
        document_path, header_rid, first_header_rid, footer_rid
    )
def main():
    """Build ``templates/reference.docx`` and return the process exit code.

    Fetches Pandoc's default reference document into a temporary directory,
    unpacks it, applies all adjustments in a fixed order and repacks the
    result as OUTPUT_FILE. Returns 0 on success.
    """
    log(f"Ziel: {OUTPUT_FILE}")
    TEMPLATES_DIR.mkdir(parents=True, exist_ok=True)
    with tempfile.TemporaryDirectory(prefix="refdocx-") as tmp:
        work_dir = Path(tmp)
        pandoc_docx = work_dir / "pandoc-default.docx"
        unpacked = work_dir / "unpacked"
        fetch_pandoc_default(pandoc_docx)
        unpacked.mkdir()
        unpack_docx(pandoc_docx, unpacked)

        theme_xml = unpacked / "word" / "theme" / "theme1.xml"
        styles_xml = unpacked / "word" / "styles.xml"
        # (log message, adjustment, argument) - executed top to bottom.
        steps = (
            ("Anpassung: Theme major+minor auf Calibri",
             set_theme_fonts_to_calibri, theme_xml),
            ("Anpassung: Direkte Font-Referenzen in styles.xml -> Calibri",
             replace_direct_fonts_in_styles, styles_xml),
            ("Anpassung: Tabellen-Default ohne Rahmen",
             set_table_borders_none, styles_xml),
            ("Anpassung: Body-Schriftgroesse 11 pt (DocDefault)",
             set_default_body_size, styles_xml),
            ("Anpassung: Heading-Schriftgroessen 15/13/12 pt",
             set_heading_sizes, styles_xml),
            ("Anpassung: Widow/Orphan-Control im DocDefault (B3)",
             set_widow_control_default, styles_xml),
            ("Anpassung: keepNext + keepLines auf Heading 1/2/3 + FirstParagraph (B3)",
             set_keep_next_styles, styles_xml),
            ("Anpassung: Heading 1/2/3 in destengsblue (B4)",
             set_heading_colors, styles_xml),
            ("Anpassung: Header und Footer einbauen (B2)",
             add_header_footer, unpacked),
        )
        for message, adjust, argument in steps:
            log(message)
            adjust(argument)

        log("Repack als reference.docx")
        repack_docx(unpacked, OUTPUT_FILE)
        log(f" -> {OUTPUT_FILE} ({OUTPUT_FILE.stat().st_size} Bytes)")
    log("Fertig.")
    return 0
# Run the build only when executed as a script; the return value of main()
# becomes the process exit status.
if __name__ == "__main__":
    sys.exit(main())