#!/usr/bin/env python3
"""
build-reference-docx.py
=======================

Builds templates/reference.docx for the Pandoc DOCX pipeline from the
Pandoc default reference document, with targeted adjustments.

Iterations B1 + B1.5 + B2 + B3 + B4 (current):
  B1   - Theme fonts (majorFont and minorFont) both set to Calibri.
  B1   - Direct font-name references in styles.xml set to Calibri
         (code fonts such as Consolas are kept).
  B1   - Default table style "Table" with tblBorders=none.
  B1.5 - Body DocDefault 11 pt, Heading 1/2/3 at 15/13/12 pt.
  B2   - Header (name on the left, "Lebenslauf" on the right) from page 2
         onwards; page 1 gets an empty header (titlePg mechanism).
         Footer (right-aligned: Seite n / m) on all pages including page 1.
         Explicit page setup: A4, margins matching the PDF
         (top/bottom 2.2 cm, left/right 2.5 cm).
  B3   - DocDefault widowControl. Heading 1/2/3 with keepNext + keepLines.
         Additionally 'FirstParagraph' (the Pandoc style for the first
         paragraph after a heading) - this covers the bold skills
         subsection labels.
         Note: list-bullet protection (3-3 rule) does not happen here but
         in post-processing (build/post-process-docx.py), which is applied
         to the finished DOCX - a style cannot express per-bullet logic.
  B4   - Heading 1/2/3 coloured destengsblue (3C68AE); themeColor is
         removed so the colour is not taken from the Word theme.
         Important (S10 fix): NOT ONLY the paragraph styles Heading1/2/3
         but ALSO the linked character styles Heading1Char/2Char/3Char
         must be coloured. The Pandoc 3.x default reference contains these
         character styles with their own color val=0F4761
         themeColor=accent1 themeShade=BF (Aptos petrol). In Word,
         character styles take precedence over paragraph styles for run
         properties (font, colour), i.e. without the character-style
         change the theme's Aptos blue wins over the paragraph style's
         destengsblue.
  B4-Bold (S10) - Heading 1 and 2 bold (paragraph AND character style),
         Heading 3 stays regular. The Pandoc 3.x default reference does
         not set headings bold. The character-style precedence logic is
         the same as for the colour.

Note (S08): the heading separator lines added in the meantime (bottom
border + indent trick) were rolled back because Word rendered them
left-aligned instead of centred (with a hanging indent the Word border
follows the visual paragraph position, not the indent values).

Planned for follow-up iterations:
  C - photo embedding
  D - hyphenation fine-tuning for the PDF
"""

from __future__ import annotations

import re
import subprocess
import sys
import tempfile
import zipfile
from pathlib import Path
from xml.etree import ElementTree as ET

# Paths are resolved relative to this script: the output lands in the
# "templates" directory next to the script's parent directory.
SCRIPT_DIR = Path(__file__).resolve().parent
BASE_DIR = SCRIPT_DIR.parent
TEMPLATES_DIR = BASE_DIR / "templates"
OUTPUT_FILE = TEMPLATES_DIR / "reference.docx"

# XML namespace URIs used by the DOCX parts this script touches.
NS = {
    "w": "http://schemas.openxmlformats.org/wordprocessingml/2006/main",
    "a": "http://schemas.openxmlformats.org/drawingml/2006/main",
    "r": "http://schemas.openxmlformats.org/officeDocument/2006/relationships",
    "rel": "http://schemas.openxmlformats.org/package/2006/relationships",
    "ct": "http://schemas.openxmlformats.org/package/2006/content-types",
}
# Register the prefixes so ElementTree serialises with them instead of
# generated ns0/ns1 prefixes.
for prefix, uri in NS.items():
    ET.register_namespace(prefix, uri)
# Re-registers the package-relationships URI under the empty prefix; this
# replaces the "rel" registration made by the loop above.
ET.register_namespace("", NS["rel"])

# Clark-notation prefixes ("{uri}") for building qualified tag/attribute names.
W = "{%s}" % NS["w"]
A = "{%s}" % NS["a"]

# Lower-case names of monospace fonts that must NOT be replaced by TARGET_FONT.
CODE_FONTS = {"consolas", "courier", "courier new", "liberation mono", "monaco", "menlo", "fira mono", "fira code"}
TARGET_FONT = "Calibri"

# Font sizes in half-points (the log output divides them by 2 to print pt).
SIZE_BODY = 22
SIZE_HEADING1 = 30
SIZE_HEADING2 = 26
SIZE_HEADING3 = 24
HEADING_SIZES = {"Heading1": SIZE_HEADING1, "Heading2": SIZE_HEADING2, "Heading3": SIZE_HEADING3}

# Compact is NO LONGER in this list - list-bullet protection is handled
# per bullet by the post-processing script.
KEEP_STYLES = ("Heading1", "Heading2", "Heading3", "FirstParagraph")

# B4 - heading colours (the separator lines were rolled back in S08, see the
# module docstring). What remains: Heading 1/2/3 in destengsblue, themeColor removed.
HEADING_COLOR = "3C68AE"  # destengsblue (same as template.tex)
HEADING_COLOR_STYLES = (
    "Heading1", "Heading2", "Heading3",
    "Heading1Char", "Heading2Char", "Heading3Char",
)
# B4-Bold (S10) - Heading 1+2 bold, Heading 3 stays regular.
# As with the colour fix, both the paragraph AND the character style are
# modified, because the character style dominates run properties in Word.
HEADING_BOLD_STYLES = ("Heading1", "Heading2", "Heading1Char", "Heading2Char")

# Page geometry for the section properties. Presumably in twips
# (1/1440 inch), which would make 11906 x 16838 an A4 page - TODO confirm.
PAGE_W = 11906
PAGE_H = 16838
MARGIN_TOP = 1247
MARGIN_BOT = 1247
MARGIN_LEFT = 1417
MARGIN_RIGHT = 1417
HEADER_POS = 720
FOOTER_POS = 720
# Width of the text area; used as the position of the right-hand tab stop.
HEADER_RIGHT_TAB = PAGE_W - MARGIN_LEFT - MARGIN_RIGHT
HEADER_LEFT = "Dr.-Ing. Thomas Langer"
HEADER_RIGHT = "Lebenslauf"


def log(msg: str) -> None:
    """Print *msg* with the script prefix, flushing immediately."""
    print(f"[build-reference-docx] {msg}", flush=True)


# Bytes prepended to every XML part written by this script.
# NOTE(review): as visible here this is only a newline; going by the name,
# an XML declaration was presumably intended - verify against the original.
XML_DECL = b'\n'


def write_xml(tree: ET.ElementTree, dest: Path) -> None:
    """Serialise the root of *tree* as UTF-8 and write it to *dest*.

    The serialised bytes are prefixed with ``XML_DECL``.
    """
    body = ET.tostring(tree.getroot(), encoding="utf-8")
    dest.write_bytes(XML_DECL + body)


def write_xml_bytes(content: bytes, dest: Path) -> None:
    """Write pre-built XML *content* to *dest*, prefixed with ``XML_DECL``."""
    dest.write_bytes(XML_DECL + content)


def fetch_pandoc_default(dest: Path) -> None:
    """Write pandoc's built-in ``reference.docx`` to *dest*.

    Runs ``pandoc --print-default-data-file reference.docx`` and stores its
    stdout as the file content.

    Raises:
        SystemExit: if the ``pandoc`` executable cannot be found, or if it
            exits with a non-zero status (its stderr is forwarded first).
    """
    log("Pandoc-Default-Reference extrahieren ...")
    try:
        result = subprocess.run(
            ["pandoc", "--print-default-data-file", "reference.docx"],
            capture_output=True,
            check=False,
        )
    except FileNotFoundError as exc:
        # A missing executable would otherwise surface as a raw traceback;
        # report it the same way as a failing pandoc run.
        raise SystemExit(
            "pandoc nicht gefunden (installiert und im PATH?)"
        ) from exc
    if result.returncode != 0:
        sys.stderr.write(result.stderr.decode("utf-8", errors="replace"))
        raise SystemExit(f"pandoc liefert Exit-Code {result.returncode}")
    dest.write_bytes(result.stdout)
    log(f" -> {dest} ({dest.stat().st_size} Bytes)")


def unpack_docx(src: Path, dest_dir: Path) -> None:
    """Extract every member of the ZIP/DOCX archive *src* into *dest_dir*."""
    with zipfile.ZipFile(src, "r") as z:
        z.extractall(dest_dir)


def repack_docx(src_dir: Path, dest: Path) -> None:
    """Zip all files below *src_dir* into the DOCX archive *dest*.

    ``[Content_Types].xml`` is written as the first archive member; all
    other files follow sorted by their archive name. Members are stored
    with DEFLATE compression.
    """
    files = []
    for path in src_dir.rglob("*"):
        if path.is_file():
            # Archive names are relative to src_dir and use forward slashes.
            arcname = path.relative_to(src_dir).as_posix()
            files.append((path, arcname))
    files.sort(key=lambda t: (0 if t[1] == "[Content_Types].xml" else 1, t[1]))
    with zipfile.ZipFile(dest, "w", zipfile.ZIP_DEFLATED) as z:
        for path, arcname in files:
            z.write(path, arcname)


def is_code_font(name) -> bool:
    """Return True if *name* (case/whitespace-insensitive) is in CODE_FONTS.

    ``None`` or an empty name is treated as "not a code font".
    """
    return (name or "").strip().lower() in CODE_FONTS


# --- B1: fonts -------------------------------------------------------------
def set_theme_fonts_to_calibri(theme_xml: Path) -> None:
    """Set the latin typeface of the theme's major and minor font to TARGET_FONT.

    The theme part is parsed, modified and written back in place.

    Raises:
        RuntimeError: if a majorFont/minorFont element or its latin child
            is missing from the theme.
    """
    tree = ET.parse(theme_xml)
    root = tree.getroot()
    for kind in ("majorFont", "minorFont"):
        font = root.find(f".//{A}{kind}")
        if font is None:
            raise RuntimeError(f"{kind}-Element nicht im Theme")
        latin = font.find(f"{A}latin")
        if latin is None:
            raise RuntimeError(f"{kind}/latin-Element nicht gefunden")
        old = latin.get("typeface")
        latin.set("typeface", TARGET_FONT)
        log(f" Theme {kind}/latin: {old!r} -> {TARGET_FONT!r}")
    write_xml(tree, theme_xml)


def replace_direct_fonts_in_styles(styles_xml: Path) -> None:
    """Replace explicit font names on every w:rFonts element with TARGET_FONT.

    The ascii, hAnsi, cs and eastAsia attributes are inspected. Values that
    ``is_code_font`` recognises are left untouched and counted as skipped.
    The styles part is rewritten in place.
    """
    tree = ET.parse(styles_xml)
    root = tree.getroot()
    changed = 0
    skipped = 0
    for rfonts in root.iter(f"{W}rFonts"):
        for attr in (f"{W}ascii", f"{W}hAnsi", f"{W}cs", f"{W}eastAsia"):
            val = rfonts.get(attr)
            if val is None:
                continue
            if is_code_font(val):
                skipped += 1
                continue
            if val != TARGET_FONT:
                rfonts.set(attr, TARGET_FONT)
                changed += 1
    log(f" styles.xml: {changed} direkte Font-Attribute auf {TARGET_FONT!r}"
        f" gesetzt (Code-Fonts unangetastet: {skipped})")
    write_xml(tree, styles_xml)


def set_table_borders_none(styles_xml: Path) -> None:
    """Give the style with styleId "Table" a w:tblBorders with all sides "none".

    An existing w:tblBorders element is removed and replaced; a missing
    w:tblPr is created. The styles part is rewritten in place.

    Raises:
        RuntimeError: if no style with styleId "Table" exists.
    """
    tree = ET.parse(styles_xml)
    root = tree.getroot()
    style = next((s for s in root.findall(f"{W}style")
                  if s.get(f"{W}styleId") == "Table"), None)
    if style is None:
        raise RuntimeError("Style 'Table' nicht in styles.xml")
    tbl_pr = style.find(f"{W}tblPr")
    if tbl_pr is None:
        tbl_pr = ET.SubElement(style, f"{W}tblPr")
    # Drop any existing border definition so the new one fully replaces it.
    existing = tbl_pr.find(f"{W}tblBorders")
    if existing is not None:
        tbl_pr.remove(existing)
    borders = ET.SubElement(tbl_pr, f"{W}tblBorders")
    for side in ("top", "left", "bottom", "right", "insideH", "insideV"):
        e = ET.SubElement(borders, f"{W}{side}")
        e.set(f"{W}val", "none")
        e.set(f"{W}sz", "0")
        e.set(f"{W}space", "0")
        e.set(f"{W}color", "auto")
    log(" Style 'Table': tblBorders=none auf allen Sides")
    write_xml(tree, styles_xml)


def set_default_body_size(styles_xml: Path) -> None:
    """Set w:sz and w:szCs in docDefaults/rPrDefault/rPr to SIZE_BODY.

    Missing elements along that path are created (appended to their
    parent). The styles part is rewritten in place.
    """
    tree = ET.parse(styles_xml)
    root = tree.getroot()
    docDefaults = root.find(f"{W}docDefaults")
    if docDefaults is None:
        docDefaults = ET.SubElement(root, f"{W}docDefaults")
    rPrDefault = docDefaults.find(f"{W}rPrDefault")
    if rPrDefault is None:
        rPrDefault = ET.SubElement(docDefaults, f"{W}rPrDefault")
    rPr = rPrDefault.find(f"{W}rPr")
    if rPr is None:
        rPr = ET.SubElement(rPrDefault, f"{W}rPr")
    for tag in (f"{W}sz", f"{W}szCs"):
        elem = rPr.find(tag)
        if elem is None:
            elem = ET.SubElement(rPr, tag)
        elem.set(f"{W}val", str(SIZE_BODY))
    # The stored value is halved for display in points.
    log(f" DocDefault Body-Schriftgroesse: {SIZE_BODY/2} pt")
    write_xml(tree, styles_xml)


def set_heading_sizes(styles_xml: Path) -> None:
    """Set w:sz and w:szCs of each style listed in HEADING_SIZES.

    Styles are matched by styleId; a missing w:rPr or size element is
    created. The styles part is rewritten in place.
    """
    tree = ET.parse(styles_xml)
    root = tree.getroot()
    for style in root.findall(f"{W}style"):
        sid = style.get(f"{W}styleId")
        if sid not in HEADING_SIZES:
            continue
        target = HEADING_SIZES[sid]
        rPr = style.find(f"{W}rPr")
        if rPr is None:
            rPr = ET.SubElement(style, f"{W}rPr")
        for tag in (f"{W}sz", f"{W}szCs"):
            elem = rPr.find(tag)
            if elem is None:
                elem = ET.SubElement(rPr, tag)
            elem.set(f"{W}val", str(target))
        log(f" Stil {sid!r}: Schriftgroesse {target/2} pt")
    write_xml(tree, styles_xml)


def set_widow_control_default(styles_xml: Path) -> None:
    """Ensure docDefaults/pPrDefault/pPr contains a w:widowControl element.

    Missing elements along that path are created. The styles part is
    rewritten in place.
    """
    tree = ET.parse(styles_xml)
    root = tree.getroot()
    docDefaults = root.find(f"{W}docDefaults")
    if docDefaults is None:
        docDefaults = ET.SubElement(root, f"{W}docDefaults")
    pPrDefault = docDefaults.find(f"{W}pPrDefault")
    if pPrDefault is None:
        pPrDefault = ET.SubElement(docDefaults, f"{W}pPrDefault")
    pPr = pPrDefault.find(f"{W}pPr")
    if pPr is None:
        pPr = ET.SubElement(pPrDefault, f"{W}pPr")
    if pPr.find(f"{W}widowControl") is None:
        ET.SubElement(pPr, f"{W}widowControl")
    log(" pPrDefault: widowControl aktiviert")
    write_xml(tree, styles_xml)


def set_keep_next_styles(styles_xml: Path) -> None:
    """Add w:keepNext and w:keepLines to every style listed in KEEP_STYLES.

    Styles from KEEP_STYLES that are not present are only reported in the
    log, not treated as an error. The styles part is rewritten in place.
    """
    tree = ET.parse(styles_xml)
    root = tree.getroot()
    seen = set()
    for style in root.findall(f"{W}style"):
        sid = style.get(f"{W}styleId")
        if sid not in KEEP_STYLES:
            continue
        pPr = style.find(f"{W}pPr")
        if pPr is None:
            pPr = ET.SubElement(style, f"{W}pPr")
        for tag in (f"{W}keepNext", f"{W}keepLines"):
            if pPr.find(tag) is None:
                ET.SubElement(pPr, tag)
        log(f" Stil {sid!r}: keepNext + keepLines")
        seen.add(sid)
    missing = set(KEEP_STYLES) - seen
    if missing:
        log(f" Hinweis: Stil(e) {sorted(missing)!r} nicht gefunden, uebersprungen")
    write_xml(tree, styles_xml)


# --- B4: heading colours ---------------------------------------------------

def set_heading_colors(styles_xml: Path) -> None:
    """Set w:color of every style in HEADING_COLOR_STYLES to HEADING_COLOR.

    A missing w:rPr or w:color element is created. The styles part is
    rewritten in place.
    """
    tree = ET.parse(styles_xml)
    root = tree.getroot()
    for style in root.findall(f"{W}style"):
        sid = style.get(f"{W}styleId")
        if sid not in HEADING_COLOR_STYLES:
            continue
        rPr = style.find(f"{W}rPr")
        if rPr is None:
            rPr = ET.SubElement(style, f"{W}rPr")
        color = rPr.find(f"{W}color")
        if color is None:
            color = ET.SubElement(rPr, f"{W}color")
        # Remove the theme-colour attributes so the colour is not derived
        # from the Word theme (Pandoc default: themeColor accent1).
        for attr in (f"{W}themeColor", f"{W}themeTint", f"{W}themeShade"):
            if attr in color.attrib:
                del color.attrib[attr]
        color.set(f"{W}val", HEADING_COLOR)
        log(f" Stil {sid!r}: color={HEADING_COLOR} (themeColor entfernt)")
    write_xml(tree, styles_xml)


def set_heading_bold(styles_xml: Path) -> None:
    """Add w:b and w:bCs to Heading1 + Heading2 (paragraph and character style).

    The Pandoc 3.x default reference sets neither the paragraph nor the
    character style bold. The character styles are modified as well because
    in Word they take precedence over paragraph styles for run properties
    (same reasoning as the colour fix in set_heading_colors).

    Styles from HEADING_BOLD_STYLES that are not present are only reported
    in the log. The styles part is rewritten in place.
    """
    tree = ET.parse(styles_xml)
    root = tree.getroot()
    seen = set()
    for style in root.findall(f"{W}style"):
        sid = style.get(f"{W}styleId")
        if sid not in HEADING_BOLD_STYLES:
            continue
        rPr = style.find(f"{W}rPr")
        if rPr is None:
            rPr = ET.SubElement(style, f"{W}rPr")
        for tag in (f"{W}b", f"{W}bCs"):
            if rPr.find(tag) is None:
                ET.SubElement(rPr, tag)
        log(f" Stil {sid!r}: bold (b + bCs)")
        seen.add(sid)
    missing = set(HEADING_BOLD_STYLES) - seen
    if missing:
        log(f" Hinweis: Stil(e) {sorted(missing)!r} nicht gefunden, uebersprungen")
    write_xml(tree, styles_xml)


def header_default_xml() -> bytes:
    """Return the body of the default header part, embedding HEADER_LEFT and HEADER_RIGHT.

    NOTE(review): apart from the two embedded constants, the byte literals
    visible here contain only spaces and newlines; the header markup they
    were presumably meant to carry is not present - verify against the
    original file. The constants are inserted without XML escaping.
    """
    return (
        b'\n'
        b' \n'
        b' \n'
        b' \n'
        b' \n'
        b' \n'
        b' \n'
        b' ' + HEADER_LEFT.encode() + b'\n'
        b' ' + HEADER_RIGHT.encode() + b'\n'
        b' \n'
        b'\n'
    )


def header_first_blank_xml() -> bytes:
    """Return the body of the (blank) first-page header part.

    NOTE(review): the literals visible here are whitespace only; the
    markup appears to be missing - verify against the original file.
    """
    return (
        b'\n'
        b' \n'
        b'\n'
    )


def footer_default_xml() -> bytes:
    """Return the body of the footer part ("Seite n / m").

    NOTE(review): only the text fragments ("Seite", "1", "/") are visible
    in these literals; the surrounding markup and field codes appear to be
    missing - verify against the original file.
    """
    return (
        b'\n'
        b' \n'
        b' \n'
        b' \n'
        b' \n'
        b' \n'
        b' \n'
        b' Seite \n'
        b' \n'
        b' 1\n'
        b' \n'
        b' / \n'
        b' \n'
        b' 1\n'
        b' \n'
        b' \n'
        b'\n'
    )


# Relationship type URIs and content types for header/footer parts.
REL_HEADER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/header"
REL_FOOTER = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footer"
CT_HEADER = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"
CT_FOOTER = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"


def next_free_rel_id(rels_xml: Path) -> int:
    """Return one more than the highest numeric ``rIdN`` found in *rels_xml*.

    Returns 1 when the file contains no ``Id="rIdN"`` attribute at all.
    """
    text = rels_xml.read_text(encoding="utf-8")
    ids = [int(m.group(1)) for m in re.finditer(r'Id="rId(\d+)"', text)]
    return (max(ids) + 1) if ids else 1


def add_relationship(rels_xml: Path, rid: str, rtype: str, target: str) -> None:
    """Register a relationship (*rid*, *rtype*, *target*) in *rels_xml* by text substitution.

    NOTE(review): as visible here ``new_rel`` is an empty f-string that
    uses none of the parameters, so the membership test below is always
    true and the function returns without writing. The relationship markup
    and the replace anchor appear to be missing - verify against the
    original file.
    """
    text = rels_xml.read_text(encoding="utf-8")
    new_rel = f''
    if new_rel in text:
        return
    text = text.replace("", new_rel + "")
    rels_xml.write_text(text, encoding="utf-8")


def add_content_type_override(ct_xml: Path, part_name: str, ct: str) -> None:
    """Register content type *ct* for *part_name* in ``[Content_Types].xml``.

    Does nothing if *part_name* already occurs anywhere in the file.

    NOTE(review): as visible here ``new_override`` and the replace anchor
    are empty strings, so the file is written back unchanged. The Override
    markup appears to be missing - verify against the original file.
    """
    text = ct_xml.read_text(encoding="utf-8")
    new_override = f''
    if part_name in text:
        return
    text = text.replace("", new_override + "")
    ct_xml.write_text(text, encoding="utf-8")


def update_sectpr_with_headers(document_xml: Path, header_default_rid: str,
                               header_first_rid: str, footer_default_rid: str) -> None:
    """Replace (or insert) the section properties in *document_xml*.

    According to the log message the new sectPr carries the page size and
    margins, the default and first-page header references, the footer
    reference and titlePg.

    NOTE(review): as visible here the sectPr template consists of empty
    f-strings and the regex/anchor literals are (nearly) empty, so none of
    the rId parameters are used and the substitution leaves the text
    unchanged. The markup appears to be missing - verify against the
    original file.
    """
    text = document_xml.read_text(encoding="utf-8")
    new_sectpr = (
        f''
        f''
        f''
        f''
        f''
        f''
        f''
        f''
        f''
    )
    new_text, n = re.subn(
        r'|.*?',
        new_sectpr,
        text,
        flags=re.DOTALL,
    )
    # Fallback when nothing matched: insert via plain text replacement.
    if n == 0:
        new_text = text.replace("", new_sectpr + "")
    document_xml.write_text(new_text, encoding="utf-8")
    log(f" document.xml sectPr: pgSz/pgMar (A4, 2.2/2.5cm Raender), Header"
        f" default+first, Footer default+first auf gleicher rId, titlePg")


def add_header_footer(unpacked: Path) -> None:
    """Add header/footer parts to the unpacked DOCX tree at *unpacked*.

    Writes word/header1.xml, word/header2.xml and word/footer1.xml, then
    registers them (three consecutive relationship ids starting at the next
    free one, content-type overrides) and finally updates the section
    properties in word/document.xml.
    """
    word_dir = unpacked / "word"
    rels_xml = word_dir / "_rels" / "document.xml.rels"
    ct_xml = unpacked / "[Content_Types].xml"
    doc_xml = word_dir / "document.xml"

    write_xml_bytes(header_default_xml(), word_dir / "header1.xml")
    write_xml_bytes(header_first_blank_xml(), word_dir / "header2.xml")
    write_xml_bytes(footer_default_xml(), word_dir / "footer1.xml")
    log(" word/header1.xml (default), header2.xml (first blank),"
        " footer1.xml geschrieben")

    # The ids are computed once up front, before any relationship is added.
    next_id = next_free_rel_id(rels_xml)
    rid_h_def, rid_h_first, rid_f_def = (f"rId{next_id+i}" for i in range(3))
    add_relationship(rels_xml, rid_h_def, REL_HEADER, "header1.xml")
    add_relationship(rels_xml, rid_h_first, REL_HEADER, "header2.xml")
    add_relationship(rels_xml, rid_f_def, REL_FOOTER, "footer1.xml")
    log(f" Beziehungen: {rid_h_def}=header1, {rid_h_first}=header2,"
        f" {rid_f_def}=footer1")

    add_content_type_override(ct_xml, "/word/header1.xml", CT_HEADER)
    add_content_type_override(ct_xml, "/word/header2.xml", CT_HEADER)
    add_content_type_override(ct_xml, "/word/footer1.xml", CT_FOOTER)
    log(" [Content_Types].xml: Override-Eintraege fuer header1/2 und footer1")

    update_sectpr_with_headers(doc_xml, rid_h_def, rid_h_first, rid_f_def)


def main() -> int:
    """Build OUTPUT_FILE from pandoc's default reference document.

    Fetches the default reference.docx into a temporary directory, unpacks
    it, applies the theme/style/header adjustments in a fixed order and
    repacks the result to OUTPUT_FILE. Returns 0 on completion.
    """
    log(f"Ziel: {OUTPUT_FILE}")
    TEMPLATES_DIR.mkdir(parents=True, exist_ok=True)

    with tempfile.TemporaryDirectory(prefix="refdocx-") as tmp:
        tmp_dir = Path(tmp)
        default_docx = tmp_dir / "pandoc-default.docx"
        unpacked = tmp_dir / "unpacked"

        fetch_pandoc_default(default_docx)
        unpacked.mkdir()
        unpack_docx(default_docx, unpacked)

        theme_xml = unpacked / "word" / "theme" / "theme1.xml"
        styles_xml = unpacked / "word" / "styles.xml"

        log("Anpassung: Theme major+minor auf Calibri")
        set_theme_fonts_to_calibri(theme_xml)

        log("Anpassung: Direkte Font-Referenzen in styles.xml -> Calibri")
        replace_direct_fonts_in_styles(styles_xml)

        log("Anpassung: Tabellen-Default ohne Rahmen")
        set_table_borders_none(styles_xml)

        log("Anpassung: Body-Schriftgroesse 11 pt (DocDefault)")
        set_default_body_size(styles_xml)

        log("Anpassung: Heading-Schriftgroessen 15/13/12 pt")
        set_heading_sizes(styles_xml)

        log("Anpassung: Widow/Orphan-Control im DocDefault (B3)")
        set_widow_control_default(styles_xml)

        log("Anpassung: keepNext + keepLines auf Heading 1/2/3 + FirstParagraph (B3)")
        set_keep_next_styles(styles_xml)

        log("Anpassung: Heading 1/2/3 in destengsblue (B4)")
        set_heading_colors(styles_xml)

        log("Anpassung: Heading 1+2 fett (B4-Bold, S10)")
        set_heading_bold(styles_xml)

        log("Anpassung: Header und Footer einbauen (B2)")
        add_header_footer(unpacked)

        # Repack while the temporary directory still exists.
        log("Repack als reference.docx")
        repack_docx(unpacked, OUTPUT_FILE)
        log(f" -> {OUTPUT_FILE} ({OUTPUT_FILE.stat().st_size} Bytes)")

    log("Fertig.")
    return 0


if __name__ == "__main__":
    sys.exit(main())