#!/usr/bin/env python3
"""
Convert Markdown documentation to Word documents (.docx)
With proper code block formatting (shaded boxes)
"""

import os
import re

from docx import Document
from docx.shared import Inches, Pt, RGBColor, Twips
from docx.enum.text import WD_ALIGN_PARAGRAPH, WD_COLOR_INDEX
from docx.enum.style import WD_STYLE_TYPE
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.oxml.ns import qn, nsmap
from docx.oxml import OxmlElement

# Markdown heading prefixes and the element tag each one produces.
_HEADING_PREFIXES = (
    ('# ', 'h1'),
    ('## ', 'h2'),
    ('### ', 'h3'),
    ('#### ', 'h4'),
)

# Element tag -> python-docx heading level (level 0 is the "Title" style).
_HEADING_LEVELS = {'h1': 0, 'h2': 1, 'h3': 2, 'h4': 3}

_BULLET_MARKERS = ('- ', '* ')
_NUMBERED_RE = re.compile(r'^\d+\.\s')

# A table delimiter row such as "|---|:---:|" or "--- | ---".
_TABLE_SEPARATOR_RE = re.compile(
    r'^\s*\|?\s*:?-+:?\s*(\|\s*:?-+:?\s*)*\|?\s*$'
)

# Inline tokens recognised by add_formatted_text: bold, inline code, links.
_INLINE_SPLIT_RE = re.compile(
    r'(\*\*[^*]+\*\*|`[^`]+`|\[[^\]]+\]\([^)]+\))'
)
_LINK_RE = re.compile(r'\[([^\]]+)\]\(([^)]+)\)')


def set_cell_shading(cell, color="E8E8E8"):
    """Set cell background shading color.

    Args:
        cell: A python-docx table cell.
        color: Fill color as a hex RGB string without the leading '#'.
    """
    tcPr = cell._tc.get_or_add_tcPr()
    # Replace any earlier shading instead of stacking duplicate <w:shd> nodes.
    for existing in tcPr.findall(qn('w:shd')):
        tcPr.remove(existing)
    shd = OxmlElement('w:shd')
    shd.set(qn('w:fill'), color)
    shd.set(qn('w:val'), 'clear')
    tcPr.append(shd)


def set_cell_borders(cell, color="CCCCCC"):
    """Set cell border color.

    Draws a thin single-line border on all four sides of the cell.

    Args:
        cell: A python-docx table cell.
        color: Border color as a hex RGB string without the leading '#'.
    """
    tcPr = cell._tc.get_or_add_tcPr()
    for existing in tcPr.findall(qn('w:tcBorders')):
        tcPr.remove(existing)

    tcBorders = OxmlElement('w:tcBorders')
    for border_name in ('top', 'left', 'bottom', 'right'):
        border = OxmlElement(f'w:{border_name}')
        border.set(qn('w:val'), 'single')
        border.set(qn('w:sz'), '4')
        border.set(qn('w:color'), color)
        tcBorders.append(border)

    # The WordprocessingML schema orders <w:tcBorders> before <w:shd> inside
    # <w:tcPr>; keep that order even when shading was applied first.
    shd = tcPr.find(qn('w:shd'))
    if shd is not None:
        shd.addprevious(tcBorders)
    else:
        tcPr.append(tcBorders)


def add_code_block(doc, code_text, language=""):
    """Add a formatted code block with shading.

    The block is rendered as a single-cell table with a light gray fill,
    one paragraph per source line, followed by an empty spacer paragraph.

    Args:
        doc: The python-docx Document to append to.
        code_text: The code, with lines separated by '\\n'.
        language: Fence language tag; accepted for callers but not rendered.
    """
    # Create a single-cell table for the code block
    table = doc.add_table(rows=1, cols=1)
    table.autofit = True
    cell = table.rows[0].cells[0]

    # Set cell shading (light gray background)
    set_cell_shading(cell, "F5F5F5")
    set_cell_borders(cell, "DDDDDD")

    # Clear default paragraph and add code
    cell.paragraphs[0].clear()

    # Add each line of code
    for index, line in enumerate(code_text.split('\n')):
        para = cell.paragraphs[0] if index == 0 else cell.add_paragraph()
        para.paragraph_format.space_before = Pt(0)
        para.paragraph_format.space_after = Pt(0)
        para.paragraph_format.line_spacing = 1.0
        run = para.add_run(line if line else ' ')  # Use space for empty lines
        run.font.name = 'Consolas'
        run.font.size = Pt(9)
        run.font.color.rgb = RGBColor(0, 0, 0)

    # Add spacing after the code block
    doc.add_paragraph()


def _heading_element(line):
    """Return the (tag, text) element for a heading line, or None."""
    for prefix, tag in _HEADING_PREFIXES:
        if line.startswith(prefix):
            return (tag, line[len(prefix):].strip())
    return None


def _is_table_separator(line):
    """Return True if *line* is a table delimiter row (e.g. '|---|---|')."""
    return '|' in line and _TABLE_SEPARATOR_RE.match(line) is not None


def _split_table_row(line):
    """Split one table line into stripped cell strings.

    Outer pipes are optional, so both '| a | b |' and 'a | b' give
    ['a', 'b'].
    """
    row = line.strip()
    if row.startswith('|'):
        row = row[1:]
    if row.endswith('|'):
        row = row[:-1]
    return [cell.strip() for cell in row.split('|')]


def parse_markdown(md_content):
    """Parse markdown content into structured elements.

    Args:
        md_content: The full markdown text.

    Returns:
        A list of (type, content) tuples, where type is one of:
        'h1'..'h4' (content: heading text), 'hr' (content: ''),
        'code' (content: (language, code_text)), 'table' (content: list of
        raw table lines, delimiter row included), 'bullet' / 'numbered'
        (content: list of item strings) and 'para' (content: joined text).
    """
    lines = md_content.split('\n')
    total = len(lines)
    elements = []
    i = 0

    while i < total:
        line = lines[i]
        stripped = line.strip()

        # Skip empty lines
        if not stripped:
            i += 1
            continue

        heading = _heading_element(line)

        # Headers
        if heading is not None:
            elements.append(heading)
            i += 1

        # Horizontal rule
        elif stripped == '---':
            elements.append(('hr', ''))
            i += 1

        # Code blocks
        elif stripped.startswith('```'):
            code_lang = stripped[3:]
            code_lines = []
            i += 1
            while i < total and not lines[i].strip().startswith('```'):
                code_lines.append(lines[i])
                i += 1
            # Store language info with code
            elements.append(('code', (code_lang, '\n'.join(code_lines))))
            i += 1  # Skip closing ```

        # Tables: a line with pipes followed by a real delimiter row.  A bare
        # '---' rule after a paragraph containing '|' is not a table.
        elif '|' in line and i + 1 < total and _is_table_separator(lines[i + 1]):
            table_lines = [line]
            i += 1
            while i < total and '|' in lines[i]:
                table_lines.append(lines[i])
                i += 1
            elements.append(('table', table_lines))

        # Bullet lists
        elif stripped.startswith(_BULLET_MARKERS):
            list_items = []
            while i < total:
                current = lines[i]
                current_stripped = current.strip()
                if current_stripped.startswith(_BULLET_MARKERS):
                    list_items.append(current_stripped[2:])
                elif current.startswith(' ') and current_stripped:
                    # Indented continuation line belongs to the previous item.
                    if list_items:
                        list_items[-1] += ' ' + current_stripped
                else:
                    break
                i += 1
            elements.append(('bullet', list_items))

        # Numbered lists
        elif _NUMBERED_RE.match(stripped):
            list_items = []
            while i < total:
                current = lines[i]
                current_stripped = current.strip()
                if _NUMBERED_RE.match(current_stripped):
                    list_items.append(_NUMBERED_RE.sub('', current_stripped))
                elif current.startswith(' ') and current_stripped:
                    # Same continuation rule as bullet lists: whitespace-only
                    # lines end the list instead of being absorbed.
                    if list_items:
                        list_items[-1] += ' ' + current_stripped
                else:
                    break
                i += 1
            elements.append(('numbered', list_items))

        # Regular paragraph
        else:
            para_lines = [line]
            i += 1
            while (
                i < total
                and lines[i].strip()
                and not lines[i].startswith(('#', '```', '- ', '* '))
                and '|' not in lines[i]
                and not _NUMBERED_RE.match(lines[i].strip())
            ):
                para_lines.append(lines[i])
                i += 1
            elements.append(('para', ' '.join(para_lines)))

    return elements


def clean_text(text):
    """Remove markdown formatting from text.

    Strips bold, italic and inline-code markers and reduces links to their
    label text.
    """
    # Bold
    text = re.sub(r'\*\*([^*]+)\*\*', r'\1', text)
    # Italic
    text = re.sub(r'\*([^*]+)\*', r'\1', text)
    # Code
    text = re.sub(r'`([^`]+)`', r'\1', text)
    # Links
    text = re.sub(r'\[([^\]]+)\]\([^)]+\)', r'\1', text)
    return text


def add_formatted_text(paragraph, text):
    """Add text with basic formatting to a paragraph.

    Recognises **bold**, `inline code` and [label](url) links.  Links are
    rendered as blue underlined label text; the URL itself is not embedded.
    Anything else is added as a plain run.

    Args:
        paragraph: The python-docx paragraph to append runs to.
        text: Markdown-flavoured inline text.
    """
    # Split by formatting markers and add runs
    for part in _INLINE_SPLIT_RE.split(text):
        if not part:
            continue

        # Length guards keep a lone '**' or '`' from becoming an empty run.
        if len(part) > 4 and part.startswith('**') and part.endswith('**'):
            run = paragraph.add_run(part[2:-2])
            run.bold = True
            continue

        if len(part) > 2 and part.startswith('`') and part.endswith('`'):
            run = paragraph.add_run(part[1:-1])
            run.font.name = 'Consolas'
            run.font.size = Pt(9)
            # Light background for inline code.  GRAY_25 is index 16; the
            # raw value 15 used previously is GRAY_50 (dark gray).
            run.font.highlight_color = WD_COLOR_INDEX.GRAY_25
            continue

        if part.startswith('[') and '](' in part:
            match = _LINK_RE.match(part)
            if match:
                run = paragraph.add_run(match.group(1))
                run.font.color.rgb = RGBColor(0, 0, 255)
                run.underline = True
                continue
            # Not a complete link: fall through and keep the text as-is
            # rather than silently dropping it.

        paragraph.add_run(part)


def _add_table(doc, table_lines):
    """Render raw markdown table lines as a grid table with a header row."""
    rows = []
    for index, line in enumerate(table_lines):
        # Only the delimiter row directly under the header is skipped, so
        # data rows that happen to contain '---' are kept.
        if index == 1 and _is_table_separator(line):
            continue
        cells = _split_table_row(line)
        if cells:
            rows.append(cells)

    if not rows:
        return

    num_cols = len(rows[0])
    table = doc.add_table(rows=len(rows), cols=num_cols)
    table.style = 'Table Grid'
    table.alignment = WD_TABLE_ALIGNMENT.CENTER

    for i, row in enumerate(rows):
        # Cells beyond the header's column count are ignored.
        for j, cell_text in enumerate(row[:num_cols]):
            cell = table.rows[i].cells[j]
            cell.text = clean_text(cell_text)
            # Bold and shade header row
            if i == 0:
                set_cell_shading(cell, "E0E0E0")
                for para in cell.paragraphs:
                    for run in para.runs:
                        run.bold = True

    # Add spacing after table
    doc.add_paragraph()


def convert_md_to_docx(md_file, docx_file):
    """Convert a markdown file to a Word document.

    Args:
        md_file: Path of the UTF-8 markdown file to read.
        docx_file: Path of the .docx file to write.
    """
    print(f"Converting {md_file} to {docx_file}...")

    with open(md_file, 'r', encoding='utf-8') as f:
        content = f.read()

    elements = parse_markdown(content)
    doc = Document()

    # Set default font
    style = doc.styles['Normal']
    style.font.name = 'Calibri'
    style.font.size = Pt(11)

    for elem_type, elem_content in elements:
        if elem_type in _HEADING_LEVELS:
            heading = doc.add_heading(
                clean_text(elem_content), level=_HEADING_LEVELS[elem_type]
            )
            if elem_type == 'h1':
                heading.alignment = WD_ALIGN_PARAGRAPH.CENTER

        elif elem_type == 'hr':
            p = doc.add_paragraph()
            p.add_run('─' * 70)
            p.alignment = WD_ALIGN_PARAGRAPH.CENTER

        elif elem_type == 'para':
            p = doc.add_paragraph()
            add_formatted_text(p, elem_content)

        elif elem_type == 'code':
            code_lang, code_text = elem_content
            add_code_block(doc, code_text, code_lang)

        elif elem_type == 'bullet':
            for item in elem_content:
                p = doc.add_paragraph(style='List Bullet')
                add_formatted_text(p, item)

        elif elem_type == 'numbered':
            for item in elem_content:
                p = doc.add_paragraph(style='List Number')
                add_formatted_text(p, item)

        elif elem_type == 'table':
            _add_table(doc, elem_content)

    doc.save(docx_file)
    print(f" Created: {docx_file}")


def main():
    """Convert the fixed set of documentation files in the docs directory."""
    docs_dir = '/home/camp/projects/powershell/docs'
    md_files = [
        'Update-ShopfloorPCs-Remote.md',
        'Invoke-RemoteMaintenance.md',
        'Update-PC-CompleteAsset.md',
        'DATA_COLLECTION_PARITY.md'
    ]

    for md_file in md_files:
        md_path = os.path.join(docs_dir, md_file)
        # Swap only the extension; str.replace would also rewrite any '.md'
        # that appears earlier in the file name.
        docx_name = os.path.splitext(md_file)[0] + '.docx'
        docx_path = os.path.join(docs_dir, docx_name)
        if os.path.exists(md_path):
            convert_md_to_docx(md_path, docx_path)
        else:
            print(f"Warning: {md_path} not found")

    print("\nConversion complete!")
    print(f"Word documents saved to: {docs_dir}")


if __name__ == '__main__':
    main()