diff --git a/.github/workflows/build-pdf.yaml b/.github/workflows/build-pdf.yaml new file mode 100644 index 00000000..3d4e1bff --- /dev/null +++ b/.github/workflows/build-pdf.yaml @@ -0,0 +1,143 @@ +name: Build and Release PDF + +on: + push: + tags: + - 'v*' + - 'release*' + workflow_dispatch: + +permissions: + contents: write + +jobs: + build-pdf: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + run: | + # Install pandoc + wget -q https://github.com/jgm/pandoc/releases/download/3.1.11/pandoc-3.1.11-1-amd64.deb + sudo dpkg -i pandoc-3.1.11-1-amd64.deb + rm pandoc-3.1.11-1-amd64.deb + + # Install TeX Live with XeLaTeX and Chinese support + sudo apt-get update + sudo apt-get install -y \ + texlive-xetex \ + texlive-lang-chinese \ + texlive-fonts-recommended \ + texlive-fonts-extra \ + fonts-noto-cjk \ + fonts-noto-cjk-extra + + # Verify installations + pandoc --version + xelatex --version + + - name: Build PDF + run: | + chmod +x bin/pdf bin/preprocess-epub.py + mkdir -p output output/temp + + # Preprocess Markdown files + python3 bin/preprocess-epub.py content/zh output/temp + + # Generate PDF with CI-specific fonts (Noto CJK) + pandoc -o output/ddia.pdf \ + --metadata-file=metadata.yaml \ + -H bin/header-ci.tex \ + --toc \ + --toc-depth=2 \ + --top-level-division=chapter \ + --file-scope=true \ + --pdf-engine=xelatex \ + -V geometry:margin=1in \ + -V linestretch=1.5 \ + output/temp/_index.md \ + output/temp/preface.md \ + output/temp/part-i.md \ + output/temp/ch1.md \ + output/temp/ch2.md \ + output/temp/ch3.md \ + output/temp/ch4.md \ + output/temp/part-ii.md \ + output/temp/ch5.md \ + output/temp/ch6.md \ + output/temp/ch7.md \ + output/temp/ch8.md \ + output/temp/ch9.md \ + output/temp/part-iii.md \ + output/temp/ch10.md \ + output/temp/ch11.md \ + output/temp/ch12.md \ + 
output/temp/ch13.md \ + output/temp/ch14.md \ + output/temp/colophon.md \ + output/temp/glossary.md + + rm -rf output/temp + + if [ ! -f "output/ddia.pdf" ]; then + echo "Error: PDF file was not created" + exit 1 + fi + + ls -lh output/ddia.pdf + file output/ddia.pdf + + - name: Upload PDF artifact + uses: actions/upload-artifact@v4 + with: + name: ddia-pdf + path: output/ddia.pdf + retention-days: 30 + + - name: Create/Update Release + if: startsWith(github.ref, 'refs/tags/') + uses: softprops/action-gh-release@v1 + with: + files: output/ddia.pdf + name: ${{ github.ref_name }} + body: | + ## 《设计数据密集型应用》PDF 版本 + + 此版本为自动生成的 PDF 电子书。 + + ### 文件信息 + - 文件名: `ddia.pdf` + - 生成时间: ${{ github.event.head_commit.timestamp }} + - 标签: ${{ github.ref_name }} + + ### 依赖工具 + - Pandoc 3.1.11 + - XeLaTeX (TeX Live) + - 中文字体: Noto CJK + + ### 本地生成 + 如需本地生成 PDF,请确保安装以下依赖: + ```bash + # macOS + brew install pandoc + brew install --cask mactex + + # Ubuntu/Debian + sudo apt install pandoc texlive-xetex texlive-lang-chinese + + # 生成 PDF + make pdf + ``` + draft: false + prerelease: false + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.gitignore b/.gitignore index 7446723e..938f15f0 100644 --- a/.gitignore +++ b/.gitignore @@ -10,4 +10,7 @@ public/ CLAUDE.md content/cn/ zh.md -en.md \ No newline at end of file +en.md +.venv +AGENTS.md + diff --git a/Makefile b/Makefile index e0c85d22..148a9c46 100644 --- a/Makefile +++ b/Makefile @@ -17,4 +17,7 @@ translate: epub: bin/epub -.PHONY: default doc translate +pdf: + bin/pdf + +.PHONY: default doc translate epub pdf diff --git a/README.md b/README.md index 413e6976..26d07b9c 100644 --- a/README.md +++ b/README.md @@ -12,6 +12,19 @@ **阅读**:访问 [https://ddia.vonng.com](https://ddia.vonng.com) 阅读本书在线版本,或使用 [hugo](https://gohugo.io/documentation/) / [hextra](https://imfing.github.io/hextra/zh-cn/) 主题自行构建。 +**下载**:可以使用以下命令生成 PDF 电子书: + +```bash +# 安装依赖 +brew install pandoc +brew install poppler # 用于 pdftotext + +# 生成 PDF +make pdf 
+``` + +生成的 PDF 文件位于 `output/ddia.pdf` + > [!NOTE] > [**DDIA 第二版**](https://ddia.vonng.com) 正在翻译中(翻译至至第十章),欢迎阅览并提出您的宝贵意见![点击此处阅览第一版](https://ddia.vonng.com/v1)。 diff --git a/bin/header-ci.tex b/bin/header-ci.tex new file mode 100644 index 00000000..3988b71a --- /dev/null +++ b/bin/header-ci.tex @@ -0,0 +1,31 @@ +% Chinese support with xeCJK +\usepackage{xeCJK} +\setCJKmainfont{Noto Serif CJK SC} +\setCJKsansfont{Noto Sans CJK SC} + +% Enable Chinese line breaking +\XeTeXlinebreaklocale "zh" + +% Paragraph settings +\usepackage{parskip} +\setlength{\parindent}{2em} +\usepackage{ragged2e} + +% Chinese punctuation style +\punctstyle{quanjiao} + +% Cover page +\AtBeginDocument{% + \thispagestyle{empty} + \begin{center} + \vspace*{0.4\textheight} + {\Huge\bfseries 设计数据密集型应用} + \vspace{1cm} + {\LARGE 第二版} + \vspace{2cm} + {\Large Martin Kleppmann} + \vspace{0.5cm} + {\large 冯若航 译} + \end{center} + \clearpage +} diff --git a/bin/header.tex b/bin/header.tex new file mode 100644 index 00000000..c243e2b7 --- /dev/null +++ b/bin/header.tex @@ -0,0 +1,31 @@ +% Chinese support with xeCJK +\usepackage{xeCJK} +\setCJKmainfont{PingFang SC} +\setCJKsansfont{Heiti SC} + +% Enable Chinese line breaking +\XeTeXlinebreaklocale "zh" + +% Paragraph settings +\usepackage{parskip} +\setlength{\parindent}{2em} +\usepackage{ragged2e} + +% Chinese punctuation style +\punctstyle{quanjiao} + +% Cover page +\AtBeginDocument{% + \thispagestyle{empty} + \begin{center} + \vspace*{0.4\textheight} + {\Huge\bfseries 设计数据密集型应用} + \vspace{1cm} + {\LARGE 第二版} + \vspace{2cm} + {\Large Martin Kleppmann} + \vspace{0.5cm} + {\large 冯若航 译} + \end{center} + \clearpage +} diff --git a/bin/pdf b/bin/pdf new file mode 100755 index 00000000..37575436 --- /dev/null +++ b/bin/pdf @@ -0,0 +1,108 @@ +#!/usr/bin/env bash + +set -e + +# Check for required dependencies +check_dependencies() { + local missing_deps=() + + if ! command -v pandoc &> /dev/null; then + missing_deps+=("pandoc") + fi + + if ! 
command -v xelatex &> /dev/null; then + # Try lualatex as fallback + if ! command -v lualatex &> /dev/null; then + missing_deps+=("xelatex or lualatex (LaTeX engine)") + fi + fi + + if [ ${#missing_deps[@]} -ne 0 ]; then + echo "Error: Missing required dependencies:" + for dep in "${missing_deps[@]}"; do + echo " - $dep" + done + echo "" + echo "Installation:" + echo " macOS: brew install pandoc" + echo " macOS: brew install --cask mactex" + echo "" + echo " Linux: apt install pandoc texlive-xetex" + exit 1 + fi +} + +check_dependencies + +# Detect available PDF engine +if command -v xelatex &> /dev/null; then + PDF_ENGINE="xelatex" +elif command -v lualatex &> /dev/null; then + PDF_ENGINE="lualatex" +else + echo "Error: No suitable PDF engine found" + exit 1 +fi + +SCRIPT_DIR=$(dirname "$0") +INPUT_DIR=$(cd "$(dirname "$SCRIPT_DIR")" && pwd) +OUTPUT_DIR="$INPUT_DIR/output" +TEMP_DIR="$OUTPUT_DIR/temp" + +# Create output directory if it doesn't exist +mkdir -p "$OUTPUT_DIR" +mkdir -p "$TEMP_DIR" + +# Preprocess Markdown files to convert Hugo shortcodes +echo "Preprocessing Markdown files..." +python3 "${SCRIPT_DIR}/preprocess-epub.py" "${INPUT_DIR}/content/zh" "$TEMP_DIR" + +convert_to_pdf() { + # Convert all Markdown files into a single PDF book + OUTPUT_BOOK="$OUTPUT_DIR/ddia.pdf" + rm -f "$OUTPUT_BOOK" + echo "Converting all Markdown files into $OUTPUT_BOOK..." 
#!/usr/bin/env python3
"""PDF generation from Markdown using pandoc + LaTeX.

Pipeline:

1. Every ``*.md`` file in the input directory is run through
   ``process_file`` from the sibling ``preprocess-epub.py`` script and
   then through :func:`convert_pdf_markdown`.
2. The files named in ``CHAPTER_ORDER`` that exist are handed to pandoc,
   in that order, together with ``header.tex`` from this directory.
"""

import os
import re
import sys
import subprocess
import argparse
from pathlib import Path
from typing import Optional, List, Dict
import shutil
import importlib.util


# Directory containing this script. Resolved so that it stays correct even
# when ``__file__`` is a bare relative name such as ``pdf.py``.
SCRIPT_DIR = Path(__file__).resolve().parent

# Files that make up the book, in reading order. Files missing from the
# preprocessed directory are skipped by generate_pdf().
CHAPTER_ORDER = [
    "_index.md",
    "preface.md",
    "part-i.md",
    "ch1.md", "ch2.md", "ch3.md", "ch4.md",
    "part-ii.md",
    "ch5.md", "ch6.md", "ch7.md", "ch8.md", "ch9.md",
    "part-iii.md",
    "ch10.md", "ch11.md", "ch12.md", "ch13.md", "ch14.md",
    "colophon.md", "glossary.md",
]

# pandoc template variables (name -> font family) passed with ``-V``.
DEFAULT_FONTS = {
    "mainfont": "PingFang SC",
    "sansfont": "Heiti SC",
}

# Front matter block at the very start of a document.
YAML_FRONT_RE = re.compile(r'^---\n(.*?)\n---\n', re.DOTALL)
# ``title:`` line inside the front matter. Only horizontal whitespace is
# skipped, so an empty ``title:`` can never capture the following line.
# Surrounding quotes are removed later by _extract_title().
TITLE_RE = re.compile(r'^title:[ \t]*(.*?)[ \t]*$', re.MULTILINE)
# Leading "12. " style numbering in a title.
CHAPTER_NUM_RE = re.compile(r'^\d+\.\s*')
CHAPTER_FILE_RE = re.compile(r'^ch(\d+)\.md$')
# Callout marker at the start of a quoted line. Accepts both the inline
# form ("> [!NOTE] text") and the marker alone on its line ("> [!NOTE]").
CALLOUT_RE = re.compile(
    r'^> \[!(NOTE|TIP|WARNING|CAUTION|DANGER)\](?:[ \t]*$| )',
    re.MULTILINE,
)

# Chinese label printed in place of each callout marker.
CALLOUT_TITLES = {
    'note': '注',
    'tip': '提示',
    'warning': '警告',
    'caution': '注意',
    'danger': '危险',
}


def convert_pdf_markdown(text: str, filename: str) -> str:
    """Apply the PDF-specific Markdown conversions to one document.

    Args:
        text: Markdown source, optionally starting with YAML front matter.
        filename: Base name of the source file; decides the heading level
            that :func:`_add_title_heading` inserts.

    Returns:
        The converted Markdown.
    """
    text = _convert_callouts(text)
    text = _add_title_heading(text, filename)
    return text


def _convert_callouts(text: str) -> str:
    """Replace ``[!NOTE]``-style callout markers with bold Chinese labels.

    After the markers are replaced, the leading ``>`` (and one following
    space) is removed from every line, so all block quotes are flattened
    into ordinary paragraphs.
    """
    def replace_callout(match):
        kind = match.group(1).lower()
        return f"**{CALLOUT_TITLES.get(kind, kind)}**: "

    text = CALLOUT_RE.sub(replace_callout, text)
    text = re.sub(r'^> ?', '', text, flags=re.MULTILINE)
    return text


def _extract_title(frontmatter: str) -> Optional[str]:
    """Return the ``title`` value from front matter text, or None.

    One matching pair of surrounding single or double quotes is removed.
    Quotes and apostrophes inside the title are preserved. An empty title
    yields None.
    """
    match = TITLE_RE.search(frontmatter)
    if match is None:
        return None
    value = match.group(1)
    if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
        value = value[1:-1].strip()
    return value or None


def _add_title_heading(text: str, filename: str) -> str:
    """Insert a heading built from the front matter title.

    Chapter files (``chN.md``) get a level-1 heading, every other file a
    level-2 heading. A leading "N. " number is stripped from the title.
    Text without front matter or without a title is returned unchanged.
    """
    match = YAML_FRONT_RE.match(text)
    if match is None:
        return text

    frontmatter = match.group(1)
    title = _extract_title(frontmatter)
    if title is None:
        return text

    body = text[match.end():]
    clean_title = CHAPTER_NUM_RE.sub('', title)
    marker = "#" if CHAPTER_FILE_RE.match(filename) else "##"
    return f"---\n{frontmatter}\n---\n\n{marker} {clean_title}\n\n{body}"


def check_cmd(cmd: str) -> bool:
    """Return True if ``cmd`` is an executable found on ``PATH``."""
    # shutil.which avoids spawning a process and does not depend on a
    # `which` binary being installed.
    return shutil.which(cmd) is not None


def get_available_engine() -> Optional[str]:
    """Return the first available LaTeX engine, preferring xelatex."""
    for engine in ("xelatex", "lualatex"):
        if check_cmd(engine):
            return engine
    return None


def check_dependencies() -> List[str]:
    """Return human-readable names of missing external tools (may be empty)."""
    missing = []
    if not check_cmd("pandoc"):
        missing.append("pandoc")
    if not get_available_engine():
        missing.append("xelatex or lualatex (LaTeX engine)")
    return missing


def preprocess_markdown(input_dir: Path, output_dir: Path) -> None:
    """Preprocess every ``*.md`` file of ``input_dir`` into ``output_dir``.

    Each file is passed to ``process_file(src, dst)`` from the sibling
    ``preprocess-epub.py`` script, then through
    :func:`convert_pdf_markdown`, and written to ``output_dir`` under its
    original name.

    Raises:
        RuntimeError: If the preprocess script cannot be loaded.
    """
    preprocess_script = SCRIPT_DIR / "preprocess-epub.py"
    # The script name contains a hyphen, so it cannot be imported with a
    # normal ``import`` statement.
    spec = importlib.util.spec_from_file_location("preprocess_epub", preprocess_script)
    if spec is None:
        raise RuntimeError("Failed to load preprocess module")
    module = importlib.util.module_from_spec(spec)
    if spec.loader is None:
        raise RuntimeError("Failed to load preprocess module loader")
    spec.loader.exec_module(module)

    output_dir.mkdir(parents=True, exist_ok=True)
    md_files = sorted(input_dir.glob("*.md"))

    print(f"Preprocessing {len(md_files)} files...")
    scratch = output_dir / "tmp_preprocess.md"
    for md_file in md_files:
        try:
            module.process_file(str(md_file), str(scratch))
            content = scratch.read_text(encoding='utf-8')
        finally:
            # Remove the scratch file even on failure, so it can neither
            # leak into the output nor be re-read by a later iteration.
            scratch.unlink(missing_ok=True)

        content = convert_pdf_markdown(content, md_file.name)
        (output_dir / md_file.name).write_text(content, encoding='utf-8')


def build_pandoc_command(
    chapters: List[str],
    output_file,
    metadata_file: Optional[str],
    engine: str,
    fonts: Dict[str, str],
    margin: str,
    header_file,
) -> List[str]:
    """Build the pandoc argv for the book.

    Every option and its value are separate argv elements (or joined with
    ``=`` for long options), exactly as a shell would pass them.
    ``--metadata-file`` is emitted only when a metadata file is given.

    Args:
        chapters: Input Markdown paths, in book order.
        output_file: Destination PDF path.
        metadata_file: Optional pandoc metadata YAML file.
        engine: LaTeX engine name passed to ``--pdf-engine``.
        fonts: Template variable name -> font family; entries with an
            empty value are skipped.
        margin: Page margin passed as ``geometry:margin``.
        header_file: LaTeX file included in the header via ``-H``.

    Returns:
        The argv list, starting with ``"pandoc"``.
    """
    cmd = ["pandoc", "-o", str(output_file)]
    if metadata_file:
        cmd.append(f"--metadata-file={metadata_file}")
    cmd += [
        "-H", str(header_file),
        "--toc",
        "--toc-depth=2",
        "--top-level-division=chapter",
        "--file-scope",
        f"--pdf-engine={engine}",
    ]

    variables = [
        f"geometry:margin={margin}",
        "linestretch=1.5",
        "book=true",
        "classoption=openany",
    ]
    variables += [f"{name}={family}" for name, family in fonts.items() if family]
    for variable in variables:
        # "-V" and its value must be two argv elements: a single element
        # "-V name=value" hands pandoc a value that starts with a space.
        cmd += ["-V", variable]

    cmd.extend(chapters)
    return cmd


def generate_pdf(
    temp_dir: Path,
    output_file: Path,
    metadata_file: Optional[str],
    engine: str,
    fonts: Dict[str, str],
    margin: str = "1in",
) -> None:
    """Run pandoc over the preprocessed chapters to produce the PDF.

    Args:
        temp_dir: Directory holding the preprocessed Markdown files.
        output_file: Destination PDF path.
        metadata_file: Optional pandoc metadata YAML file.
        engine: LaTeX engine name (e.g. ``"xelatex"``).
        fonts: pandoc font variables, e.g. ``DEFAULT_FONTS``.
        margin: Page margin.

    Raises:
        ValueError: If none of the ``CHAPTER_ORDER`` files exist in
            ``temp_dir``.
        RuntimeError: If pandoc exits with a non-zero status; the message
            carries pandoc's stderr.
    """
    chapters = [str(temp_dir / ch) for ch in CHAPTER_ORDER if (temp_dir / ch).exists()]

    if not chapters:
        raise ValueError("No valid chapter files found")

    cmd = build_pandoc_command(
        chapters,
        output_file,
        metadata_file,
        engine,
        fonts,
        margin,
        SCRIPT_DIR / "header.tex",
    )

    print(f"Generating PDF with {engine}...")
    result = subprocess.run(cmd, capture_output=True, text=True)

    if result.returncode != 0:
        raise RuntimeError(f"PDF generation failed: {result.stderr}")
    if result.stderr:
        # Surface warnings that pandoc/LaTeX emitted on a successful run
        # instead of silently discarding them.
        print(result.stderr, file=sys.stderr, end="")


def main():
    """Command-line entry point.

    Relative ``--input`` / ``--output`` paths are interpreted relative to
    the project root (the parent of this script's directory). Exits with
    status 1 when a dependency or the input directory is missing.
    """
    parser = argparse.ArgumentParser(description="Generate PDF from Markdown")
    parser.add_argument("-i", "--input", default="content/zh", help="Input directory")
    parser.add_argument("-o", "--output", default="output", help="Output directory")
    parser.add_argument("-m", "--metadata", help="Metadata YAML file")
    parser.add_argument("-e", "--engine", choices=["xelatex", "lualatex"], help="PDF engine")
    parser.add_argument("--no-cleanup", action="store_true", help="Keep temp files")
    args = parser.parse_args()

    project_root = SCRIPT_DIR.parent
    input_dir = project_root / args.input
    output_dir = project_root / args.output
    temp_dir = output_dir / "temp"

    missing = check_dependencies()
    if missing:
        print("Error: Missing dependencies:", file=sys.stderr)
        for dep in missing:
            print(f"  - {dep}", file=sys.stderr)
        print("\nInstall: brew install pandoc && brew install --cask mactex", file=sys.stderr)
        sys.exit(1)

    engine = args.engine or get_available_engine()
    if engine is None:
        print("Error: No PDF engine available", file=sys.stderr)
        sys.exit(1)
    if not check_cmd(engine):
        # check_dependencies() only proves that *some* engine exists, not
        # the one requested with --engine.
        print(f"Error: Requested PDF engine not found: {engine}", file=sys.stderr)
        sys.exit(1)

    if not input_dir.is_dir():
        print(f"Error: Input directory not found: {input_dir}", file=sys.stderr)
        sys.exit(1)

    metadata = args.metadata or str(project_root / "metadata.yaml")

    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / "ddia.pdf"
    output_file.unlink(missing_ok=True)

    try:
        preprocess_markdown(input_dir, temp_dir)
        generate_pdf(temp_dir, output_file, metadata, engine, DEFAULT_FONTS)
    finally:
        # Clean up on failure as well; --no-cleanup keeps the files for
        # debugging.
        if not args.no_cleanup and temp_dir.exists():
            shutil.rmtree(temp_dir)

    print(f"PDF created: {output_file}")


if __name__ == "__main__":
    main()