Today I needed to convert a PDF to Markdown on macOS. I tried several CLI tools that I could install with Homebrew, but none of them preserved the document's structure and formatting well. marker-pdf was the first tool that consistently produced clean Markdown, even though it took more work to set up.

I used this convert.py script:

#!/usr/bin/env python3
import shlex
import subprocess
import sys
from pathlib import Path

def convert_pdf_to_markdown(pdf_file: str) -> None:
    """Convert a PDF to Markdown with marker-pdf's ``marker_single`` CLI.

    A virtual environment (``.venv``) is created next to the PDF on first use
    and marker-pdf is installed into it. The conversion output is written
    under an ``output`` directory that also sits next to the PDF.

    Args:
        pdf_file: Path to the PDF. ``~`` is expanded and a relative path is
            resolved against the current working directory.

    Raises:
        FileNotFoundError: If ``pdf_file`` does not point to an existing
            regular file.
        subprocess.CalledProcessError: If creating the venv, installing
            marker-pdf, or the conversion exits with a non-zero status.
    """
    pdf_path = Path(pdf_file).expanduser().resolve()
    # is_file() rather than exists(): a directory must be rejected here,
    # before a venv is created and packages are installed around it.
    if not pdf_path.is_file():
        raise FileNotFoundError(f"PDF not found: {pdf_path}")

    root_dir = pdf_path.parent
    venv_dir = root_dir / ".venv"
    output_dir = root_dir / "output"
    python_bin = venv_dir / "bin" / "python"
    marker_bin = venv_dir / "bin" / "marker_single"

    def run(cmd: list[str]) -> None:
        """Echo *cmd*, then execute it and raise if it fails."""
        # shlex.join quotes each argument, so paths containing spaces stay
        # unambiguous and the echoed line can be pasted back into a shell.
        print("+", shlex.join(cmd))
        subprocess.run(cmd, check=True)

    if not python_bin.exists():
        # Build the venv with the interpreter that is running this script
        # instead of whichever "python3" happens to be first on PATH.
        run([sys.executable or "python3", "-m", "venv", str(venv_dir)])

    if not marker_bin.exists():
        # Invoke pip through the venv's interpreter so the package is
        # guaranteed to land in this venv.
        run([str(python_bin), "-m", "pip", "install", "marker-pdf"])

    output_dir.mkdir(parents=True, exist_ok=True)
    run(
        [
            str(marker_bin),
            str(pdf_path),
            "--output_format",
            "markdown",
            "--output_dir",
            str(output_dir),
        ]
    )


if __name__ == "__main__":
    # Use the first command-line argument as the PDF path when one is given;
    # otherwise fall back to the placeholder below (edit it before running).
    default_pdf = "path/to/the/pdf/you/want/to/convert.pdf"
    convert_pdf_to_markdown(sys.argv[1] if len(sys.argv) > 1 else default_pdf)

Then run:

# Run from the folder that contains convert.py; a relative PDF path inside
# the script is resolved against the current working directory.
cd path/to/the/folder/with/convert.py
# Mark the script executable so it can be launched directly via its shebang line.
chmod +x convert.py
./convert.py

Output goes to:

path/to/the/pdf/folder/output/<pdf-name>/<pdf-name>.md

The first run will be slower because it creates the .venv folder next to the PDF, installs marker-pdf into it, and downloads model files.