Convert PDF to Markdown Locally on macOS
# Today I needed to convert a PDF to Markdown on macOS. I tried CLIs I could
# install with brew, but none of them gave me decent output for structure and
# formatting. marker-pdf was the first one that consistently gave me clean
# Markdown, even though it required more work to set up.
#
# I used this convert.py script:

#!/usr/bin/env python3
"""Convert a PDF to Markdown by running marker-pdf from a local virtualenv."""
import shlex
import subprocess
import sys
from pathlib import Path


def _run(cmd: list[str]) -> None:
    """Echo *cmd* in shell-quoted form, then run it.

    Raises:
        subprocess.CalledProcessError: If the command exits non-zero.
    """
    # shlex.join quotes arguments containing spaces, so the echoed line can be
    # pasted back into a shell unchanged.
    print("+", shlex.join(str(part) for part in cmd))
    subprocess.run(cmd, check=True)


def _ensure_marker(venv_dir: Path) -> Path:
    """Create *venv_dir* and install marker-pdf if missing; return marker_single."""
    python_bin = venv_dir / "bin" / "python"
    marker_bin = venv_dir / "bin" / "marker_single"

    if not python_bin.exists():
        # Build the venv with the interpreter running this script rather than
        # whichever "python3" happens to come first on PATH.
        _run([sys.executable or "python3", "-m", "venv", str(venv_dir)])

    if not marker_bin.exists():
        # "python -m pip" guarantees pip installs into this venv's interpreter.
        _run([str(python_bin), "-m", "pip", "install", "marker-pdf"])

    return marker_bin


def convert_pdf_to_markdown(pdf_file: str) -> None:
    """Convert the PDF at *pdf_file* to Markdown using marker_single.

    A ``.venv`` directory (holding marker-pdf) and an ``output`` directory are
    created next to the PDF when they do not already exist; marker_single is
    then invoked with ``--output_dir`` pointing at that ``output`` directory.

    Args:
        pdf_file: Path to the PDF; ``~`` is expanded and the path is resolved.

    Raises:
        FileNotFoundError: If the path does not exist or is not a regular file.
        subprocess.CalledProcessError: If venv creation, the pip install, or
            the marker_single run exits non-zero.
    """
    pdf_path = Path(pdf_file).expanduser().resolve()
    # is_file() (not exists()) so that a directory is rejected up front,
    # before any venv creation or package installation is attempted.
    if not pdf_path.is_file():
        raise FileNotFoundError(f"PDF not found: {pdf_path}")

    root_dir = pdf_path.parent
    output_dir = root_dir / "output"

    marker_bin = _ensure_marker(root_dir / ".venv")
    output_dir.mkdir(parents=True, exist_ok=True)

    _run(
        [
            str(marker_bin),
            str(pdf_path),
            "--output_format",
            "markdown",
            "--output_dir",
            str(output_dir),
        ]
    )


if __name__ == "__main__":
    # Use the path given on the command line when there is one; otherwise keep
    # the original behavior of converting the hard-coded placeholder path.
    convert_pdf_to_markdown(
        sys.argv[1] if len(sys.argv) > 1 else "path/to/the/pdf/you/want/to/convert.pdf"
    )

# Then run: ...