Files
rag-from-scratch/conversione/_pipeline/__init__.py
T
davide e1b5298b20 feat: integra pipeline PDF→Markdown a 9 stadi e test suite
Porta da branch marker la riscrittura completa di conversione/_pipeline/
(9 stadi PyMuPDF) e la suite tests/ senza modificare il resto del progetto
RAG (ollama/, step-5/, step-6/, step-8/, rag.py, retrieve.py, config.py).

requirements.txt: aggiunge PyMuPDF>=1.24.0 e pytest>=8.0, mantiene chromadb,
rimuove opendataloader-pdf e pymupdf4llm.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-11 14:44:16 +02:00

31 lines
1.1 KiB
Python

from .extract import validate_pdf, extract_metadata
from .structure import analyze
from .report import build_report
from .runner import run
from .validator import validate
from .models import Block, Section, FontProfile
from .stage1_metadata import extract_raw_data
from .stage2_layout import analyze_layout
from .stage3_font import build_font_profile
from .stage4_headers import classify_blocks
from .stage5_hierarchy import infer_hierarchy
from .stage6_tree import build_tree
from .stage7_markdown import serialize_tree
from .stage8_normalize import normalize_hierarchy
from .stage9_validate import validate_markdown, ValidationResult
# Public names of the package: exactly the names brought in by the relative
# imports above, kept in the same order as those import statements.
__all__ = [
    # from .extract
    "validate_pdf",
    "extract_metadata",
    # from .structure, .report, .runner, .validator
    "analyze",
    "build_report",
    "run",
    "validate",
    # from .models
    "Block",
    "Section",
    "FontProfile",
    # from the stage modules, in stage order (.stage1_metadata .. .stage9_validate)
    "extract_raw_data",
    "analyze_layout",
    "build_font_profile",
    "classify_blocks",
    "infer_hierarchy",
    "build_tree",
    "serialize_tree",
    "normalize_hierarchy",
    "validate_markdown",
    "ValidationResult",
]