ebd2a43f84
Porta da main la riscrittura completa di conversione/_pipeline/ (9 stadi PyMuPDF) e la suite tests/ senza modificare chunks/, step-8/, rag.py, ollama/, retrieve.py, config.py. requirements.txt: aggiunge PyMuPDF>=1.24.0 e pytest>=8.0, mantiene chromadb, rimuove opendataloader-pdf e pymupdf4llm. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
31 lines
1.1 KiB
Python
31 lines
1.1 KiB
Python
"""Package entry point that re-exports the pipeline's public names.

Everything imported here comes from sibling modules of this package and is
listed in ``__all__``, so callers can import these names from the package
root instead of reaching into the individual modules.
"""

# Helpers and data models from the non-stage modules.
from .extract import validate_pdf, extract_metadata
from .structure import analyze
from .report import build_report
from .runner import run
from .validator import validate
from .models import Block, Section, FontProfile

# One import per numbered stage module (stage1 .. stage9), in stage order.
from .stage1_metadata import extract_raw_data
from .stage2_layout import analyze_layout
from .stage3_font import build_font_profile
from .stage4_headers import classify_blocks
from .stage5_hierarchy import infer_hierarchy
from .stage6_tree import build_tree
from .stage7_markdown import serialize_tree
from .stage8_normalize import normalize_hierarchy
from .stage9_validate import validate_markdown, ValidationResult

# Explicit public API: mirrors the imports above, in the same order.
__all__ = [
    "validate_pdf", "extract_metadata",
    "analyze", "build_report", "run", "validate",
    "Block", "Section", "FontProfile",
    "extract_raw_data",
    "analyze_layout",
    "build_font_profile",
    "classify_blocks",
    "infer_hierarchy",
    "build_tree",
    "serialize_tree",
    "normalize_hierarchy",
    "validate_markdown", "ValidationResult",
]