feat: integra pipeline PDF→Markdown a 9 stadi e test suite
Porta da main la riscrittura completa di conversione/_pipeline/ (9 stadi PyMuPDF) e la suite tests/ senza modificare chunks/, step-8/, rag.py, ollama/, retrieve.py, config.py. requirements.txt: aggiunge PyMuPDF>=1.24.0 e pytest>=8.0, mantiene chromadb, rimuove opendataloader-pdf e pymupdf4llm. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -0,0 +1,47 @@
|
||||
"""Tests for the Block, Section and FontProfile dataclasses."""
|
||||
from conversione._pipeline.models import Block, Section, FontProfile
|
||||
|
||||
|
||||
def test_block_creation():
    """Build a Block from keyword arguments and check its fields.

    Covers the values passed in explicitly, the values expected for the
    fields that are omitted (``block_type`` and ``level``), and two of the
    coordinate accessors.
    """
    kwargs = {
        "text": "Titolo",
        "page": 1,
        "bbox": (0, 0, 100, 14),
        "font_size": 16.0,
        "font_name": "Arial-Bold",
        "is_bold": True,
    }
    heading = Block(**kwargs)

    # Values handed to the constructor come back unchanged.
    assert heading.text == "Titolo"
    assert heading.is_bold

    # Neither block_type nor level was supplied above.
    assert heading.block_type == "paragraph"
    assert heading.level == 0

    # Expected to match the first and last entries of the bbox tuple.
    assert heading.x0 == 0.0
    assert heading.y1 == 14.0
|
||||
|
||||
|
||||
def test_block_properties():
    """Check x0, y0, x1 and y1 on a Block built from positional arguments."""
    blk = Block("x", 1, (10.0, 20.0, 110.0, 34.0), 12.0, "Helvetica", False)

    # Accessor name -> value expected for the bbox passed above.
    expected_coords = (
        ("x0", 10.0),
        ("y0", 20.0),
        ("x1", 110.0),
        ("y1", 34.0),
    )
    for attr_name, expected in expected_coords:
        assert getattr(blk, attr_name) == expected
|
||||
|
||||
|
||||
def test_section_defaults():
    """Check the fields of a Section created with only title and level."""
    section = Section(title="Intro", level=1)

    # Both containers are expected to start out as empty lists.
    assert section.content == []
    assert section.children == []

    # page_start was not supplied; the expected value is 0.
    assert section.page_start == 0
|
||||
|
||||
|
||||
def test_section_nesting():
    """Append one Section to another's children and read it back."""
    outer = Section("Parent", level=1)
    inner = Section("Child", level=2)

    outer.children.append(inner)

    # Exactly one child is attached, and it is the one just appended.
    assert len(outer.children) == 1
    assert outer.children[0].title == "Child"
|
||||
|
||||
|
||||
def test_font_profile():
    """Construct a FontProfile and check that its three fields are readable."""
    size_to_level = {18.0: 1, 15.0: 2}
    heading_sizes = [18.0, 15.0]
    profile = FontProfile(
        body_size=11.0,
        cluster_map=size_to_level,
        header_sizes=heading_sizes,
    )

    assert profile.body_size == 11.0
    assert profile.cluster_map[18.0] == 1
    assert len(profile.header_sizes) == 2
|
||||
Reference in New Issue
Block a user