e1b5298b20
Porta da branch marker la riscrittura completa di conversione/_pipeline/ (9 stadi PyMuPDF) e la suite tests/ senza modificare il resto del progetto RAG (ollama/, step-5/, step-6/, step-8/, rag.py, retrieve.py, config.py). requirements.txt: aggiunge PyMuPDF>=1.24.0 e pytest>=8.0, mantiene chromadb, rimuove opendataloader-pdf e pymupdf4llm. Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
48 lines
1.2 KiB
Python
48 lines
1.2 KiB
Python
"""Test dataclass Block, Section, FontProfile."""
|
|
from conversione._pipeline.models import Block, Section, FontProfile
|
|
|
|
|
|
def test_block_creation():
    """Build a Block with keyword arguments and check the resulting fields."""
    block = Block(
        text="Titolo",
        page=1,
        bbox=(0, 0, 100, 14),
        font_size=16.0,
        font_name="Arial-Bold",
        is_bold=True,
    )

    # Values handed to the constructor must be readable back unchanged.
    assert block.text == "Titolo"
    assert block.is_bold

    # block_type and level were not passed above; these are the values the
    # test expects Block to supply when they are omitted.
    assert block.block_type == "paragraph"
    assert block.level == 0

    # x0 and y1 are expected to equal the first and last bbox entries.
    assert block.x0 == 0.0
    assert block.y1 == 14.0
|
|
|
|
|
|
def test_block_properties():
    """Check that x0, y0, x1 and y1 equal the bbox entries, in that order."""
    bbox = (10.0, 20.0, 110.0, 34.0)
    block = Block("x", 1, bbox, 12.0, "Helvetica", False)

    # Each accessor is compared with the bbox entry at the same position.
    accessors = ("x0", "y0", "x1", "y1")
    for attr_name, expected in zip(accessors, bbox):
        assert getattr(block, attr_name) == expected
|
|
|
|
|
|
def test_section_defaults():
    """Check the fields of a Section built from only a title and a level."""
    section = Section(title="Intro", level=1)

    # None of these were passed to the constructor, so the asserted values
    # are what the test expects Section to provide on its own.
    assert section.content == []
    assert section.children == []
    assert section.page_start == 0
|
|
|
|
|
|
def test_section_nesting():
    """Append one Section to another's children and check it is stored there."""
    outer = Section("Parent", level=1)
    inner = Section("Child", level=2)

    outer.children.append(inner)

    nested = outer.children
    assert len(nested) == 1
    assert nested[0].title == "Child"
|
|
|
|
|
|
def test_font_profile():
    """Build a FontProfile and read back the values given to the constructor."""
    sizes = [18.0, 15.0]
    profile = FontProfile(
        body_size=11.0,
        cluster_map={18.0: 1, 15.0: 2},
        header_sizes=sizes,
    )

    assert profile.body_size == 11.0
    assert profile.cluster_map[18.0] == 1
    assert len(profile.header_sizes) == 2
|