"""Strutture dati intermedie della pipeline: Block, Section, FontProfile.""" from __future__ import annotations from dataclasses import dataclass, field @dataclass class Block: text: str page: int bbox: tuple[float, float, float, float] # x0, y0, x1, y1 font_size: float font_name: str is_bold: bool block_type: str = "paragraph" # paragraph|header_candidate|list_item|table|ignore space_before: float = 0.0 level: int = 0 # assegnato da stage5 (0 = non header) origin_spans: list[dict] = field(default_factory=list, repr=False) @property def x0(self) -> float: return self.bbox[0] @property def y0(self) -> float: return self.bbox[1] @property def x1(self) -> float: return self.bbox[2] @property def y1(self) -> float: return self.bbox[3] @dataclass class Section: title: str level: int # 1, 2, 3 content: list[Block] = field(default_factory=list) children: list[Section] = field(default_factory=list) page_start: int = 0 source_block: Block | None = field(default=None, repr=False) @dataclass class FontProfile: body_size: float cluster_map: dict[float, int] # font_size arrotondato → livello (1/2/3) header_sizes: list[float] # taglie candidate header, ordinate desc