"""Pure helper functions shared across the transformation modules."""

import re

from ._constants import _ORDINALS_IT, _ORDINALS_EN


def _sentence_case(s: str) -> str:
    """Return *s* lowercased with only its first character uppercased.

    An empty (falsy) input is returned unchanged.
    """
    if s:
        lowered = s.lower()
        return lowered[0].upper() + lowered[1:]
    return s


def _is_allcaps_line(line: str) -> bool:
    """Tell whether *line* is written entirely in capital letters.

    A line qualifies when, after stripping surrounding whitespace, it does
    not start with ``#`` or ``|``, contains at least three alphabetic
    characters, and every alphabetic character is uppercase.
    """
    content = line.strip()
    if content.startswith(("#", "|")):
        return False
    alpha = [ch for ch in content if ch.isalpha()]
    if len(alpha) < 3:
        return False
    return all(ch.isupper() for ch in alpha)


def _allcaps_to_header(raw_line: str) -> str:
    """Turn an all-caps line into a level-2 Markdown header.

    A leading list bullet (``-``, ``*`` or ``+``) and trailing ``.``/``?``
    are removed first. Four shapes are tried in order:

    1. ``CAPITOLO <ordinal>. <title>`` with the ordinal looked up in
       ``_ORDINALS_IT``.
    2. ``CHAPTER <ordinal or digits>[.] <title>`` with the ordinal looked up
       in ``_ORDINALS_EN`` (falling back to the matched text itself).
    3. ``<roman letters or digits>. <title>``.
    4. Anything else: the whole text, sentence-cased.
    """

    def _trim(fragment: str) -> str:
        # Drop trailing periods, then trailing question marks, then spaces.
        return fragment.rstrip(".").rstrip("?").strip()

    heading = _trim(re.sub(r"^[-*+]\s+", "", raw_line.strip()))

    # Italian chapter heading.
    it_alternatives = "|".join(_ORDINALS_IT.keys())
    found = re.match(rf"^CAPITOLO ({it_alternatives})\. (.+)", heading)
    if found:
        roman = _ORDINALS_IT[found.group(1)]
        title = _trim(found.group(2))
        return f"## Capitolo {roman} — {_sentence_case(title)}"

    # English chapter heading; plain digits are kept as they are.
    en_alternatives = "|".join(_ORDINALS_EN.keys())
    found = re.match(rf"^CHAPTER ({en_alternatives}|\d+)\.? (.+)", heading)
    if found:
        ordinal = found.group(1)
        label = _ORDINALS_EN.get(ordinal, ordinal)
        title = _trim(found.group(2))
        return f"## Chapter {label} — {_sentence_case(title)}"

    # Numbered heading such as "IV. TITLE" or "3. TITLE".
    found = re.match(r"^([IVXLCDM]+|[0-9]+)\. (.+)", heading)
    if found:
        title = found.group(2).rstrip(".").strip()
        return f"## {found.group(1)}. {_sentence_case(title)}"

    return f"## {_sentence_case(heading)}"


def _extract_math_environments(text: str) -> tuple[str, int]:
    """Promote numbered math-environment openers to level-3 headers.

    *text* is split on blank lines (``"\\n\\n"``). A block that starts with
    an environment keyword followed by a number (for example
    ``Teorema 2.1``) becomes ``### <env> <num>.`` or, when a parenthesised
    title follows the number, ``### <env> <num> (<title>)``; the remaining
    text of the block is placed after a blank line. Empty blocks and blocks
    starting with ``#`` are left untouched.

    Returns the rewritten text and the number of blocks converted.
    """
    env_names = (
        r"Definizione|Definition|Teorema|Theorem|Lemma|"
        r"Proposizione|Proposition|Corollario|Corollary|"
        r"Osservazione|Remark|Nota|Note|Esempio|Example"
    )
    opener = re.compile(
        rf"^({env_names})\s+((?:\d+\.?){{1,4}})\s*(.*)",
        re.DOTALL,
    )

    converted = 0
    pieces = []
    for chunk in text.split("\n\n"):
        content = chunk.strip()
        hit = None
        if content and not content.startswith("#"):
            hit = opener.match(content)
        if hit is None:
            pieces.append(chunk)
            continue

        env = hit.group(1)
        number = hit.group(2).rstrip(".")
        remainder = hit.group(3).strip()

        # Optional "(Title)" right after the number.
        titled = re.match(r"^(\([^)]{2,60}\))\s+(.*)", remainder, re.DOTALL)
        if titled:
            heading = f"### {env} {number} {titled.group(1)}"
            remainder = titled.group(2).strip()
        else:
            heading = f"### {env} {number}."

        pieces.append(f"{heading}\n\n{remainder}" if remainder else heading)
        converted += 1

    return "\n\n".join(pieces), converted


def _merge_title_headers(text: str) -> tuple[str, int]:
    """Join bare numeric headers with the short title block that follows.

    A block of the form ``## 1.`` or ``### 1.`` is merged with the next
    block when that block is non-empty, single-line, at most 80 characters
    long, not itself a header and not a numbered list item. Runs of three
    or more newlines in the result are collapsed to a blank line.

    Returns the rewritten text and the number of merges performed.
    """

    def _is_title_candidate(candidate: str) -> bool:
        if not candidate or "\n" in candidate or len(candidate) > 80:
            return False
        if candidate.startswith("#"):
            return False
        return re.match(r"^\d+[\.\)]\s", candidate) is None

    merged = 0
    chunks = re.split(r"\n{2,}", text)
    out = []
    skip_next = False
    for idx, chunk in enumerate(chunks):
        if skip_next:
            # This chunk was already absorbed into the previous header.
            skip_next = False
            continue

        head = chunk.strip()
        has_follower = idx + 1 < len(chunks)
        if has_follower and re.match(r"^#{2,3} \d+\.\s*$", head):
            follower = chunks[idx + 1].strip()
            if _is_title_candidate(follower):
                out.append(f"{head} {follower}")
                merged += 1
                skip_next = True
                continue

        out.append(chunk)

    return re.sub(r"\n{3,}", "\n\n", "\n\n".join(out)), merged


def _extract_article_headers(text: str) -> tuple[str, int]:
    """Rewrite ``- Art. N. ...`` list items as level-3 article headers.

    Each matching line becomes ``### Art. N.``. When the remaining text
    opens with a short capitalised title ending in a period and followed by
    more text, the title is kept on the header line and the rest is moved
    below a blank line; otherwise any remaining text goes below the header.

    Returns the rewritten text and the number of items converted.
    """
    title_pattern = (
        r"^([A-Z\xc0\xc8\xc9\xcc\xcd\xd2\xd3\xd9\xda].{1,74}?)\.\s+"
        r"([A-Z\xc0\xc8\xc9\xcc\xcd\xd2\xd3\xd9\xda\(\d].{4,})"
    )

    def _render(hit: re.Match) -> str:
        number = hit.group(1)
        remainder = hit.group(2).strip()
        if not remainder:
            return f"### Art. {number}."
        titled = re.match(title_pattern, remainder)
        if titled is None:
            return f"### Art. {number}.\n\n{remainder}"
        return (
            f"### Art. {number}. {titled.group(1)}.\n\n"
            f"{titled.group(2).strip()}"
        )

    # Every match is rewritten, so the substitution count reported by
    # ``subn`` is exactly the number of converted items.
    return re.subn(
        r"^-\s+Art\.\s+([\d]+[a-z\-]*)\.\s*(.*)",
        _render,
        text,
        flags=re.MULTILINE,
    )