From b5fb3631043cf16caa942d157173b73754ed3168 Mon Sep 17 00:00:00 2001
From: Davide Grilli <davide.grilli@outlook.com>
Date: Tue, 12 May 2026 10:37:39 +0200
Subject: [PATCH] =?UTF-8?q?chore(config):=20tuning=20RAG=20=E2=80=94=20mod?=
 =?UTF-8?q?ello=204b,=20temperatura=200.2,=20chunk=20target=20300?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 chunks/config.py | 2 +-
 config.py        | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/chunks/config.py b/chunks/config.py
index 0eb8e3d..1a73450 100644
--- a/chunks/config.py
+++ b/chunks/config.py
@@ -16,7 +16,7 @@ verify_chunks.py e fix_chunks.py senza toccare il codice applicativo.
 # Con TARGET=600 e TOL=0.25 → ogni chunk sarà tra 450 e 750 char,
 # il più vicino possibile a 600, terminando sempre su un confine di frase.
 #
-TARGET_CHARS    = 600
+TARGET_CHARS    = 300
 CHUNK_TOLERANCE = 0.25
 
 # ─── Overlap ──────────────────────────────────────────────────────────────────
diff --git a/config.py b/config.py
index efd9d55..067b608 100644
--- a/config.py
+++ b/config.py
@@ -18,7 +18,7 @@ TOP_K = 6
 # Temperatura del modello LLM.
 # 0.0 = completamente deterministico (stessa risposta ad ogni run)
 # 0.7 = più creativo e vario
-TEMPERATURE = 0.0
+TEMPERATURE = 0.2
 
 # Disabilita il "thinking" (ragionamento interno) nei modelli Qwen3/Qwen3.5.
 # True  = risposta diretta, più veloce
@@ -38,7 +38,7 @@ EMBED_MODEL = "nomic-embed-text"
 OLLAMA_URL = "http://localhost:11434"
 
 # Modello LLM. Scegli in base alla RAM disponibile (vedi README).
-OLLAMA_MODEL = "qwen3.5:0.8b"
+OLLAMA_MODEL = "qwen3.5:4b"
 
 # ── Prompt di sistema ─────────────────────────────────────────────────────────