chore(config): tuning RAG — modello 4b, temperatura 0.2, chunk target 300

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-12 10:37:39 +02:00
parent 602dc87045
commit b5fb363104
2 changed files with 3 additions and 3 deletions
@@ -16,7 +16,7 @@ verify_chunks.py e fix_chunks.py senza toccare il codice applicativo.
 # Con TARGET=300 e TOL=0.25 → ogni chunk sarà tra 225 e 375 char,
 # il più vicino possibile a 300, terminando sempre su un confine di frase.
 #
-TARGET_CHARS    = 600
+TARGET_CHARS    = 300
 CHUNK_TOLERANCE = 0.25

 # ─── Overlap ──────────────────────────────────────────────────────────────────
@@ -18,7 +18,7 @@ TOP_K = 6
 # Temperatura del modello LLM.
 # 0.0 = completamente deterministico (stessa risposta ad ogni run)
 # 0.7 = più creativo e vario
-TEMPERATURE = 0.0
+TEMPERATURE = 0.2

 # Disabilita il "thinking" (ragionamento interno) nei modelli Qwen3/Qwen3.5.
 # True  = risposta diretta, più veloce
@@ -38,7 +38,7 @@ EMBED_MODEL = "nomic-embed-text"
 OLLAMA_URL = "http://localhost:11434"

 # Modello LLM. Scegli in base alla RAM disponibile (vedi README).
-OLLAMA_MODEL = "qwen3.5:0.8b"
+OLLAMA_MODEL = "qwen3.5:4b"

 # ── Prompt di sistema ─────────────────────────────────────────────────────────