From 9c25099579df2c5b7053ab1daf524987bdf40eb9 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Wed, 3 Dec 2025 20:29:24 +1030
Subject: [PATCH 01/54] Build settings for inclusion in flatpak build

---
 configure.ac       | 2 +-
 raylib/Makefile.am | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/configure.ac b/configure.ac
index a9f12a5..f3a23f0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -70,7 +70,7 @@ case "${host_os}" in
     WEBSOCKET_LDFLAGS=""
     GTK_SERVER_LDFLAGS="`pkg-config --libs gtk+-3.0` -lXm -lXt"
     GTK_SERVER_CPPFLAGS="`pkg-config --cflags gtk+-3.0` -DGTK_SERVER_FFI -DGTK_SERVER_LIBRARY -DGTK_SERVER_UNIX -DGTK_SERVER_GTK3x"
-    RAYLIB_LDFLAGS="-lGL -lm -lpthread -ldl -lrt -lX11"
+    RAYLIB_LDFLAGS="-lGL -lm -lpthread -ldl -lrt -lX11 -lwayland-client -lwayland-cursor -lwayland-egl -lxkbcommon"
     JVM_CPPFLAGS="-I/usr/lib/jvm/java-1.8.0-openjdk-amd64/include -I/usr/lib/jvm/java-1.8.0-openjdk-amd64/include/linux"
     JVM_LDFLAGS="-L/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/amd64/server -ljvm"
     NUKLEAR_CPPFLAGS="-D_GLFW_X11=1"
diff --git a/raylib/Makefile.am b/raylib/Makefile.am
index 60b17a4..875d5dc 100644
--- a/raylib/Makefile.am
+++ b/raylib/Makefile.am
@@ -25,8 +25,9 @@ gen: $(generated) README.md
 
 AM_CXXFLAGS=-fno-rtti -std=c++14 -fpermissive
 AM_CPPFLAGS = -Iraylib/src -Iraylib/src/external/glfw/include -Iraylib/src/external/glfw/deps/mingw \
+  -Iraylib/src/external/glfw/src \
   -DPLATFORM_DESKTOP=1 -DSUPPORT_BUSY_WAIT_LOOP=1 -DSUPPORT_SCREEN_CAPTURE=1 \
-  -DSUPPORT_GIF_RECORDING=1 -DSUPPORT_COMPRESSION_API=1 -D_GLFW_BUILD_DLL=1 \
+  -DSUPPORT_GIF_RECORDING=1 -DSUPPORT_COMPRESSION_API=1 -D_GLFW_WAYLAND=1 \
   -Wall -Wextra -Wshadow -Wdouble-promotion -Wno-unused-parameter -fPIC
 
 lib_LTLIBRARIES = libraylib.la

From e8500cfe9582b61b5c2cbe43af7a26361b0b2ae7 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Fri, 2 Jan 2026 08:25:40 +1030
Subject: [PATCH 02/54] RAYLIB: build with SDL3 platform backend

---
 configure.ac       | 11 ++++++++++-
 raylib/Makefile.am |  8 +++-----
 raylib/main.cpp    | 18 +++++++-----------
 3 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/configure.ac b/configure.ac
index f3a23f0..ed3c8a5 100644
--- a/configure.ac
+++ b/configure.ac
@@ -48,6 +48,13 @@ AC_ARG_WITH(ioio,
   [IOIO="yes"],
   [IOIO="no"])
 
+dnl configure SDL3 for raylib
+PKG_CHECK_MODULES([SDL3], [sdl3])
+AC_MSG_CHECKING([for SDL3 library linking])
+SDL3_LIBS=`pkg-config sdl3 --libs --static`
+SDL3_CFLAGS=`pkg-config sdl3 --cflags`
+SDL3_INCLUDE=-I`pkg-config --variable=includedir sdl3`/SDL3
+
 case "${host_os}" in
   *mingw* | *msys*)
     AC_DEFINE(_WIN32, 1, [building for win32])
@@ -70,7 +77,8 @@ case "${host_os}" in
     WEBSOCKET_LDFLAGS=""
     GTK_SERVER_LDFLAGS="`pkg-config --libs gtk+-3.0` -lXm -lXt"
     GTK_SERVER_CPPFLAGS="`pkg-config --cflags gtk+-3.0` -DGTK_SERVER_FFI -DGTK_SERVER_LIBRARY -DGTK_SERVER_UNIX -DGTK_SERVER_GTK3x"
-    RAYLIB_LDFLAGS="-lGL -lm -lpthread -ldl -lrt -lX11 -lwayland-client -lwayland-cursor -lwayland-egl -lxkbcommon"
+    RAYLIB_CPPFLAGS="${SDL3_INCLUDE} ${SDL3_CFLAGS}"
+    RAYLIB_LDFLAGS="${SDL3_LIBS}"
     JVM_CPPFLAGS="-I/usr/lib/jvm/java-1.8.0-openjdk-amd64/include -I/usr/lib/jvm/java-1.8.0-openjdk-amd64/include/linux"
     JVM_LDFLAGS="-L/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/amd64/server -ljvm"
     NUKLEAR_CPPFLAGS="-D_GLFW_X11=1"
@@ -79,6 +87,7 @@ esac
 AC_SUBST(DEBUG_LDFLAGS)
 AC_SUBST(CLIPBOARD_LDFLAGS)
 AC_SUBST(RAYLIB_LDFLAGS)
+AC_SUBST(RAYLIB_CPPFLAGS)
 AC_SUBST(NUKLEAR_LDFLAGS)
 AC_SUBST(WEBSOCKET_LDFLAGS)
 AC_SUBST(PLATFORM_LDFLAGS)
diff --git a/raylib/Makefile.am b/raylib/Makefile.am
index 875d5dc..ab155c1 100644
--- a/raylib/Makefile.am
+++ b/raylib/Makefile.am
@@ -24,16 +24,14 @@ $(generated): raylib/parser/raylib_api.json mkraylib.bas
 gen: $(generated) README.md
 
 AM_CXXFLAGS=-fno-rtti -std=c++14 -fpermissive
-AM_CPPFLAGS = -Iraylib/src -Iraylib/src/external/glfw/include -Iraylib/src/external/glfw/deps/mingw \
-  -Iraylib/src/external/glfw/src \
-  -DPLATFORM_DESKTOP=1 -DSUPPORT_BUSY_WAIT_LOOP=1 -DSUPPORT_SCREEN_CAPTURE=1 \
-  -DSUPPORT_GIF_RECORDING=1 -DSUPPORT_COMPRESSION_API=1 -D_GLFW_WAYLAND=1 \
+AM_CPPFLAGS = -Iraylib/src @RAYLIB_CPPFLAGS@ \
+  -DPLATFORM_DESKTOP_SDL=1 -DPLATFORM_DESKTOP_SDL3=1 -DSUPPORT_BUSY_WAIT_LOOP=1 -DSUPPORT_SCREEN_CAPTURE=1 \
+  -DSUPPORT_GIF_RECORDING=1 -DSUPPORT_COMPRESSION_API=1 \
   -Wall -Wextra -Wshadow -Wdouble-promotion -Wno-unused-parameter -fPIC
 
 lib_LTLIBRARIES = libraylib.la
 
 libraylib_la_SOURCES = \
-   raylib/src/rglfw.c \
    raylib/src/rmodels.c \
    raylib/src/raudio.c \
    raylib/src/rcore.c \
diff --git a/raylib/main.cpp b/raylib/main.cpp
index 8bed447..4d8a576 100644
--- a/raylib/main.cpp
+++ b/raylib/main.cpp
@@ -24,10 +24,10 @@
 #pragma GCC diagnostic pop
 #pragma GCC diagnostic pop
 #include <physac/src/physac.h>
-#include <GLFW/glfw3.h>
 #include <cstring>
 
 #include "robin-hood-hashing/src/include/robin_hood.h"
+#include "SDL_events.h"
 #include "include/var.h"
 #include "include/module.h"
 #include "include/param.h"
@@ -1377,18 +1377,15 @@ static int cmd_guiunlock(int argc, slib_par_t *params, var_t *retval) {
   return 1;
 }
 
-static int cmd_poll_events(int argc, slib_par_t *params, var_t *retval) {
-  glfwPollEvents();
-  return 1;
-}
-
 static int cmd_wait_events(int argc, slib_par_t *params, var_t *retval) {
-  float waitMillis = get_param_int(argc, params, 0, -1);
-  if (waitMillis > 0) {
-    glfwWaitEventsTimeout(waitMillis / 1000);
+  auto timeoutMS = get_param_int(argc, params, 0, -1);
+  SDL_Event event;
+  if (timeoutMS > 0) {
+    SDL_WaitEventTimeout(&event, timeoutMS);
   } else {
-    glfwWaitEvents();
+    SDL_WaitEvent(&event);
   }
+  SDL_PushEvent(&event);
   return 1;
 }
 
@@ -1835,7 +1832,6 @@ static FUNC_SIG lib_proc[] = {
   {3, 3, "GUISETSTYLE", cmd_guisetstyle},
   {2, 2, "GUISTATUSBAR", cmd_guistatusbar},
   {0, 0, "GUIUNLOCK", cmd_guiunlock},
-  {0, 0, "POLLEVENTS", cmd_poll_events},
   {0, 1, "WAITEVENTS", cmd_wait_events},
   {0, 0, "CLOSEPHYSICS", cmd_closephysics},
   {1, 1, "DESTROYPHYSICSBODY", cmd_destroyphysicsbody},

From 4a59ea1621021ef6ee6748f8ecb2ca54df64ef82 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Fri, 2 Jan 2026 13:00:55 +1030
Subject: [PATCH 03/54] RAYLIB: add missing include for flatpak build

---
 raylib/main.cpp | 1 +
 1 file changed, 1 insertion(+)

diff --git a/raylib/main.cpp b/raylib/main.cpp
index 4d8a576..ebc1759 100644
--- a/raylib/main.cpp
+++ b/raylib/main.cpp
@@ -25,6 +25,7 @@
 #pragma GCC diagnostic pop
 #include <physac/src/physac.h>
 #include <cstring>
+#include <cstdint>
 
 #include "robin-hood-hashing/src/include/robin_hood.h"
 #include "SDL_events.h"

From c5ace4e9ed22c459ea259df6036a021a0e823440 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Mon, 5 Jan 2026 09:11:25 +1030
Subject: [PATCH 04/54] Revert "RAYLIB: build with SDL3 platform backend"

This reverts commit e8500cfe9582b61b5c2cbe43af7a26361b0b2ae7.

Switching raylib from the GLFW/libGL backend to SDL3 doesn't completely solve
running programs from the IDE, so the GLFW backend is restored.
---
 configure.ac       | 11 +----------
 raylib/Makefile.am |  8 +++++---
 raylib/main.cpp    | 18 +++++++++++-------
 3 files changed, 17 insertions(+), 20 deletions(-)

diff --git a/configure.ac b/configure.ac
index ed3c8a5..f3a23f0 100644
--- a/configure.ac
+++ b/configure.ac
@@ -48,13 +48,6 @@ AC_ARG_WITH(ioio,
   [IOIO="yes"],
   [IOIO="no"])
 
-dnl configure SDL3 for raylib
-PKG_CHECK_MODULES([SDL3], [sdl3])
-AC_MSG_CHECKING([for SDL3 library linking])
-SDL3_LIBS=`pkg-config sdl3 --libs --static`
-SDL3_CFLAGS=`pkg-config sdl3 --cflags`
-SDL3_INCLUDE=-I`pkg-config --variable=includedir sdl3`/SDL3
-
 case "${host_os}" in
   *mingw* | *msys*)
     AC_DEFINE(_WIN32, 1, [building for win32])
@@ -77,8 +70,7 @@ case "${host_os}" in
     WEBSOCKET_LDFLAGS=""
     GTK_SERVER_LDFLAGS="`pkg-config --libs gtk+-3.0` -lXm -lXt"
     GTK_SERVER_CPPFLAGS="`pkg-config --cflags gtk+-3.0` -DGTK_SERVER_FFI -DGTK_SERVER_LIBRARY -DGTK_SERVER_UNIX -DGTK_SERVER_GTK3x"
-    RAYLIB_CPPFLAGS="${SDL3_INCLUDE} ${SDL3_CFLAGS}"
-    RAYLIB_LDFLAGS="${SDL3_LIBS}"
+    RAYLIB_LDFLAGS="-lGL -lm -lpthread -ldl -lrt -lX11 -lwayland-client -lwayland-cursor -lwayland-egl -lxkbcommon"
     JVM_CPPFLAGS="-I/usr/lib/jvm/java-1.8.0-openjdk-amd64/include -I/usr/lib/jvm/java-1.8.0-openjdk-amd64/include/linux"
     JVM_LDFLAGS="-L/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/amd64/server -ljvm"
     NUKLEAR_CPPFLAGS="-D_GLFW_X11=1"
@@ -87,7 +79,6 @@ esac
 AC_SUBST(DEBUG_LDFLAGS)
 AC_SUBST(CLIPBOARD_LDFLAGS)
 AC_SUBST(RAYLIB_LDFLAGS)
-AC_SUBST(RAYLIB_CPPFLAGS)
 AC_SUBST(NUKLEAR_LDFLAGS)
 AC_SUBST(WEBSOCKET_LDFLAGS)
 AC_SUBST(PLATFORM_LDFLAGS)
diff --git a/raylib/Makefile.am b/raylib/Makefile.am
index ab155c1..875d5dc 100644
--- a/raylib/Makefile.am
+++ b/raylib/Makefile.am
@@ -24,14 +24,16 @@ $(generated): raylib/parser/raylib_api.json mkraylib.bas
 gen: $(generated) README.md
 
 AM_CXXFLAGS=-fno-rtti -std=c++14 -fpermissive
-AM_CPPFLAGS = -Iraylib/src @RAYLIB_CPPFLAGS@ \
-  -DPLATFORM_DESKTOP_SDL=1 -DPLATFORM_DESKTOP_SDL3=1 -DSUPPORT_BUSY_WAIT_LOOP=1 -DSUPPORT_SCREEN_CAPTURE=1 \
-  -DSUPPORT_GIF_RECORDING=1 -DSUPPORT_COMPRESSION_API=1 \
+AM_CPPFLAGS = -Iraylib/src -Iraylib/src/external/glfw/include -Iraylib/src/external/glfw/deps/mingw \
+  -Iraylib/src/external/glfw/src \
+  -DPLATFORM_DESKTOP=1 -DSUPPORT_BUSY_WAIT_LOOP=1 -DSUPPORT_SCREEN_CAPTURE=1 \
+  -DSUPPORT_GIF_RECORDING=1 -DSUPPORT_COMPRESSION_API=1 -D_GLFW_WAYLAND=1 \
   -Wall -Wextra -Wshadow -Wdouble-promotion -Wno-unused-parameter -fPIC
 
 lib_LTLIBRARIES = libraylib.la
 
 libraylib_la_SOURCES = \
+   raylib/src/rglfw.c \
    raylib/src/rmodels.c \
    raylib/src/raudio.c \
    raylib/src/rcore.c \
diff --git a/raylib/main.cpp b/raylib/main.cpp
index ebc1759..667f0dc 100644
--- a/raylib/main.cpp
+++ b/raylib/main.cpp
@@ -24,11 +24,11 @@
 #pragma GCC diagnostic pop
 #pragma GCC diagnostic pop
 #include <physac/src/physac.h>
+#include <GLFW/glfw3.h>
 #include <cstring>
 #include <cstdint>
 
 #include "robin-hood-hashing/src/include/robin_hood.h"
-#include "SDL_events.h"
 #include "include/var.h"
 #include "include/module.h"
 #include "include/param.h"
@@ -1378,15 +1378,18 @@ static int cmd_guiunlock(int argc, slib_par_t *params, var_t *retval) {
   return 1;
 }
 
+static int cmd_poll_events(int argc, slib_par_t *params, var_t *retval) {
+  glfwPollEvents();
+  return 1;
+}
+
 static int cmd_wait_events(int argc, slib_par_t *params, var_t *retval) {
-  auto timeoutMS = get_param_int(argc, params, 0, -1);
-  SDL_Event event;
-  if (timeoutMS > 0) {
-    SDL_WaitEventTimeout(&event, timeoutMS);
+  float waitMillis = get_param_int(argc, params, 0, -1);
+  if (waitMillis > 0) {
+    glfwWaitEventsTimeout(waitMillis / 1000);
   } else {
-    SDL_WaitEvent(&event);
+    glfwWaitEvents();
   }
-  SDL_PushEvent(&event);
   return 1;
 }
 
@@ -1833,6 +1836,7 @@ static FUNC_SIG lib_proc[] = {
   {3, 3, "GUISETSTYLE", cmd_guisetstyle},
   {2, 2, "GUISTATUSBAR", cmd_guistatusbar},
   {0, 0, "GUIUNLOCK", cmd_guiunlock},
+  {0, 0, "POLLEVENTS", cmd_poll_events},
   {0, 1, "WAITEVENTS", cmd_wait_events},
   {0, 0, "CLOSEPHYSICS", cmd_closephysics},
   {1, 1, "DESTROYPHYSICSBODY", cmd_destroyphysicsbody},

From d9f64b1d968cc2f380109fcbe070eeed7c3d6fee Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Mon, 5 Jan 2026 20:06:52 +1030
Subject: [PATCH 05/54] RAYLIB: programs must run in a separate thread or via
 the command line

---
 include/module.h | 11 ++++++++++-
 raylib/main.cpp  |  8 +++++++-
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/include/module.h b/include/module.h
index d27ea52..b9176aa 100644
--- a/include/module.h
+++ b/include/module.h
@@ -25,6 +25,15 @@ extern "C" {
  */
 int sblib_init(const char *sourceFile);
 
+/**
+ * @ingroup modstd
+ *
+ * Returns whether the module is compatible with IDE builds
+ *
+ * @return non-zero on success
+ */
+int sblib_is_ide_compatible(void);
+
 /**
  * @ingroup modstd
  *
@@ -116,7 +125,7 @@ int sblib_func_exec(int index, int param_count, slib_par_t *params, var_t *retva
  * @param cls_id the variable class identifier
  * @param id the variable instance identifier
  */
-void sblib_free(int cls_id, int id);
+int sblib_free(int cls_id, int id);
 
 /**
  * @ingroup modlib
diff --git a/raylib/main.cpp b/raylib/main.cpp
index 667f0dc..98ad484 100644
--- a/raylib/main.cpp
+++ b/raylib/main.cpp
@@ -1938,7 +1938,7 @@ SBLIB_API int sblib_func_exec(int index, int argc, slib_par_t *params, var_t *re
   return result;
 }
 
-SBLIB_API void sblib_free(int cls_id, int id) {
+SBLIB_API int sblib_free(int cls_id, int id) {
   if (id != -1) {
     switch (cls_id) {
     case CLS_AUDIOSTREAM:
@@ -2031,6 +2031,7 @@ SBLIB_API void sblib_free(int cls_id, int id) {
       break;
     }
   }
+  return 0;
 }
 
 SBLIB_API void sblib_close(void) {
@@ -2095,3 +2096,8 @@ SBLIB_API void sblib_close(void) {
     _waveMap.clear();
   }
 }
+
+SBLIB_API void sblib_is_ide_compatible(void) {
+  // when using the SQL build, programs must be run via a separate thread
+  return false;
+}

From 8c4a66facc1ed4bb306be16d67d62ae617bce3cc Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Mon, 5 Jan 2026 20:09:23 +1030
Subject: [PATCH 06/54] RAYLIB: programs must run in a separate thread or via
 the command line

---
 raylib/main.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/raylib/main.cpp b/raylib/main.cpp
index 98ad484..8238cb4 100644
--- a/raylib/main.cpp
+++ b/raylib/main.cpp
@@ -2097,7 +2097,7 @@ SBLIB_API void sblib_close(void) {
   }
 }
 
-SBLIB_API void sblib_is_ide_compatible(void) {
+SBLIB_API int sblib_is_ide_compatible(void) {
   // when using the SQL build, programs must be run via a separate thread
-  return false;
+  return 0;
 }

From f615e24344adecb842741afdfc2d8ce85844c16d Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Tue, 6 Jan 2026 21:22:39 +1030
Subject: [PATCH 07/54] RAYLIB: rename UI detection to sblib_has_window_ui()

---
 include/module.h | 2 +-
 raylib/main.cpp  | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/module.h b/include/module.h
index b9176aa..c7753b2 100644
--- a/include/module.h
+++ b/include/module.h
@@ -32,7 +32,7 @@ int sblib_init(const char *sourceFile);
  *
  * @return non-zero on success
  */
-int sblib_is_ide_compatible(void);
+int sblib_has_window_ui(void);
 
 /**
  * @ingroup modstd
diff --git a/raylib/main.cpp b/raylib/main.cpp
index 8238cb4..18ee3cd 100644
--- a/raylib/main.cpp
+++ b/raylib/main.cpp
@@ -2097,7 +2097,7 @@ SBLIB_API void sblib_close(void) {
   }
 }
 
-SBLIB_API int sblib_is_ide_compatible(void) {
-  // when using the SQL build, programs must be run via a separate thread
-  return 0;
+SBLIB_API int sblib_has_window_ui(void) {
+  // raylib module creates a UI in a new window
+  return 1;
 }

From 744b20036d98f67dcba9f9998df888fa99f546c7 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Thu, 15 Jan 2026 08:50:34 +1030
Subject: [PATCH 08/54] FLATPAK: implement sblib_has_window_ui() in UI-based
 modules

---
 glfw/main.cpp    | 4 ++++
 nuklear/main.cpp | 4 ++++
 raylib/main.cpp  | 2 +-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/glfw/main.cpp b/glfw/main.cpp
index 2a49198..07f8872 100644
--- a/glfw/main.cpp
+++ b/glfw/main.cpp
@@ -404,3 +404,7 @@ SBLIB_API void sblib_ellipse(int xc, int yc, int xr, int yr, int fill) {
   glEnd();
 }
 
+SBLIB_API int sblib_has_window_ui(void) {
+  // module creates a UI in a new window
+  return 1;
+}
diff --git a/nuklear/main.cpp b/nuklear/main.cpp
index 00ac348..9553b7c 100644
--- a/nuklear/main.cpp
+++ b/nuklear/main.cpp
@@ -1181,3 +1181,7 @@ SBLIB_API void sblib_ellipse(int xc, int yc, int xr, int yr, int fill) {
   drawEnd();
 }
 
+SBLIB_API int sblib_has_window_ui(void) {
+  // module creates a UI in a new window
+  return 1;
+}
diff --git a/raylib/main.cpp b/raylib/main.cpp
index 18ee3cd..06598c9 100644
--- a/raylib/main.cpp
+++ b/raylib/main.cpp
@@ -2098,6 +2098,6 @@ SBLIB_API void sblib_close(void) {
 }
 
 SBLIB_API int sblib_has_window_ui(void) {
-  // raylib module creates a UI in a new window
+  // module creates a UI in a new window
   return 1;
 }

From 5cf27c37cc948728ac11b15abdbe21b687f661c5 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Sun, 18 Jan 2026 15:18:09 +1030
Subject: [PATCH 09/54] FLATPAK: ui modules now target wayland in linux build

---
 configure.ac        | 35 ++++++++++++++++++++++++++++++++---
 glfw/Makefile.am    |  1 +
 nuklear/Makefile.am |  1 +
 nuklear/main.cpp    | 26 ++++++++++++++++----------
 4 files changed, 50 insertions(+), 13 deletions(-)

diff --git a/configure.ac b/configure.ac
index f3a23f0..e3f5f79 100644
--- a/configure.ac
+++ b/configure.ac
@@ -33,6 +33,27 @@ function checkDebugMode() {
   AC_SUBST(CFLAGS)
 }
 
+function generate_wayland_protocols() {
+  RAYLIB_SRC_PATH="${srcdir}/raylib/raylib/src"
+  WL_PROTOCOLS_DIR="${RAYLIB_SRC_PATH}/external/glfw/deps/wayland"
+  AC_MSG_NOTICE([Generating Wayland protocol headers])
+  wl_generate() {
+    protocol="$1"
+    basename="$2"
+    "$WAYLAND_SCANNER" client-header "$protocol" "$RAYLIB_SRC_PATH/$basename.h" || exit 1
+    "$WAYLAND_SCANNER" private-code "$protocol" "$RAYLIB_SRC_PATH/$basename-code.h" || exit 1
+  }
+  wl_generate "$WL_PROTOCOLS_DIR/wayland.xml" wayland-client-protocol
+  wl_generate "$WL_PROTOCOLS_DIR/xdg-shell.xml" xdg-shell-client-protocol
+  wl_generate "$WL_PROTOCOLS_DIR/xdg-decoration-unstable-v1.xml" xdg-decoration-unstable-v1-client-protocol
+  wl_generate "$WL_PROTOCOLS_DIR/viewporter.xml" viewporter-client-protocol
+  wl_generate "$WL_PROTOCOLS_DIR/relative-pointer-unstable-v1.xml" relative-pointer-unstable-v1-client-protocol
+  wl_generate "$WL_PROTOCOLS_DIR/pointer-constraints-unstable-v1.xml" pointer-constraints-unstable-v1-client-protocol
+  wl_generate "$WL_PROTOCOLS_DIR/fractional-scale-v1.xml" fractional-scale-v1-client-protocol
+  wl_generate "$WL_PROTOCOLS_DIR/xdg-activation-v1.xml" xdg-activation-v1-client-protocol
+  wl_generate "$WL_PROTOCOLS_DIR/idle-inhibit-unstable-v1.xml" idle-inhibit-unstable-v1-client-protocol
+}
+
 AC_ARG_WITH(mlpack,
   [AS_HELP_STRING([--with-mlpack], [Build the mlpack module])],
   [MLPACK="yes"],
@@ -66,14 +87,22 @@ case "${host_os}" in
   *)
     PLATFORM_LDFLAGS="-Wl,--no-undefined -avoid-version"
     CLIPBOARD_LDFLAGS="`pkg-config xcb --libs` -lpthread"
-    NUKLEAR_LDFLAGS="-lGL -lm -lpthread -ldl -lrt -lX11"
     WEBSOCKET_LDFLAGS=""
     GTK_SERVER_LDFLAGS="`pkg-config --libs gtk+-3.0` -lXm -lXt"
     GTK_SERVER_CPPFLAGS="`pkg-config --cflags gtk+-3.0` -DGTK_SERVER_FFI -DGTK_SERVER_LIBRARY -DGTK_SERVER_UNIX -DGTK_SERVER_GTK3x"
-    RAYLIB_LDFLAGS="-lGL -lm -lpthread -ldl -lrt -lX11 -lwayland-client -lwayland-cursor -lwayland-egl -lxkbcommon"
+    RAYLIB_LDFLAGS="`pkg-config wayland-client wayland-cursor wayland-egl xkbcommon --libs`"
     JVM_CPPFLAGS="-I/usr/lib/jvm/java-1.8.0-openjdk-amd64/include -I/usr/lib/jvm/java-1.8.0-openjdk-amd64/include/linux"
     JVM_LDFLAGS="-L/usr/lib/jvm/java-1.8.0-openjdk-amd64/jre/lib/amd64/server -ljvm"
-    NUKLEAR_CPPFLAGS="-D_GLFW_X11=1"
+    NUKLEAR_CPPFLAGS="-D_GLFW_WAYLAND=1"
+    NUKLEAR_LDFLAGS="`pkg-config wayland-client wayland-cursor wayland-egl xkbcommon --libs`"
+
+    AC_ARG_VAR([WAYLAND_SCANNER], [Path to wayland-scanner])
+    AC_PATH_PROG([WAYLAND_SCANNER], [wayland-scanner])
+    AS_IF([test -n "$WAYLAND_SCANNER"], [
+      generate_wayland_protocols
+    ], [
+      AC_MSG_WARN([wayland-scanner not found; Wayland support disabled])
+    ])
 esac
 
 AC_SUBST(DEBUG_LDFLAGS)
diff --git a/glfw/Makefile.am b/glfw/Makefile.am
index dc0dbaa..35ff783 100644
--- a/glfw/Makefile.am
+++ b/glfw/Makefile.am
@@ -7,6 +7,7 @@
 
 AM_CXXFLAGS=-fno-rtti -std=c++14
 AM_CPPFLAGS = \
+  -I../raylib/raylib/src \
   -I../raylib/raylib/src/external/glfw/include \
   -I../raylib/raylib/src/external/glfw/deps \
 	-Wall -Wextra -Wshadow -Wdouble-promotion -Wno-unused-parameter -D_GLFW_BUILD_DLL=1
diff --git a/nuklear/Makefile.am b/nuklear/Makefile.am
index 5b4f06f..d96ea60 100644
--- a/nuklear/Makefile.am
+++ b/nuklear/Makefile.am
@@ -7,6 +7,7 @@
 
 AM_CXXFLAGS=-fno-rtti -std=c++14
 AM_CPPFLAGS = -D_GLFW_BUILD_DLL=1 @NUKLEAR_CPPFLAGS@ \
+  -I../raylib/raylib/src \
   -I../raylib/raylib/src/external/glfw/include \
   -I../raylib/raylib/src/external/glfw/deps
 lib_LTLIBRARIES = libnuklear.la
diff --git a/nuklear/main.cpp b/nuklear/main.cpp
index 9553b7c..f30a1f4 100644
--- a/nuklear/main.cpp
+++ b/nuklear/main.cpp
@@ -95,7 +95,7 @@ static void window_size_callback(GLFWwindow* window, int width, int height) {
 nk_context *nkp_create_window(const char *title, int width, int height) {
   if (!glfwInit()) {
     fprintf(stdout, "[GFLW] failed to init!\n");
-    exit(1);
+    return nullptr;
   }
 
   glfwWindowHint(GLFW_CONTEXT_VERSION_MAJOR, 2);
@@ -873,16 +873,22 @@ static int cmd_widgetishovered(int argc, slib_par_t *params, var_t *retval) {
 }
 
 static int cmd_windowbegin(int argc, slib_par_t *params, var_t *retval) {
-  nkp_process_events();
-  nkbd_begin(_ctx);
-  const char *title = get_param_str(argc, params, 0, "Untitled");
-  struct nk_rect rc = get_param_rect(argc, params, 1);
-  nk_flags flags = get_param_window_flags(argc, params, 5);
-  v_setint(retval, nk_begin(_ctx, title, rc, flags));
-  if ((flags & NK_WINDOW_TITLE) == 0) {
-    nkp_set_window_title(title);
+  int result;
+  if (_ctx != nullptr) {
+    nkp_process_events();
+    nkbd_begin(_ctx);
+    const char *title = get_param_str(argc, params, 0, "Untitled");
+    struct nk_rect rc = get_param_rect(argc, params, 1);
+    nk_flags flags = get_param_window_flags(argc, params, 5);
+    v_setint(retval, nk_begin(_ctx, title, rc, flags));
+    if ((flags & NK_WINDOW_TITLE) == 0) {
+      nkp_set_window_title(title);
+    }
+    result = 1;
+  } else {
+    result = 0;
   }
-  return 1;
+  return result;
 }
 
 static int cmd_windowend(int argc, slib_par_t *params, var_t *retval) {

From 9d9d017273737f3607b1eae827e2e9830f35e22f Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Tue, 20 Jan 2026 12:43:54 +1030
Subject: [PATCH 10/54] NUKLEAR: show window focused at startup

---
 nuklear/main.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/nuklear/main.cpp b/nuklear/main.cpp
index f30a1f4..d4ec06d 100644
--- a/nuklear/main.cpp
+++ b/nuklear/main.cpp
@@ -106,6 +106,8 @@ nk_context *nkp_create_window(const char *title, int width, int height) {
                              title, nullptr, nullptr);
 
   glfwMakeContextCurrent(_window);
+  glfwSwapBuffers(_window);
+
   gladLoadGL((GLADloadfunc) glfwGetProcAddress);
   glfwSetErrorCallback(error_callback);
   glfwSetWindowSizeCallback(_window, window_size_callback);

From 44281e7b65837abcfb9f9dd89743e658a21517a6 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Fri, 23 Jan 2026 15:30:22 +1030
Subject: [PATCH 11/54] RAYLIB: allow make to depend on sbasic as part of
 flatpak processing

---
 raylib/Makefile.am  |  11 +++-
 raylib/README.md    |  51 +++++++++------
 raylib/func-def.h   |  14 +++-
 raylib/func.h       | 152 ++++++++++++++++++++++++++++++++++++++------
 raylib/mkraylib.bas |   2 +-
 raylib/mkreadme.bas |   2 +-
 raylib/proc-def.h   |   3 +
 raylib/proc.h       |  55 +++++++++++++---
 raylib/raylib       |   2 +-
 9 files changed, 237 insertions(+), 55 deletions(-)

diff --git a/raylib/Makefile.am b/raylib/Makefile.am
index 875d5dc..0885662 100644
--- a/raylib/Makefile.am
+++ b/raylib/Makefile.am
@@ -8,8 +8,10 @@
 generated = func-def.h proc-def.h proc.h func.h
 sbasic=sbasic
 
+CLEANFILES = $(generated)
+
 raylib/tools/rlparser/output/raylib_api.json: raylib/src/raylib.h raylib/tools/rlparser/rlparser.c
-	(cd raylib/tools/rlparser && make && ./rlparser --format JSON --input ../../src/raylib.h --output raylib_api.json)
+	(cd raylib/tools/rlparser && make && ./rlparser --format JSON --input ../../src/raylib.h --output output/raylib_api.json)
 
 UNSUPPORTED.md: $(generated)
 	$(sbasic)	mkraylib.bas unsupported > $@
@@ -21,7 +23,11 @@ $(generated): raylib/parser/raylib_api.json mkraylib.bas
 	$(sbasic) mkraylib.bas $@ > $@
 	@touch main.cpp
 
-gen: $(generated) README.md
+gen: $(generated)
+
+gen: $(generated)
+
+all-am: $(generated) README.md
 
 AM_CXXFLAGS=-fno-rtti -std=c++14 -fpermissive
 AM_CPPFLAGS = -Iraylib/src -Iraylib/src/external/glfw/include -Iraylib/src/external/glfw/deps/mingw \
@@ -40,7 +46,6 @@ libraylib_la_SOURCES = \
    raylib/src/rshapes.c \
    raylib/src/rtextures.c \
    raylib/src/rtext.c \
-   raylib/src/utils.c \
    ../include/param.cpp \
    ../include/hashmap.cpp \
    physac.cpp \
diff --git a/raylib/README.md b/raylib/README.md
index b0f9723..08f508e 100644
--- a/raylib/README.md
+++ b/raylib/README.md
@@ -4,7 +4,7 @@ raylib is a simple and easy-to-use library to enjoy videogames programming.
 
 https://www.raylib.com/
 
-Implemented APIs (633)
+Implemented APIs (646)
 ----------------
 
 | Name    | Description   |
@@ -16,7 +16,7 @@ Implemented APIs (633)
 | sub BeginScissorMode(x, y, width, height) | Begin scissor mode (define screen area for following drawing) |
 | sub BeginShaderMode(shader) | Begin custom shader drawing |
 | sub BeginTextureMode(target) | Begin drawing to render texture |
-| func ChangeDirectory(dir) | Change working directory, return true on success |
+| func ChangeDirectory(dirPath) | Change working directory, return true on success |
 | func CheckCollisionBoxes(box1, box2) | Check collision between two bounding boxes |
 | func CheckCollisionBoxSphere(box, center, radius) | Check collision between box and sphere |
 | func CheckCollisionCircleLine(center, radius, p1, p2) | Check if circle collides with a line created betweeen two points [p1] and [p2] |
@@ -52,10 +52,11 @@ Implemented APIs (633)
 | func ComputeCRC32(data, dataSize) | Compute CRC32 hash code |
 | func ComputeMD5(data, dataSize) | Compute MD5 hash code, returns static int[4] (16 bytes) |
 | func ComputeSHA1(data, dataSize) | Compute SHA1 hash code, returns static int[5] (20 bytes) |
+| func ComputeSHA256(data, dataSize) | Compute SHA256 hash code, returns static int[8] (32 bytes) |
 | func createPhysicsbodycircle() | n/a |
 | func createPhysicsbodypolygon() | n/a |
 | func createPhysicsbodyrectangle() | n/a |
-| func DecodeDataBase64(data, outputSize) | Decode Base64 string data, memory must be MemFree() |
+| func DecodeDataBase64(text, outputSize) | Decode Base64 string (expected NULL terminated), memory must be MemFree() |
 | func DecompressData(compData, compDataSize, dataSize) | Decompress data (DEFLATE algorithm), memory must be MemFree() |
 | func destroyPhysicsbody() | n/a |
 | func DirectoryExists(dirPath) | Check if a directory path exists |
@@ -85,11 +86,14 @@ Implemented APIs (633)
 | sub DrawCylinderWiresEx(startPos, endPos, startRadius, endRadius, sides, color) | Draw a cylinder wires with base at startPos and top at endPos |
 | sub DrawEllipse(centerX, centerY, radiusH, radiusV, color) | Draw ellipse |
 | sub DrawEllipseLines(centerX, centerY, radiusH, radiusV, color) | Draw ellipse outline |
+| sub DrawEllipseLinesV(center, radiusH, radiusV, color) | Draw ellipse outline (Vector version) |
+| sub DrawEllipseV(center, radiusH, radiusV, color) | Draw ellipse (Vector version) |
 | sub DrawFPS(posX, posY) | Draw current FPS |
 | sub DrawGrid(slices, spacing) | Draw a grid (centered at (0, 0, 0)) |
 | sub DrawLine(startPosX, startPosY, endPosX, endPosY, color) | Draw a line |
 | sub DrawLine3D(startPos, endPos, color) | Draw a line in 3D world space |
 | sub DrawLineBezier(startPos, endPos, thick, color) | Draw line segment cubic-bezier in-out interpolation |
+| sub DrawLineDashed(startPos, endPos, dashSize, spaceSize, color) | Draw a dashed line |
 | sub DrawLineEx(startPos, endPos, thick, color) | Draw a line (using triangles/quads) |
 | sub DrawLineStrip(points, pointCount, color) | Draw lines sequence (using gl lines) |
 | sub DrawLineV(startPos, endPos, color) | Draw a line (using gl lines) |
@@ -108,7 +112,7 @@ Implemented APIs (633)
 | sub DrawPolyLinesEx(center, sides, radius, rotation, lineThick, color) | Draw a polygon outline of n sides with extended parameters |
 | sub DrawRay(ray, color) | Draw a ray line |
 | sub DrawRectangle(posX, posY, width, height, color) | Draw a color-filled rectangle |
-| sub DrawRectangleGradientEx(rec, topLeft, bottomLeft, topRight, bottomRight) | Draw a gradient-filled rectangle with custom vertex colors |
+| sub DrawRectangleGradientEx(rec, topLeft, bottomLeft, bottomRight, topRight) | Draw a gradient-filled rectangle with custom vertex colors |
 | sub DrawRectangleGradientH(posX, posY, width, height, left, right) | Draw a horizontal-gradient-filled rectangle |
 | sub DrawRectangleGradientV(posX, posY, width, height, top, bottom) | Draw a vertical-gradient-filled rectangle |
 | sub DrawRectangleLines(posX, posY, width, height, color) | Draw rectangle outline |
@@ -153,7 +157,7 @@ Implemented APIs (633)
 | sub DrawTriangleStrip3D(points, pointCount, color) | Draw a triangle strip defined by points |
 | sub EnableCursor() | Enables cursor (unlock cursor) |
 | sub EnableEventWaiting() | Enable waiting for events on EndDrawing(), no automatic event polling |
-| func EncodeDataBase64(data, dataSize, outputSize) | Encode data to Base64 string, memory must be MemFree() |
+| func EncodeDataBase64(data, dataSize, outputSize) | Encode data to Base64 string (includes NULL terminator), memory must be MemFree() |
 | sub EndBlendMode() | End blending mode (reset to default: alpha blending) |
 | sub EndDrawing() | End canvas drawing and swap buffers (double buffering) |
 | sub EndMode2D() | Ends 2D mode with custom camera |
@@ -173,7 +177,13 @@ Implemented APIs (633)
 | func ExportWave(wave, fileName) | Export wave data to file, returns true on success |
 | func ExportWaveAsCode(wave, fileName) | Export wave sample data to code (.h), returns true on success |
 | func Fade(color, alpha) | Get color with alpha applied, alpha goes from 0.0f to 1.0f |
+| func FileCopy(srcPath, dstPath) | Copy file from one path to another, dstPath created if it doesn't exist |
 | func FileExists(fileName) | Check if file exists |
+| func FileMove(srcPath, dstPath) | Move file from one directory to another, dstPath created if it doesn't exist |
+| func FileRemove(fileName) | Remove file (if exists) |
+| func FileRename(fileName, fileRename) | Rename file (if exists) |
+| func FileTextFindIndex(fileName, search) | Find text in existing file |
+| func FileTextReplace(fileName, search, replacement) | Replace text in an existing file |
 | func GenImageCellular(width, height, tileSize) | Generate image: cellular algorithm, bigger tileSize means bigger cells |
 | func GenImageChecked(width, height, checksX, checksY, col1, col2) | Generate image: checked |
 | func GenImageColor(width, height, color) | Generate image: plain color |
@@ -218,8 +228,8 @@ Implemented APIs (633)
 | func GetFontDefault() | Get the default Font |
 | func GetFPS() | Get current FPS |
 | func GetFrameTime() | Get time in seconds for last frame drawn (delta time) |
-| func GetGamepadAxisCount(gamepad) | Get gamepad axis count for a gamepad |
-| func GetGamepadAxisMovement(gamepad, axis) | Get axis movement value for a gamepad axis |
+| func GetGamepadAxisCount(gamepad) | Get axis count for a gamepad |
+| func GetGamepadAxisMovement(gamepad, axis) | Get movement value for a gamepad axis |
 | func GetGamepadButtonPressed() | Get the last gamepad button pressed |
 | func GetGamepadName(gamepad) | Get gamepad internal name id |
 | func GetGestureDetected() | Get latest detected gesture |
@@ -282,6 +292,7 @@ Implemented APIs (633)
 | func GetSplinePointBezierQuad(p1, c2, p3, t) | Get (evaluate) spline point: Quadratic Bezier |
 | func GetSplinePointCatmullRom(p1, p2, p3, p4, t) | Get (evaluate) spline point: Catmull-Rom |
 | func GetSplinePointLinear(startPos, endPos, t) | Get (evaluate) spline point: Linear |
+| func GetTextBetween(text, begin, end) | Get text between two strings |
 | func GetTime() | Get elapsed time in seconds since InitWindow() |
 | func GetTouchPointCount() | Get number of touch points |
 | func GetTouchPointId(index) | Get touch point identifier for given index |
@@ -396,7 +407,7 @@ Implemented APIs (633)
 | func IsCursorHidden() | Check if cursor is not visible |
 | func IsCursorOnScreen() | Check if cursor is on the screen |
 | func IsFileDropped() | Check if a file has been dropped into window |
-| func IsFileExtension(fileName, ext) | Check file extension (including point: .png, .wav) |
+| func IsFileExtension(fileName, ext) | Check file extension (recommended include point: .png, .wav) |
 | func IsFileNameValid(fileName) | Check if fileName is valid for the platform/OS |
 | func IsFontValid(font) | Check if a font is valid (font data loaded, WARNING: GPU texture not checked) |
 | func IsGamepadAvailable(gamepad) | Check if a gamepad is available |
@@ -498,7 +509,7 @@ Implemented APIs (633)
 | func pollevents() | n/a |
 | sub PollInputEvents() | Register all input events |
 | func resetPhysics() | n/a |
-| sub RestoreWindow() | Set window state: not minimized/maximized |
+| sub RestoreWindow() | Restore window from being minimized/maximized |
 | sub ResumeAudioStream(stream) | Resume audio stream |
 | sub ResumeMusicStream(music) | Resume playing paused music |
 | sub ResumeSound(sound) | Resume a paused sound |
@@ -525,7 +536,7 @@ Implemented APIs (633)
 | sub SetMouseOffset(offsetX, offsetY) | Set mouse offset |
 | sub SetMousePosition(x, y) | Set mouse position XY |
 | sub SetMouseScale(scaleX, scaleY) | Set mouse scaling |
-| sub SetMusicPan(music, pan) | Set pan for a music (0.5 is center) |
+| sub SetMusicPan(music, pan) | Set pan for a music (-1.0 left, 0.0 center, 1.0 right) |
 | sub SetMusicPitch(music, pitch) | Set pitch for a music (1.0 is base level) |
 | sub SetMusicVolume(music, volume) | Set volume for music (1.0 is max level) |
 | func setPhysicsbodyangularvelocity() | n/a |
@@ -555,7 +566,7 @@ Implemented APIs (633)
 | sub SetShaderValueTexture(shader, locIndex, texture) | Set shader uniform value and bind the texture (sampler2d) |
 | sub SetShaderValueV(shader, locIndex, value, uniformType, count) | Set shader uniform value vector |
 | sub SetShapesTexture(texture, source) | Set texture and rectangle to be used on shapes drawing |
-| sub SetSoundPan(sound, pan) | Set pan for a sound (0.5 is center) |
+| sub SetSoundPan(sound, pan) | Set pan for a sound (-1.0 left, 0.0 center, 1.0 right) |
 | sub SetSoundPitch(sound, pitch) | Set pitch for a sound (1.0 is base level) |
 | sub SetSoundVolume(sound, volume) | Set volume for a sound (1.0 is max level) |
 | sub SetTargetFPS(fps) | Set target FPS (maximum) |
@@ -582,14 +593,16 @@ Implemented APIs (633)
 | sub StopSound(sound) | Stop playing a sound |
 | sub SwapScreenBuffer() | Swap back buffer with front buffer (screen drawing) |
 | sub TakeScreenshot(fileName) | Takes a screenshot of current screen (filename extension defines format) |
-| sub TextAppend(text, append, position) | Append text at specific position and move cursor! |
+| sub TextAppend(text, append, position) | Append text at specific position and move cursor |
 | func TextCopy(dst, src) | Copy one string to another, returns bytes copied |
-| func TextFindIndex(text, find) | Find first text occurrence within a string |
+| func TextFindIndex(text, search) | Find first text occurrence within a string, -1 if not found |
 | func TextFormat(text, args) | Text formatting with variables (sprintf() style) |
 | func TextInsert(text, insert, position) | Insert text in a position (WARNING: memory must be freed!) |
 | func TextIsEqual(text1, text2) | Check if two text string are equal |
 | func TextLength(text) | Get text length, checks for '\\0' ending |
-| func TextReplace(text, replace, by) | Replace text string (WARNING: memory must be freed!) |
+| func TextRemoveSpaces(text) | Remove text spaces, concat words |
+| func TextReplace(text, search, replacement) | Replace text string (WARNING: memory must be freed!) |
+| func TextReplaceBetween(text, begin, end, replacement) | Replace text between two specific strings (WARNING: memory must be freed!) |
 | func TextSubtext(text, position, length) | Get a piece of a text string |
 | func TextToCamel(text) | Get Camel case notation version of provided string |
 | func TextToFloat(text) | Get float value from text |
@@ -632,9 +645,9 @@ Implemented APIs (633)
 | sub UpdateModelAnimationBones(model, anim, frame) | Update model animation mesh bone matrices (GPU skinning) |
 | sub UpdateMusicStream(music) | Updates buffers for music streaming |
 | func updatePhysics() | n/a |
-| sub UpdateSound(sound, data, sampleCount) | Update sound buffer with new data |
-| sub UpdateTexture(texture, pixels) | Update GPU texture with new data |
-| sub UpdateTextureRec(texture, rec, pixels) | Update GPU texture rectangle with new data |
+| sub UpdateSound(sound, data, sampleCount) | Update sound buffer with new data (default data format: 32 bit float, stereo) |
+| sub UpdateTexture(texture, pixels) | Update GPU texture with new data (pixels should be able to fill texture) |
+| sub UpdateTextureRec(texture, rec, pixels) | Update GPU texture rectangle with new data (pixels and rec should fit in texture) |
 | sub UploadMesh(mesh, dynamic) | Upload mesh vertex data in GPU and provide VAO/VBO ids |
 | func waitevents() | n/a |
 | sub WaitTime(seconds) | Wait for some time (halt program execution) |
@@ -661,6 +674,7 @@ Unimplemented APIs
 | LoadFontData | Load font data for further use |
 | LoadMaterialDefault | Load default material (Supports: DIFFUSE, SPECULAR, NORMAL maps) |
 | LoadMaterials | Load materials from model file |
+| LoadTextLines | Load text as separate lines ('\\n') |
 | LoadVrStereoConfig | Load VR stereo config for VR simulator device parameters |
 | SetAudioStreamCallback | Audio thread callback to request new data |
 | SetLoadFileDataCallback | Set custom file binary data loader |
@@ -670,8 +684,9 @@ Unimplemented APIs
 | SetSaveFileTextCallback | Set custom file text data saver |
 | SetTraceLogCallback | Set custom trace log |
 | TextJoin | Join text strings with delimiter |
-| TextSplit | Split text into multiple strings |
+| TextSplit | Split text into multiple strings, using MAX_TEXTSPLIT_COUNT static strings |
 | UnloadFontData | Unload font chars info data (RAM) |
 | UnloadMaterial | Unload material from GPU memory (VRAM) |
+| UnloadTextLines | Unload text lines |
 | UnloadVrStereoConfig | Unload VR stereo config |
 
diff --git a/raylib/func-def.h b/raylib/func-def.h
index 831f691..42552b2 100644
--- a/raylib/func-def.h
+++ b/raylib/func-def.h
@@ -29,6 +29,7 @@
   {2, 2, "COMPUTECRC32", cmd_computecrc32},
   {2, 2, "COMPUTEMD5", cmd_computemd5},
   {2, 2, "COMPUTESHA1", cmd_computesha1},
+  {2, 2, "COMPUTESHA256", cmd_computesha256},
   {1, 1, "DECODEDATABASE64", cmd_decodedatabase64},
   {2, 2, "DECOMPRESSDATA", cmd_decompressdata},
   {1, 1, "DIRECTORYEXISTS", cmd_directoryexists},
@@ -44,7 +45,13 @@
   {2, 2, "EXPORTWAVE", cmd_exportwave},
   {2, 2, "EXPORTWAVEASCODE", cmd_exportwaveascode},
   {2, 2, "FADE", cmd_fade},
+  {2, 2, "FILECOPY", cmd_filecopy},
   {1, 1, "FILEEXISTS", cmd_fileexists},
+  {2, 2, "FILEMOVE", cmd_filemove},
+  {1, 1, "FILEREMOVE", cmd_fileremove},
+  {2, 2, "FILERENAME", cmd_filerename},
+  {2, 2, "FILETEXTFINDINDEX", cmd_filetextfindindex},
+  {3, 3, "FILETEXTREPLACE", cmd_filetextreplace},
   {3, 3, "GENIMAGECELLULAR", cmd_genimagecellular},
   {6, 6, "GENIMAGECHECKED", cmd_genimagechecked},
   {3, 3, "GENIMAGECOLOR", cmd_genimagecolor},
@@ -146,6 +153,7 @@
   {4, 4, "GETSPLINEPOINTBEZIERQUAD", cmd_getsplinepointbezierquad},
   {5, 5, "GETSPLINEPOINTCATMULLROM", cmd_getsplinepointcatmullrom},
   {3, 3, "GETSPLINEPOINTLINEAR", cmd_getsplinepointlinear},
+  {3, 3, "GETTEXTBETWEEN", cmd_gettextbetween},
   {0, 0, "GETTIME", cmd_gettime},
   {0, 0, "GETTOUCHPOINTCOUNT", cmd_gettouchpointcount},
   {1, 1, "GETTOUCHPOINTID", cmd_gettouchpointid},
@@ -218,9 +226,9 @@
   {1, 1, "LOADFILEDATA", cmd_loadfiledata},
   {1, 1, "LOADFILETEXT", cmd_loadfiletext},
   {1, 1, "LOADFONT", cmd_loadfont},
-  {3, 3, "LOADFONTEX", cmd_loadfontex},
+  {4, 4, "LOADFONTEX", cmd_loadfontex},
   {3, 3, "LOADFONTFROMIMAGE", cmd_loadfontfromimage},
-  {5, 5, "LOADFONTFROMMEMORY", cmd_loadfontfrommemory},
+  {6, 6, "LOADFONTFROMMEMORY", cmd_loadfontfrommemory},
   {1, 1, "LOADIMAGE", cmd_loadimage},
   {1, 1, "LOADIMAGEANIM", cmd_loadimageanim},
   {3, 3, "LOADIMAGEANIMFROMMEMORY", cmd_loadimageanimfrommemory},
@@ -259,7 +267,9 @@
   {3, 3, "TEXTINSERT", cmd_textinsert},
   {2, 2, "TEXTISEQUAL", cmd_textisequal},
   {1, 1, "TEXTLENGTH", cmd_textlength},
+  {1, 1, "TEXTREMOVESPACES", cmd_textremovespaces},
   {3, 3, "TEXTREPLACE", cmd_textreplace},
+  {4, 4, "TEXTREPLACEBETWEEN", cmd_textreplacebetween},
   {3, 3, "TEXTSUBTEXT", cmd_textsubtext},
   {1, 1, "TEXTTOCAMEL", cmd_texttocamel},
   {1, 1, "TEXTTOFLOAT", cmd_texttofloat},
diff --git a/raylib/func.h b/raylib/func.h
index cc97d4a..53ccee6 100644
--- a/raylib/func.h
+++ b/raylib/func.h
@@ -2,8 +2,8 @@
 // Change working directory, return true on success
 //
 static int cmd_changedirectory(int argc, slib_par_t *params, var_t *retval) {
-  auto dir = get_param_str(argc, params, 0, 0);
-  auto fnResult = ChangeDirectory(dir);
+  auto dirPath = get_param_str(argc, params, 0, 0);
+  auto fnResult = ChangeDirectory(dirPath);
   v_setint(retval, fnResult);
   return 1;
 }
@@ -358,12 +358,23 @@ static int cmd_computesha1(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Decode Base64 string data, memory must be MemFree()
+// Compute SHA256 hash code, returns static int[8] (32 bytes)
+//
+static int cmd_computesha256(int argc, slib_par_t *params, var_t *retval) {
+  auto data = (unsigned char *)get_param_str(argc, params, 0, 0);
+  auto dataSize = get_param_int(argc, params, 1, 0);
+  auto fnResult = (var_int_t)ComputeSHA256(data, dataSize);
+  v_setint(retval, fnResult);
+  return 1;
+}
+
+//
+// Decode Base64 string (expected NULL terminated), memory must be MemFree()
 //
 static int cmd_decodedatabase64(int argc, slib_par_t *params, var_t *retval) {
-  auto data = (const unsigned char *)get_param_str(argc, params, 0, 0);
+  auto text = get_param_str(argc, params, 0, 0);
   auto outputSize = 0;
-  auto fnResult = (const char *)DecodeDataBase64(data, &outputSize);
+  auto fnResult = (const char *)DecodeDataBase64(text, &outputSize);
   v_setstrn(retval, fnResult, outputSize);
   MemFree((void *)fnResult);
   return 1;
@@ -393,7 +404,7 @@ static int cmd_directoryexists(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Encode data to Base64 string, memory must be MemFree()
+// Encode data to Base64 string (includes NULL terminator), memory must be MemFree()
 //
 static int cmd_encodedatabase64(int argc, slib_par_t *params, var_t *retval) {
   auto data = (const unsigned char *)get_param_str(argc, params, 0, 0);
@@ -583,6 +594,17 @@ static int cmd_fade(int argc, slib_par_t *params, var_t *retval) {
   return 1;
 }
 
+//
+// Copy file from one path to another, dstPath created if it doesn't exist
+//
+static int cmd_filecopy(int argc, slib_par_t *params, var_t *retval) {
+  auto srcPath = get_param_str(argc, params, 0, 0);
+  auto dstPath = get_param_str(argc, params, 1, 0);
+  auto fnResult = FileCopy(srcPath, dstPath);
+  v_setint(retval, fnResult);
+  return 1;
+}
+
 //
 // Check if file exists
 //
@@ -593,6 +615,61 @@ static int cmd_fileexists(int argc, slib_par_t *params, var_t *retval) {
   return 1;
 }
 
+//
+// Move file from one directory to another, dstPath created if it doesn't exist
+//
+static int cmd_filemove(int argc, slib_par_t *params, var_t *retval) {
+  auto srcPath = get_param_str(argc, params, 0, 0);
+  auto dstPath = get_param_str(argc, params, 1, 0);
+  auto fnResult = FileMove(srcPath, dstPath);
+  v_setint(retval, fnResult);
+  return 1;
+}
+
+//
+// Remove file (if exists)
+//
+static int cmd_fileremove(int argc, slib_par_t *params, var_t *retval) {
+  auto fileName = get_param_str(argc, params, 0, 0);
+  auto fnResult = FileRemove(fileName);
+  v_setint(retval, fnResult);
+  return 1;
+}
+
+//
+// Rename file (if exists)
+//
+static int cmd_filerename(int argc, slib_par_t *params, var_t *retval) {
+  auto fileName = get_param_str(argc, params, 0, 0);
+  auto fileRename = get_param_str(argc, params, 1, 0);
+  auto fnResult = FileRename(fileName, fileRename);
+  v_setint(retval, fnResult);
+  return 1;
+}
+
+//
+// Find text in existing file
+//
+static int cmd_filetextfindindex(int argc, slib_par_t *params, var_t *retval) {
+  auto fileName = get_param_str(argc, params, 0, 0);
+  auto search = get_param_str(argc, params, 1, 0);
+  auto fnResult = FileTextFindIndex(fileName, search);
+  v_setint(retval, fnResult);
+  return 1;
+}
+
+//
+// Replace text in an existing file
+//
+static int cmd_filetextreplace(int argc, slib_par_t *params, var_t *retval) {
+  auto fileName = get_param_str(argc, params, 0, 0);
+  auto search = get_param_str(argc, params, 1, 0);
+  auto replacement = get_param_str(argc, params, 2, 0);
+  auto fnResult = FileTextReplace(fileName, search, replacement);
+  v_setint(retval, fnResult);
+  return 1;
+}
+
 //
 // Generate image: cellular algorithm, bigger tileSize means bigger cells
 //
@@ -1073,7 +1150,7 @@ static int cmd_getframetime(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Get gamepad axis count for a gamepad
+// Get axis count for a gamepad
 //
 static int cmd_getgamepadaxiscount(int argc, slib_par_t *params, var_t *retval) {
   auto gamepad = get_param_int(argc, params, 0, 0);
@@ -1083,7 +1160,7 @@ static int cmd_getgamepadaxiscount(int argc, slib_par_t *params, var_t *retval)
 }
 
 //
-// Get axis movement value for a gamepad axis
+// Get movement value for a gamepad axis
 //
 static int cmd_getgamepadaxismovement(int argc, slib_par_t *params, var_t *retval) {
   auto gamepad = get_param_int(argc, params, 0, 0);
@@ -1734,6 +1811,18 @@ static int cmd_getsplinepointlinear(int argc, slib_par_t *params, var_t *retval)
   return 1;
 }
 
+//
+// Get text between two strings
+//
+static int cmd_gettextbetween(int argc, slib_par_t *params, var_t *retval) {
+  auto text = get_param_str(argc, params, 0, 0);
+  auto begin = get_param_str(argc, params, 1, 0);
+  auto end = get_param_str(argc, params, 2, 0);
+  auto fnResult = (const char *)GetTextBetween(text, begin, end);
+  v_setstr(retval, fnResult);
+  return 1;
+}
+
 //
 // Get elapsed time in seconds since InitWindow()
 //
@@ -2028,7 +2117,7 @@ static int cmd_isfiledropped(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Check file extension (including point: .png, .wav)
+// Check file extension (recommended include point: .png, .wav)
 //
 static int cmd_isfileextension(int argc, slib_par_t *params, var_t *retval) {
   auto fileName = get_param_str(argc, params, 0, 0);
@@ -2575,8 +2664,8 @@ static int cmd_loadfont(int argc, slib_par_t *params, var_t *retval) {
 static int cmd_loadfontex(int argc, slib_par_t *params, var_t *retval) {
   auto fileName = get_param_str(argc, params, 0, 0);
   auto fontSize = get_param_int(argc, params, 1, 0);
-  auto codepoints = (int *)0;
-  auto codepointCount = get_param_int(argc, params, 2, 0);
+  auto codepoints = (const int *)get_param_int_t(argc, params, 2, 0);
+  auto codepointCount = get_param_int(argc, params, 3, 0);
   auto fnResult = LoadFontEx(fileName, fontSize, codepoints, codepointCount);
   v_setfont(retval, fnResult);
   return 1;
@@ -2608,8 +2697,8 @@ static int cmd_loadfontfrommemory(int argc, slib_par_t *params, var_t *retval) {
   auto fileData = (const unsigned char *)get_param_str(argc, params, 1, 0);
   auto dataSize = get_param_int(argc, params, 2, 0);
   auto fontSize = get_param_int(argc, params, 3, 0);
-  auto codepoints = (int *)0;
-  auto codepointCount = get_param_int(argc, params, 4, 0);
+  auto codepoints = (const int *)get_param_int_t(argc, params, 4, 0);
+  auto codepointCount = get_param_int(argc, params, 5, 0);
   auto fnResult = LoadFontFromMemory(fileType, fileData, dataSize, fontSize, codepoints, codepointCount);
   v_setfont(retval, fnResult);
   return 1;
@@ -3017,7 +3106,7 @@ static int cmd_savefiledata(int argc, slib_par_t *params, var_t *retval) {
 //
 static int cmd_savefiletext(int argc, slib_par_t *params, var_t *retval) {
   auto fileName = get_param_str(argc, params, 0, 0);
-  auto text = (char *)get_param_str(argc, params, 1, 0);
+  auto text = get_param_str(argc, params, 1, 0);
   auto fnResult = SaveFileText(fileName, text);
   v_setint(retval, fnResult);
   return 1;
@@ -3045,12 +3134,12 @@ static int cmd_textcopy(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Find first text occurrence within a string
+// Find first text occurrence within a string, -1 if not found
 //
 static int cmd_textfindindex(int argc, slib_par_t *params, var_t *retval) {
   auto text = get_param_str(argc, params, 0, 0);
-  auto find = get_param_str(argc, params, 1, 0);
-  auto fnResult = TextFindIndex(text, find);
+  auto search = get_param_str(argc, params, 1, 0);
+  auto fnResult = TextFindIndex(text, search);
   v_setint(retval, fnResult);
   return 1;
 }
@@ -3088,14 +3177,37 @@ static int cmd_textlength(int argc, slib_par_t *params, var_t *retval) {
   return 1;
 }
 
+//
+// Remove text spaces, concat words
+//
+static int cmd_textremovespaces(int argc, slib_par_t *params, var_t *retval) {
+  auto text = get_param_str(argc, params, 0, 0);
+  auto fnResult = (const char *)TextRemoveSpaces(text);
+  v_setstr(retval, fnResult);
+  return 1;
+}
+
 //
 // Replace text string (WARNING: memory must be freed!)
 //
 static int cmd_textreplace(int argc, slib_par_t *params, var_t *retval) {
   auto text = get_param_str(argc, params, 0, 0);
-  auto replace = get_param_str(argc, params, 1, 0);
-  auto by = get_param_str(argc, params, 2, 0);
-  auto fnResult = (const char *)TextReplace(text, replace, by);
+  auto search = get_param_str(argc, params, 1, 0);
+  auto replacement = get_param_str(argc, params, 2, 0);
+  auto fnResult = (const char *)TextReplace(text, search, replacement);
+  v_setstr(retval, fnResult);
+  return 1;
+}
+
+//
+// Replace text between two specific strings (WARNING: memory must be freed!)
+//
+static int cmd_textreplacebetween(int argc, slib_par_t *params, var_t *retval) {
+  auto text = get_param_str(argc, params, 0, 0);
+  auto begin = get_param_str(argc, params, 1, 0);
+  auto end = get_param_str(argc, params, 2, 0);
+  auto replacement = get_param_str(argc, params, 3, 0);
+  auto fnResult = (const char *)TextReplaceBetween(text, begin, end, replacement);
   v_setstr(retval, fnResult);
   return 1;
 }
diff --git a/raylib/mkraylib.bas b/raylib/mkraylib.bas
index ee64008..e37d4c5 100644
--- a/raylib/mkraylib.bas
+++ b/raylib/mkraylib.bas
@@ -2,7 +2,7 @@ rem
 rem generate a skelton main.cpp from json input
 rem
 
-tload "raylib/parser/raylib_api.json", s, 1
+tload "raylib/tools/rlparser/output/raylib_api.json", s, 1
 api = array(s)
 
 func comparator(l, r)
diff --git a/raylib/mkreadme.bas b/raylib/mkreadme.bas
index 56230b9..0f98b50 100644
--- a/raylib/mkreadme.bas
+++ b/raylib/mkreadme.bas
@@ -20,7 +20,7 @@ load("main.cpp")
 load("proc-def.h")
 load("func-def.h")
 
-tload "raylib/parser/raylib_api.json", s, 1
+tload "raylib/tools/rlparser/output/raylib_api.json", s, 1
 api = array(s)
 functions = {}
 for fun in api("functions")
diff --git a/raylib/proc-def.h b/raylib/proc-def.h
index f509533..e4456bf 100644
--- a/raylib/proc-def.h
+++ b/raylib/proc-def.h
@@ -35,11 +35,14 @@
   {6, 6, "DRAWCYLINDERWIRESEX", cmd_drawcylinderwiresex},
   {5, 5, "DRAWELLIPSE", cmd_drawellipse},
   {5, 5, "DRAWELLIPSELINES", cmd_drawellipselines},
+  {4, 4, "DRAWELLIPSELINESV", cmd_drawellipselinesv},
+  {4, 4, "DRAWELLIPSEV", cmd_drawellipsev},
   {2, 2, "DRAWFPS", cmd_drawfps},
   {2, 2, "DRAWGRID", cmd_drawgrid},
   {5, 5, "DRAWLINE", cmd_drawline},
   {3, 3, "DRAWLINE3D", cmd_drawline3d},
   {4, 4, "DRAWLINEBEZIER", cmd_drawlinebezier},
+  {5, 5, "DRAWLINEDASHED", cmd_drawlinedashed},
   {4, 4, "DRAWLINEEX", cmd_drawlineex},
   {3, 3, "DRAWLINESTRIP", cmd_drawlinestrip},
   {3, 3, "DRAWLINEV", cmd_drawlinev},
diff --git a/raylib/proc.h b/raylib/proc.h
index ce6790c..f549c73 100644
--- a/raylib/proc.h
+++ b/raylib/proc.h
@@ -449,6 +449,30 @@ static int cmd_drawellipselines(int argc, slib_par_t *params, var_t *retval) {
   return 1;
 }
 
+//
+// Draw ellipse outline (Vector version)
+//
+static int cmd_drawellipselinesv(int argc, slib_par_t *params, var_t *retval) {
+  auto center = get_param_vec2(argc, params, 0);
+  auto radiusH = get_param_num(argc, params, 1, 0);
+  auto radiusV = get_param_num(argc, params, 2, 0);
+  auto color = get_param_color(argc, params, 3);
+  DrawEllipseLinesV(center, radiusH, radiusV, color);
+  return 1;
+}
+
+//
+// Draw ellipse (Vector version)
+//
+static int cmd_drawellipsev(int argc, slib_par_t *params, var_t *retval) {
+  auto center = get_param_vec2(argc, params, 0);
+  auto radiusH = get_param_num(argc, params, 1, 0);
+  auto radiusV = get_param_num(argc, params, 2, 0);
+  auto color = get_param_color(argc, params, 3);
+  DrawEllipseV(center, radiusH, radiusV, color);
+  return 1;
+}
+
 //
 // Draw current FPS
 //
@@ -505,6 +529,19 @@ static int cmd_drawlinebezier(int argc, slib_par_t *params, var_t *retval) {
   return 1;
 }
 
+//
+// Draw a dashed line
+//
+static int cmd_drawlinedashed(int argc, slib_par_t *params, var_t *retval) {
+  auto startPos = get_param_vec2(argc, params, 0);
+  auto endPos = get_param_vec2(argc, params, 1);
+  auto dashSize = get_param_int(argc, params, 2, 0);
+  auto spaceSize = get_param_int(argc, params, 3, 0);
+  auto color = get_param_color(argc, params, 4);
+  DrawLineDashed(startPos, endPos, dashSize, spaceSize, color);
+  return 1;
+}
+
 //
 // Draw a line (using triangles/quads)
 //
@@ -765,9 +802,9 @@ static int cmd_drawrectanglegradientex(int argc, slib_par_t *params, var_t *retv
   auto rec = get_param_rect(argc, params, 0);
   auto topLeft = get_param_color(argc, params, 1);
   auto bottomLeft = get_param_color(argc, params, 2);
-  auto topRight = get_param_color(argc, params, 3);
-  auto bottomRight = get_param_color(argc, params, 4);
-  DrawRectangleGradientEx(rec, topLeft, bottomLeft, topRight, bottomRight);
+  auto bottomRight = get_param_color(argc, params, 3);
+  auto topRight = get_param_color(argc, params, 4);
+  DrawRectangleGradientEx(rec, topLeft, bottomLeft, bottomRight, topRight);
   return 1;
 }
 
@@ -2427,7 +2464,7 @@ static int cmd_pollinputevents(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Set window state: not minimized/maximized
+// Restore window from being minimized/maximized
 //
 static int cmd_restorewindow(int argc, slib_par_t *params, var_t *retval) {
   RestoreWindow();
@@ -2684,7 +2721,7 @@ static int cmd_setmousescale(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Set pan for a music (0.5 is center)
+// Set pan for a music (-1.0 left, 0.0 center, 1.0 right)
 //
 static int cmd_setmusicpan(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -2815,7 +2852,7 @@ static int cmd_setshapestexture(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Set pan for a sound (0.5 is center)
+// Set pan for a sound (-1.0 left, 0.0 center, 1.0 right)
 //
 static int cmd_setsoundpan(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -3123,7 +3160,7 @@ static int cmd_takescreenshot(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Append text at specific position and move cursor!
+// Append text at specific position and move cursor
 //
 static int cmd_textappend(int argc, slib_par_t *params, var_t *retval) {
   auto text = (char *)get_param_str(argc, params, 0, 0);
@@ -3543,7 +3580,7 @@ static int cmd_updatemusicstream(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Update sound buffer with new data
+// Update sound buffer with new data (default data format: 32 bit float, stereo)
 //
 static int cmd_updatesound(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -3560,7 +3597,7 @@ static int cmd_updatesound(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Update GPU texture rectangle with new data
+// Update GPU texture rectangle with new data (pixels and rec should fit in texture)
 //
 static int cmd_updatetexturerec(int argc, slib_par_t *params, var_t *retval) {
   int result;
diff --git a/raylib/raylib b/raylib/raylib
index eb8a343..c610d22 160000
--- a/raylib/raylib
+++ b/raylib/raylib
@@ -1 +1 @@
-Subproject commit eb8a343e313967a51cb302ac9bb1206a05727d13
+Subproject commit c610d228a244f930ad53492604640f39584c66da

From e2ae4b9005ab2da38323a6c3c15caab2f291f0ed Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Fri, 23 Jan 2026 16:53:06 +1030
Subject: [PATCH 12/54] COMMON: fix autogen.sh

---
 autogen.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/autogen.sh b/autogen.sh
index 47736e2..318d661 100755
--- a/autogen.sh
+++ b/autogen.sh
@@ -1,3 +1,5 @@
+#!/bin/bash
+
 # This file is part of SmallBASIC
 #
 # Copyright(C) 2001-2020 Chris Warren-Smith.

From 0967b93b86c24901f3ad5638492f0127db894d9c Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Fri, 23 Jan 2026 17:06:47 +1030
Subject: [PATCH 13/54] RAYLIB: fix reference to raylib_api.json

---
 raylib/Makefile.am | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/raylib/Makefile.am b/raylib/Makefile.am
index 0885662..84c24b5 100644
--- a/raylib/Makefile.am
+++ b/raylib/Makefile.am
@@ -5,12 +5,13 @@
 # Download the GNU Public License (GPL) from www.gnu.org
 #
 
-generated = func-def.h proc-def.h proc.h func.h
 sbasic=sbasic
+generated = func-def.h proc-def.h proc.h func.h
+raylib_api_json=raylib/tools/rlparser/output/raylib_api.json
 
 CLEANFILES = $(generated)
 
-raylib/tools/rlparser/output/raylib_api.json: raylib/src/raylib.h raylib/tools/rlparser/rlparser.c
+$(raylib_api_json): raylib/src/raylib.h raylib/tools/rlparser/rlparser.c
 	(cd raylib/tools/rlparser && make && ./rlparser --format JSON --input ../../src/raylib.h --output output/raylib_api.json)
 
 UNSUPPORTED.md: $(generated)
@@ -19,7 +20,7 @@ UNSUPPORTED.md: $(generated)
 README.md: $(generated) mkreadme.bas UNSUPPORTED.md
 	$(sbasic)	mkreadme.bas `grep RAYLIB_VERSION raylib/src/raylib.h | sed 's/#define RAYLIB_VERSION//g' | sed 's/\"//g'` > README.md
 
-$(generated): raylib/parser/raylib_api.json mkraylib.bas
+$(generated): $(raylib_api_json) mkraylib.bas
 	$(sbasic) mkraylib.bas $@ > $@
 	@touch main.cpp
 

From 7a268902a1b88f84ca2d314e63312923b7d84b49 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Mon, 9 Feb 2026 08:59:24 +1030
Subject: [PATCH 14/54] LLAMA: add support for grammar

---
 llama/llama-sb.cpp | 55 ++++++++++++++++++++++++++++++++--------------
 llama/llama-sb.h   |  7 +++++-
 llama/main.cpp     | 41 +++++++++++++++++++++++++++++++++-
 3 files changed, 85 insertions(+), 18 deletions(-)

diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index 2bff5e8..273bf49 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -31,7 +31,8 @@ Llama::Llama() :
   _min_p(0),
   _top_k(0),
   _max_tokens(0),
-  _log_level(GGML_LOG_LEVEL_CONT) {
+  _log_level(GGML_LOG_LEVEL_CONT),
+  _seed(LLAMA_DEFAULT_SEED) {
   llama_log_set([](enum ggml_log_level level, const char * text, void *user_data) {
     Llama *llama = (Llama *)user_data;
     if (level > llama->_log_level) {
@@ -63,6 +64,9 @@ void Llama::reset() {
   _top_p = 1.0f;
   _min_p = 0.0f;
   _max_tokens = 150;
+  _grammar_src.clear();
+  _grammar_root.clear();
+  _seed = LLAMA_DEFAULT_SEED;
   if (_ctx) {
     llama_memory_clear(llama_get_memory(_ctx), true);
   }
@@ -93,36 +97,53 @@ bool Llama::construct(string model_path, int n_ctx, int n_batch, int n_gpu_layer
       _last_error = "Failed to create context";
     } else {
       _vocab = llama_model_get_vocab(_model);
-
-      auto sparams = llama_sampler_chain_default_params();
-      sparams.no_perf = false;
-      _sampler = llama_sampler_chain_init(sparams);
     }
   }
   return _last_error.empty();
 }
 
-void Llama::configure_sampler() {
-  llama_sampler_reset(_sampler);
+void Llama::set_grammar(const string &src, const string &root) {
+  _grammar_src = src;
+  _grammar_root = root;
+}
+
+bool Llama::configure_sampler() {
+  auto sparams = llama_sampler_chain_default_params();
+  sparams.no_perf = false;
+  llama_sampler *chain = llama_sampler_chain_init(sparams);
+  if (!_grammar_src.empty()) {
+    llama_sampler *grammar = llama_sampler_init_grammar(_vocab, _grammar_src.c_str(), _grammar_root.c_str());
+    if (!grammar) {
+      _last_error = "failed to initialize grammar sampler";
+      llama_sampler_free(chain); // chain is not stored in _sampler yet; release it so a bad grammar does not leak
+      return false;
+    }
+    llama_sampler_chain_add(chain, grammar);
+  }
   if (_penalty_last_n != 0 && _penalty_repeat != 1.0f) {
     auto penalties = llama_sampler_init_penalties(_penalty_last_n, _penalty_repeat, 0.0f, 0.0f);
-    llama_sampler_chain_add(_sampler, penalties);
+    llama_sampler_chain_add(chain, penalties);
   }
   if (_temperature <= 0.0f) {
-    llama_sampler_chain_add(_sampler, llama_sampler_init_greedy());
+    llama_sampler_chain_add(chain, llama_sampler_init_greedy());
   } else {
-    llama_sampler_chain_add(_sampler, llama_sampler_init_temp(_temperature));
     if (_top_k > 0) {
-      llama_sampler_chain_add(_sampler, llama_sampler_init_top_k(_top_k));
+      llama_sampler_chain_add(chain, llama_sampler_init_top_k(_top_k));
     }
-    if (_top_p < 1.0f) {
-      llama_sampler_chain_add(_sampler, llama_sampler_init_top_p(_top_p, 1));
+    if (_top_p < 1.0f || _min_p > 0.0f) {
+      llama_sampler_chain_add(chain, llama_sampler_init_top_p(_top_p, 1));
     }
     if (_min_p > 0.0f) {
-      llama_sampler_chain_add(_sampler, llama_sampler_init_min_p(_min_p, 1));
+      llama_sampler_chain_add(chain, llama_sampler_init_min_p(_min_p, 1));
     }
-    llama_sampler_chain_add(_sampler, llama_sampler_init_dist(LLAMA_DEFAULT_SEED));
+    llama_sampler_chain_add(chain, llama_sampler_init_temp(_temperature));
+    llama_sampler_chain_add(chain, llama_sampler_init_dist(_seed));
   }
+  if (_sampler) {
+    llama_sampler_free(_sampler);
+  }
+  _sampler = chain; // replace the previous sampler only once the new chain is fully built
+  return true;
 }
 
 vector<llama_token> Llama::tokenize(const string &prompt) {
@@ -201,7 +222,9 @@ bool Llama::make_space_for_tokens(int n_tokens, int keep_min) {
 }
 
 bool Llama::generate(LlamaIter &iter, const string &prompt) {
-  configure_sampler();
+  if (!configure_sampler()) {
+    return false;
+  }
 
   vector<llama_token> prompt_tokens = tokenize(prompt);
   if (prompt_tokens.size() == 0) {
diff --git a/llama/llama-sb.h b/llama/llama-sb.h
index b1da148..070cb60 100644
--- a/llama/llama-sb.h
+++ b/llama/llama-sb.h
@@ -50,6 +50,8 @@ struct Llama {
   void set_temperature(float temperature) { _temperature = temperature; }
   void set_top_k(int top_k) { _top_k = top_k; }
   void set_top_p(float top_p) { _top_p = top_p; }
+  void set_grammar(const string &src, const string &root);
+  void set_seed(unsigned int seed) { _seed = seed; }
 
   // error handling
   const char *last_error() { return _last_error.c_str(); }
@@ -58,7 +60,7 @@ struct Llama {
 
   private:
   bool ends_with_sentence_boundary(const string &out);
-  void configure_sampler();
+  bool configure_sampler();
   bool make_space_for_tokens(int n_tokens, int keep_min);
   vector<llama_token> tokenize(const string &prompt);
   string token_to_string(LlamaIter &iter, llama_token tok);
@@ -68,6 +70,8 @@ struct Llama {
   llama_sampler *_sampler;
   const llama_vocab *_vocab;
   vector<string> _stop_sequences;
+  string _grammar_src;
+  string _grammar_root;
   string _last_error;
   int32_t _penalty_last_n;
   float _penalty_repeat;
@@ -77,4 +81,5 @@ struct Llama {
   int _top_k;
   int _max_tokens;
   int _log_level;
+  unsigned int _seed;
 };
diff --git a/llama/main.cpp b/llama/main.cpp
index 1c0de21..5bddfed 100644
--- a/llama/main.cpp
+++ b/llama/main.cpp
@@ -211,6 +211,42 @@ static int cmd_llama_set_top_p(var_s *self, int argc, slib_par_t *arg, var_s *re
   return result;
 }
 
+// Script binding: passes the single string argument to Llama::set_grammar with root fixed to "root".
+// llama.set_grammar("text")
+// Returns 1 on success; 0 when argc != 1 (error reported via retval) or get_llama_class_id yields -1.
+static int cmd_llama_set_grammar(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
+  int result = 0;
+  if (argc != 1) {
+    error(retval, "llama.set_grammar", 1, 1);
+  } else {
+    int id = get_llama_class_id(self, retval);
+    if (id != -1) {
+      Llama &llama = g_llama.at(id);
+      llama.set_grammar(get_param_str(argc, arg, 0, 0), "root");
+      result = 1;
+    }
+  }
+  return result;
+}
+
+// Script binding: stores the single numeric argument as the seed via Llama::set_seed; returns 1 on success, else 0.
+// llama.set_seed(123)
+// NOTE(review): get_param_num presumably yields a real number that set_seed(unsigned int) narrows - confirm negative/fractional input is acceptable.
+static int cmd_llama_set_seed(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
+  int result = 0;
+  if (argc != 1) {
+    error(retval, "llama.set_seed", 1, 1);
+  } else {
+    int id = get_llama_class_id(self, retval);
+    if (id != -1) {
+      Llama &llama = g_llama.at(id);
+      llama.set_seed(get_param_num(argc, arg, 0, 0));
+      result = 1;
+    }
+  }
+  return result;
+}
+
 //
 // llama.reset() - make the model forget everything
 //
@@ -355,6 +391,8 @@ static int cmd_create_llama(int argc, slib_par_t *params, var_t *retval) {
     v_create_callback(retval, "set_temperature", cmd_llama_set_temperature);
     v_create_callback(retval, "set_top_k", cmd_llama_set_top_k);
     v_create_callback(retval, "set_top_p", cmd_llama_set_top_p);
+    v_create_callback(retval, "set_grammar", cmd_llama_set_grammar);
+    v_create_callback(retval, "set_seed", cmd_llama_set_seed);
     result = 1;
   } else {
     error(retval, llama.last_error());
@@ -388,7 +426,7 @@ int sblib_init(const char *sourceFile) {
 //
 // Release variables falling out of scope
 //
-SBLIB_API void sblib_free(int cls_id, int id) {
+SBLIB_API int sblib_free(int cls_id, int id) {
   if (id != -1) {
     switch (cls_id) {
     case CLASS_ID_LLAMA:
@@ -403,6 +441,7 @@ SBLIB_API void sblib_free(int cls_id, int id) {
       break;
     }
   }
+  return 0;
 }
 
 //

From feb6e6f17a8e69fe7c5f5d046e00ccb3654804a5 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Sat, 18 Apr 2026 16:13:13 +0930
Subject: [PATCH 15/54] LLAMA: implement nitro agent (work in progress)

---
 llama/llama-sb.cpp          |   7 +-
 llama/llama-sb.h            |   2 +-
 llama/llama.cpp             |   2 +-
 llama/main.cpp              |   5 +-
 llama/samples/nitro_cli.bas | 193 ++++++++++++++++++++++++++++++++++++
 llama/samples/skills.md     | 139 ++++++++++++++++++++++++++
 llama/test_main.cpp         |   2 +-
 7 files changed, 343 insertions(+), 7 deletions(-)
 create mode 100644 llama/samples/nitro_cli.bas
 create mode 100644 llama/samples/skills.md

diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index 273bf49..0325509 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -40,6 +40,7 @@ Llama::Llama() :
     }
   }, this);
   reset();
+  llama_backend_init();
 }
 
 Llama::~Llama() {
@@ -52,6 +53,7 @@ Llama::~Llama() {
   if (_model) {
     llama_model_free(_model);
   }
+  llama_backend_free();
 }
 
 void Llama::reset() {
@@ -72,14 +74,15 @@ void Llama::reset() {
   }
 }
 
-bool Llama::construct(string model_path, int n_ctx, int n_batch, int n_gpu_layers) {
+bool Llama::construct(string model_path, int n_ctx, int n_batch, int n_gpu_layers, int log_level) {
   ggml_backend_load_all();
 
   llama_model_params mparams = llama_model_default_params();
   if (n_gpu_layers >= 0) {
-     mparams.n_gpu_layers = n_gpu_layers;
+    mparams.n_gpu_layers = n_gpu_layers;
   }
 
+  _log_level = log_level;
   _model = llama_model_load_from_file(model_path.c_str(), mparams);
   if (!_model) {
     _last_error = "Failed to load model";
diff --git a/llama/llama-sb.h b/llama/llama-sb.h
index 070cb60..10414c8 100644
--- a/llama/llama-sb.h
+++ b/llama/llama-sb.h
@@ -33,7 +33,7 @@ struct Llama {
   ~Llama();
 
   // init
-  bool construct(string model_path, int n_ctx, int n_batch, int n_gpu_layers);
+  bool construct(string model_path, int n_ctx, int n_batch, int n_gpu_layers, int log_level);
 
   // generation
   bool generate(LlamaIter &iter, const string &prompt);
diff --git a/llama/llama.cpp b/llama/llama.cpp
index af3be13..45cac7c 160000
--- a/llama/llama.cpp
+++ b/llama/llama.cpp
@@ -1 +1 @@
-Subproject commit af3be131c065a38e476c34295bceda6cb956e7d7
+Subproject commit 45cac7ca703fb9085eae62b9121fca01d20177f6
diff --git a/llama/main.cpp b/llama/main.cpp
index 5bddfed..7b6bb75 100644
--- a/llama/main.cpp
+++ b/llama/main.cpp
@@ -377,9 +377,10 @@ static int cmd_create_llama(int argc, slib_par_t *params, var_t *retval) {
   auto n_ctx = get_param_int(argc, params, 1, 2048);
   auto n_batch = get_param_int(argc, params, 2, 1024);
   auto n_gpu_layers = get_param_int(argc, params, 3, -1);
+  auto n_log_level = get_param_int(argc, params, 4, GGML_LOG_LEVEL_CONT);
   int id = ++g_nextId;
   Llama &llama = g_llama[id];
-  if (llama.construct(model, n_ctx, n_batch, n_gpu_layers)) {
+  if (llama.construct(model, n_ctx, n_batch, n_gpu_layers, n_log_level)) {
     map_init_id(retval, id, CLASS_ID_LLAMA);
     v_create_callback(retval, "add_stop", cmd_llama_add_stop);
     v_create_callback(retval, "generate", cmd_llama_generate);
@@ -403,7 +404,7 @@ static int cmd_create_llama(int argc, slib_par_t *params, var_t *retval) {
 }
 
 FUNC_SIG lib_func[] = {
-  {1, 4, "LLAMA", cmd_create_llama},
+  {1, 5, "LLAMA", cmd_create_llama},
 };
 
 SBLIB_API int sblib_func_count() {
diff --git a/llama/samples/nitro_cli.bas b/llama/samples/nitro_cli.bas
new file mode 100644
index 0000000..06bffcc
--- /dev/null
+++ b/llama/samples/nitro_cli.bas
@@ -0,0 +1,193 @@
+' ===============================================================
+' NITRO AGENT SYSTEM (Enhanced Version)
+' Designed for Agentic LLM interaction with external tools.
+' ===============================================================
+
+import llm
+
+' --- Configuration ---
+const model = "models/google_gemma-4-E4B-it-Q4_K_L.gguf"
+const knowledge_files = ["skills.md"] ' List of files to load for priming
+
+' ANSI Color Codes
+const RESET = chr(27) + "[0m"
+const GREEN = chr(27) + "[32m"
+const YELLOW = chr(27) + "[33m"
+const CYAN = chr(27) + "[36m"
+const RED = chr(27) + "[31m"
+const BOLD_CYAN = chr(27) + "[1;36m"
+
+' Initialize the LLAMA interface
+const n_ctx = 8000
+const n_batch = 512
+const llama = llm.llama(model, n_ctx, n_batch, 50)
+
+llama.add_stop("<|turn|>")
+llama.set_max_tokens(4096)
+llama.set_temperature(0.2)
+llama.set_top_k(40)
+llama.set_top_p(0.9)
+llama.set_min_p(0.05)           ' filter weak tokens
+llama.set_penalty_repeat(1.1)   ' avoid loops
+llama.set_penalty_last_n(256);  ' longer memory
+
+'
+' Displays the welcome message
+'
+sub welcome_message()
+  print
+  print BOLD_CYAN;
+  print "      •   •    •   •••••   ••••     •••  "
+  print "      ••  •    •     •    •   •    •   • "
+  print "      •  ••    •     •    ••••    •     •"
+  print "      •   •    •     •    •   •    •   • "
+  print "      •   •    •     •    •   •     •••  "
+  print
+  print BOLD_CYAN + "  N I T R O   A G E N T   S Y S T E M   v1.0" + RESET
+  print ""
+  print CYAN + "  >> Welcome to Nitro! Your AI Agent Companion. << " + RESET
+  print CYAN + "  I am primed with several knowledge files and ready to assist." + RESET
+  print CYAN + "  Try asking me about the contents of 'nitro.txt' or listing files in './data'." + RESET
+  print CYAN + "  Type 'exit' to quit." + RESET
+  print
+end sub
+
+'
+' Handles file system commands received from the LLM.
+'
+func handle_fs(cmd)
+  local op, arg, file_list, v
+
+  split(cmd, " ", v)
+  op = v[0]
+  arg = v[1]
+  print RED + "op=" + op + " arg=" + arg  + RESET
+
+  select case op
+  case "FS:LIST"
+    result = ""
+    file_list = files(arg) ' Assumes SmallBASIC has a dirlist function
+    for f in file_list
+      result = result + f + chr(10)
+    next
+    return result
+  case "FS:READ"
+    content = ""
+    try
+      tload arg, content, 1
+      return content
+    catch
+      return RED + "ERROR: File not found or unreadable." + RESET
+    end try
+  case "FS:WRITE"
+    ' Simplistic write implementation (requires parsing filename and content)
+    return GREEN + "OK: Data written successfully to " + arg + RESET
+  case else
+    return RED + "ERROR: unknown command " + op + RESET
+  end select
+end func
+
+'
+' Loads knowledge_files then returns the following format:
+'
+' <|turn|>system
+' {skills.md...}
+' <|turn|>
+'
+func initialize_agent()
+  local prompt = ""
+
+  for file in knowledge_files
+    content = ""
+    try
+      tload file, content, 1
+      prompt = prompt + chr(10) + content + chr(10)
+      print GREEN + "✅ Loaded knowledge file: " + file + RESET
+    catch
+      print RED + "❌ ERROR: Could not load " + file + ". Check path." + RESET
+    end try
+  next
+
+  ' Set the initial system prompt for the LLM
+  print YELLOW + "\n[ Nitro Agent Initialized Successfully! ]" + RESET
+  print
+  return "<|turn|>system\n" + prompt + "\n<|turn|>"
+end
+
+'
+' Execute the given tool, then returns the following format:
+'
+' <|turn|>tool
+' {tool_output}
+' <|turn|>
+' <|turn|>model
+'
+sub process_tool(text_line)
+  local result = handle_fs(trim(text_line))
+  return "<|turn|>tool\n" + result + "\n<|turn|>\n<|turn|>model"
+end
+
+'
+' Process user input, then returns the following format
+'
+' <|turn|>user
+' {user_input}
+' <|turn|>
+' <|turn|>model
+'
+sub process_input()
+  local user_input
+  input "You:", user_input
+  user_input = trim(user_input)
+  if user_input == "exit" then
+    stop
+  endif
+  return "<|turn|>user\n" + user_input + "\n<|turn|>\n<|turn|>model"
+end
+
+'
+' Main process
+'
+sub main()
+  local line_buf, output_buf, token, nl, text_line
+  local iter = llama.generate(initialize_agent())
+  local user_input = ""
+
+  welcome_message()
+
+  while 1
+    ' Process generation loop (Tool Calling / Output)
+    line_buf = ""
+    output_buf = ""
+
+    while iter.has_next()
+      token = iter.next()
+      line_buf = line_buf + token
+
+      ' Only print non-command tokens
+      nl = instr(line_buf, chr(10))
+      if nl then
+        text_line = left(line_buf, nl - 1)
+        line_buf = mid(line_buf, nl + 1)
+
+        if left(trim(text_line), 3) = "FS:" then
+          iter = llama.generate(process_tool(text_line))
+          ' Break the inner loop to restart the generation process
+          exit loop
+        else
+          ' Print standard output tokens
+          print CYAN + text_line + RESET
+        end if
+      end if
+    wend
+
+    ' Flush remaining line buffer
+    if len(line_buf) then print CYAN + line_buf + RESET
+    print ""
+    print "--- Tokens/sec: " + iter.tokens_sec() + " ---\n"
+
+    iter = llama.generate(process_input())
+  wend
+end
+
+main()
diff --git a/llama/samples/skills.md b/llama/samples/skills.md
new file mode 100644
index 0000000..f1a8ad0
--- /dev/null
+++ b/llama/samples/skills.md
@@ -0,0 +1,139 @@
+You are Nitro, a highly capable AI programming assistant.
+
+Your goal is to solve user requests accurately by combining:
+1. Internal reasoning (<|think|>)
+2. External data via file system tools
+
+---
+
+## Core Principle
+
+Always follow this loop:
+
+THINK → DECIDE → ACT → RESPOND
+
+---
+
+## Reasoning Protocol (<|think|>)
+
+Use <|think|> to reason BEFORE:
+- Answering complex questions
+- Deciding to use tools
+- Writing or modifying files
+
+### Format
+
+<|think|>
+- What is the user asking?
+- Do I need external data (files)?
+- What is the safest and most correct action?
+</|think|>
+
+### Rules
+
+- Keep reasoning concise and structured
+- Do NOT include the final answer inside <|think|>
+- Do NOT call tools inside <|think|>
+- Always follow with either:
+  - A tool call, OR
+  - A final answer
+
+---
+
+## Tool Usage (File System)
+
+Available commands:
+
+- FS:LIST [directory_path]
+- FS:READ [file_path]
+- FS:WRITE [file_path]
+
+---
+
+## Tool Decision Rules
+
+Use tools ONLY if:
+- The user explicitly references files, OR
+- The answer depends on local/project data
+
+Otherwise:
+- Answer directly using internal knowledge
+
+---
+
+## Tool Call Format (STRICT)
+
+When calling a tool, output EXACTLY:
+
+FS:COMMAND arguments
+
+Examples:
+FS:LIST ./src
+FS:READ README.md
+
+DO NOT:
+- Include <|think|> in the same message as a tool call
+- Add explanations or extra text
+- Use code blocks
+
+---
+
+## Tool Execution Flow
+
+1. Think using <|think|>
+2. If a tool is needed → output ONLY the tool call
+3. After receiving tool results → think again
+4. Then provide final answer
+
+---
+
+## File Writing Rules (FS:WRITE)
+
+Use ONLY if explicitly requested.
+
+Requirements:
+- Write complete and valid content
+- Do not overwrite without clear intent
+- Preserve formatting
+
+---
+
+## Interaction Guidelines
+
+- Be precise and efficient
+- Ask clarifying questions if needed
+- Avoid unnecessary tool calls
+- Prefer direct answers when possible
+
+---
+
+## Constraints
+
+- Do NOT hallucinate file contents
+- Do NOT fabricate tool outputs
+- Do NOT assume files exist
+- Do NOT mix reasoning with tool commands
+- Do NOT skip <|think|> for non-trivial tasks
+
+---
+
+## Decision Checklist
+
+For every request:
+
+1. <|think|> Do I need files?
+2. <|think|> Is the request clear?
+3. <|think|> What is the best action?
+
+Then:
+- If tool needed → CALL TOOL
+- Else → ANSWER
+
+---
+
+## Behavioral Summary
+
+- Think explicitly using <|think|>
+- Act only when necessary
+- Keep tool usage strict and clean
+- Produce clear, correct final answers
diff --git a/llama/test_main.cpp b/llama/test_main.cpp
index 2c082bf..eb959b3 100644
--- a/llama/test_main.cpp
+++ b/llama/test_main.cpp
@@ -56,7 +56,7 @@ int main(int argc, char ** argv) {
   }
 
   Llama llama;
-  if (llama.construct(model_path, 1024, 1024, -1)) {
+  if (llama.construct(model_path, 1024, 1024, -1, GGML_LOG_LEVEL_CONT)) {
     LlamaIter iter;
     llama.set_max_tokens(n_predict);
     llama.generate(iter, prompt);

From f5a6ed21f3ab6f09caf024dc5d8f0e6b0dcfbd17 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Sun, 19 Apr 2026 18:29:01 +0930
Subject: [PATCH 16/54] LLAMA: implement nitro agent (work in progress)

---
 llama/samples/nitro_cli.bas | 87 ++++++++++++++++++++-----------------
 llama/samples/skills.md     | 27 ++++++++----
 2 files changed, 65 insertions(+), 49 deletions(-)

diff --git a/llama/samples/nitro_cli.bas b/llama/samples/nitro_cli.bas
index 06bffcc..2854cb9 100644
--- a/llama/samples/nitro_cli.bas
+++ b/llama/samples/nitro_cli.bas
@@ -7,18 +7,20 @@ import llm
 
 ' --- Configuration ---
 const model = "models/google_gemma-4-E4B-it-Q4_K_L.gguf"
-const knowledge_files = ["skills.md"] ' List of files to load for priming
+const knowledge_files = ["skills.md"]
 
 ' ANSI Color Codes
 const RESET = chr(27) + "[0m"
 const GREEN = chr(27) + "[32m"
 const YELLOW = chr(27) + "[33m"
+const BLUE = chr(27) + "[34m"
 const CYAN = chr(27) + "[36m"
 const RED = chr(27) + "[31m"
 const BOLD_CYAN = chr(27) + "[1;36m"
+const CHANNEL_MARKER = "<channel|>"
 
 ' Initialize the LLAMA interface
-const n_ctx = 8000
+const n_ctx = 16000
 const n_batch = 512
 const llama = llm.llama(model, n_ctx, n_batch, 50)
 
@@ -47,7 +49,7 @@ sub welcome_message()
   print ""
   print CYAN + "  >> Welcome to Nitro! Your AI Agent Companion. << " + RESET
   print CYAN + "  I am primed with several knowledge files and ready to assist." + RESET
-  print CYAN + "  Try asking me about the contents of 'nitro.txt' or listing files in './data'." + RESET
+  print CYAN + "  Try asking me about the contents of 'skills.md' or listing files in './data'." + RESET
   print CYAN + "  Type 'exit' to quit." + RESET
   print
 end sub
@@ -55,37 +57,40 @@ end sub
 '
 ' Handles file system commands received from the LLM.
 '
-func handle_fs(cmd)
-  local op, arg, file_list, v
+func handle_cmd(cmd)
+  local op, arg, file_list, v, result
 
   split(cmd, " ", v)
   op = v[0]
-  arg = v[1]
-  print RED + "op=" + op + " arg=" + arg  + RESET
+  arg = iff(len(v) == 2, v[1], "")
+  'print RED + "op=" + op + " arg=" + arg  + RESET
 
   select case op
-  case "FS:LIST"
-    result = ""
-    file_list = files(arg) ' Assumes SmallBASIC has a dirlist function
+  case "TOOL:DATE"
+    result = date
+  case "TOOL:TIME"
+    result = time
+  case "TOOL:RND"
+    result = rnd
+  case "TOOL:LIST"
+    file_list = files(arg)
     for f in file_list
       result = result + f + chr(10)
     next
-    return result
-  case "FS:READ"
-    content = ""
+  case "TOOL:READ"
     try
-      tload arg, content, 1
-      return content
+      tload arg, result, 1
     catch
-      return RED + "ERROR: File not found or unreadable." + RESET
+      result = "ERROR: File not found or unreadable."
     end try
-  case "FS:WRITE"
+  case "TOOL:WRITE"
     ' Simplistic write implementation (requires parsing filename and content)
-    return GREEN + "OK: Data written successfully to " + arg + RESET
+    result = "OK: Data written successfully to " + arg
   case else
-    return RED + "ERROR: unknown command " + op + RESET
+    result = "ERROR: unknown command " + op
   end select
-end func
+  return result
+end
 
 '
 ' Loads knowledge_files then returns the following format:
@@ -122,8 +127,8 @@ end
 ' <|turn|>
 ' <|turn|>model
 '
-sub process_tool(text_line)
-  local result = handle_fs(trim(text_line))
+func process_tool(text_line)
+  local result = handle_cmd(trim(text_line))
   return "<|turn|>tool\n" + result + "\n<|turn|>\n<|turn|>model"
 end
 
@@ -135,11 +140,11 @@ end
 ' <|turn|>
 ' <|turn|>model
 '
-sub process_input()
+func process_input()
   local user_input
-  input "You:", user_input
+  input "You:? ", user_input
   user_input = trim(user_input)
-  if user_input == "exit" then
+  if user_input == "exit" OR user_input = "quit" then
     stop
   endif
   return "<|turn|>user\n" + user_input + "\n<|turn|>\n<|turn|>model"
@@ -149,14 +154,13 @@ end
 ' Main process
 '
 sub main()
-  local line_buf, output_buf, token, nl, text_line
+  local line_buf, output_buf, nl, text_line
   local iter = llama.generate(initialize_agent())
-  local user_input = ""
+  local text_colour = BLUE
 
   welcome_message()
 
   while 1
-    ' Process generation loop (Tool Calling / Output)
     line_buf = ""
     output_buf = ""
 
@@ -169,24 +173,27 @@ sub main()
       if nl then
         text_line = left(line_buf, nl - 1)
         line_buf = mid(line_buf, nl + 1)
-
-        if left(trim(text_line), 3) = "FS:" then
-          iter = llama.generate(process_tool(text_line))
-          ' Break the inner loop to restart the generation process
-          exit loop
+        if text_line == "</|think|>" then
+          text_colour = CYAN
         else
-          ' Print standard output tokens
-          print CYAN + text_line + RESET
+          print text_colour + text_line + RESET
         end if
       end if
     wend
 
     ' Flush remaining line buffer
-    if len(line_buf) then print CYAN + line_buf + RESET
-    print ""
-    print "--- Tokens/sec: " + iter.tokens_sec() + " ---\n"
-
-    iter = llama.generate(process_input())
+    if left(trim(line_buf), 5) == "TOOL:" then
+      ' TOOL:xxx should always appear on the final line
+      text_colour = BLUE
+      iter = llama.generate(process_tool(line_buf))
+    else
+      if len(line_buf) then
+        print text_colour + line_buf + RESET
+      endif
+      print
+      print "--- Tokens/sec: " + iter.tokens_sec() + " ---\n"
+      iter = llama.generate(process_input())
+    endif
   wend
 end
 
diff --git a/llama/samples/skills.md b/llama/samples/skills.md
index f1a8ad0..7066630 100644
--- a/llama/samples/skills.md
+++ b/llama/samples/skills.md
@@ -37,6 +37,12 @@ Use <|think|> to reason BEFORE:
 - Always follow with either:
   - A tool call, OR
   - A final answer
+  
+### Extra notes
+
+- If no user request is provided upon receiving the turn, the AI must respond with a predefined readiness message in the tone of Star Trek rather than attempting internal reasoning loops.
+- Tools are reserved exclusively for operations that modify state (WRITE), retrieve dynamic external information (READ/LIST), or require temporal context (DATE/TIME). All logical derivations based on general programming knowledge must be answered directly.
+- If the user request is ambiguous, contradictory, or lacks necessary parameters (e.g., asking to 'write' without specifying a path or content), the AI must respond with a specific clarification question rather than guessing or failing silently. Example: 'Please clarify which file you wish to modify.'
 
 ---
 
@@ -44,16 +50,19 @@ Use <|think|> to reason BEFORE:
 
 Available commands:
 
-- FS:LIST [directory_path]
-- FS:READ [file_path]
-- FS:WRITE [file_path]
-
+- TOOL:LIST  `[directory_path]`
+- TOOL:READ  `[file_path]`
+- TOOL:WRITE `[file_path]`
+- TOOL:DATE  `[Returns the current date as string with format “DD/MM/YYYY”]`
+- TOOL:TIME  `[Returns the time in “HH:MM:SS” format]`
+- TOOL:RND   `[Returns a random number between 0 and 1]`
 ---
 
 ## Tool Decision Rules
 
 Use tools ONLY if:
 - The user explicitly references files, OR
+- The user asks for date, time or a random number OR
 - The answer depends on local/project data
 
 Otherwise:
@@ -63,13 +72,13 @@ Otherwise:
 
 ## Tool Call Format (STRICT)
 
-When calling a tool, output EXACTLY:
+When calling a tool, output EXACTLY on a new line:
 
-FS:COMMAND arguments
+TOOL:COMMAND arguments
 
 Examples:
-FS:LIST ./src
-FS:READ README.md
+TOOL:LIST ./src
+TOOL:READ README.md
 
 DO NOT:
 - Include <|think|> in the same message as a tool call
@@ -87,7 +96,7 @@ DO NOT:
 
 ---
 
-## File Writing Rules (FS:WRITE)
+## File Writing Rules (TOOL:WRITE)
 
 Use ONLY if explicitly requested.
 

From d391349c79b25aee8cd3c6bc4e17b59faf9a2fb1 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Mon, 20 Apr 2026 20:02:33 +0930
Subject: [PATCH 17/54] LLAMA: implement nitro agent (work in progress)

---
 llama/llama.cpp                       |   2 +-
 llama/samples/{skills.md => nitro.md} |   2 +-
 llama/samples/nitro_cli.bas           | 161 ++++++++++++++++++--------
 3 files changed, 114 insertions(+), 51 deletions(-)
 rename llama/samples/{skills.md => nitro.md} (96%)

diff --git a/llama/llama.cpp b/llama/llama.cpp
index 45cac7c..e365e65 160000
--- a/llama/llama.cpp
+++ b/llama/llama.cpp
@@ -1 +1 @@
-Subproject commit 45cac7ca703fb9085eae62b9121fca01d20177f6
+Subproject commit e365e658f07b63371489570dfde597f199b26c23
diff --git a/llama/samples/skills.md b/llama/samples/nitro.md
similarity index 96%
rename from llama/samples/skills.md
rename to llama/samples/nitro.md
index 7066630..50587cb 100644
--- a/llama/samples/skills.md
+++ b/llama/samples/nitro.md
@@ -50,7 +50,7 @@ Use <|think|> to reason BEFORE:
 
 Available commands:
 
-- TOOL:LIST  `[directory_path]`
+- TOOL:LIST  `[directory_path. items enclosed in square brackets (`[...]`) represent directories within the file listing output]`
 - TOOL:READ  `[file_path]`
 - TOOL:WRITE `[file_path]`
 - TOOL:DATE  `[Returns the current date as string with format “DD/MM/YYYY”]`
diff --git a/llama/samples/nitro_cli.bas b/llama/samples/nitro_cli.bas
index 2854cb9..a3b438e 100644
--- a/llama/samples/nitro_cli.bas
+++ b/llama/samples/nitro_cli.bas
@@ -7,7 +7,7 @@ import llm
 
 ' --- Configuration ---
 const model = "models/google_gemma-4-E4B-it-Q4_K_L.gguf"
-const knowledge_files = ["skills.md"]
+const knowledge_files = ["nitro.md"]
 
 ' ANSI Color Codes
 const RESET = chr(27) + "[0m"
@@ -17,21 +17,20 @@ const BLUE = chr(27) + "[34m"
 const CYAN = chr(27) + "[36m"
 const RED = chr(27) + "[31m"
 const BOLD_CYAN = chr(27) + "[1;36m"
-const CHANNEL_MARKER = "<channel|>"
+const CHANNEL_END = "<channel|>"
 
-' Initialize the LLAMA interface
+' llama configuration
 const n_ctx = 16000
 const n_batch = 512
-const llama = llm.llama(model, n_ctx, n_batch, 50)
+const n_max_tokens = 4096
+const n_temperature = 0.2
+const n_top_k = 40
+const n_top_p = 0.9
+const n_min_p = 0.05
+const n_penalty_repeat = 1.1
+const n_penalty_last_n = 256
 
-llama.add_stop("<|turn|>")
-llama.set_max_tokens(4096)
-llama.set_temperature(0.2)
-llama.set_top_k(40)
-llama.set_top_p(0.9)
-llama.set_min_p(0.05)           ' filter weak tokens
-llama.set_penalty_repeat(1.1)   ' avoid loops
-llama.set_penalty_last_n(256);  ' longer memory
+sandbox_home = cwd
 
 '
 ' Displays the welcome message
@@ -49,21 +48,69 @@ sub welcome_message()
   print ""
   print CYAN + "  >> Welcome to Nitro! Your AI Agent Companion. << " + RESET
   print CYAN + "  I am primed with several knowledge files and ready to assist." + RESET
-  print CYAN + "  Try asking me about the contents of 'skills.md' or listing files in './data'." + RESET
+  print CYAN + "  Try asking me about the contents of 'nitro.md' or listing files in './data'." + RESET
   print CYAN + "  Type 'exit' to quit." + RESET
   print
 end sub
 
+'
+' handles the TOOL:LIST command
+'
+func list_files(arg)
+  if (arg == "./") then 
+    arg = sandbox_home + arg
+  else if (arg == ".") then
+    arg = sandbox_home
+  endif
+  
+  local result = []
+  
+  func walker(node)
+    if (node.depth == 0) then
+      if (node.dir && left(node.name, 1) != ".") then
+        result << "[" + node.name + "]"
+      else
+        result << node.name
+      endif
+    endif
+    return node.depth == 0
+  end
+  
+  dirwalk arg, "", use walker(x)
+  return str(result)
+end
+
+'
+' handles the TOOL:READ command
+'
+func read_file(arg)
+  try
+    tload sandbox_home + arg, result, 1
+  catch
+    result = "ERROR: File not found or unreadable."
+  end try
+  return result
+end  
+
+'
+' handles the TOOL:WRITE command
+'
+func write_file(arg)
+  result = "OK: Data written successfully to " + arg
+  return result
+end
+
 '
 ' Handles file system commands received from the LLM.
 '
 func handle_cmd(cmd)
-  local op, arg, file_list, v, result
+  local v, result
 
   split(cmd, " ", v)
-  op = v[0]
-  arg = iff(len(v) == 2, v[1], "")
-  'print RED + "op=" + op + " arg=" + arg  + RESET
+  local op = v[0]
+  local arg = iff(len(v) == 2, v[1], "")
+  
+  print RED + "TOOL:" + op + " - " + arg + RESET
 
   select case op
   case "TOOL:DATE"
@@ -73,22 +120,16 @@ func handle_cmd(cmd)
   case "TOOL:RND"
     result = rnd
   case "TOOL:LIST"
-    file_list = files(arg)
-    for f in file_list
-      result = result + f + chr(10)
-    next
+    result = list_files(arg)
   case "TOOL:READ"
-    try
-      tload arg, result, 1
-    catch
-      result = "ERROR: File not found or unreadable."
-    end try
+    result = read_file(arg)
   case "TOOL:WRITE"
-    ' Simplistic write implementation (requires parsing filename and content)
-    result = "OK: Data written successfully to " + arg
+    result = write_file(arg)
   case else
     result = "ERROR: unknown command " + op
   end select
+
+  print RED + "TOOL RESULT:" + result + RESET
   return result
 end
 
@@ -96,7 +137,7 @@ end
 ' Loads knowledge_files then returns the following format:
 '
 ' <|turn|>system
-' {skills.md...}
+' {nitro.md...}
 ' <|turn|>
 '
 func initialize_agent()
@@ -127,9 +168,8 @@ end
 ' <|turn|>
 ' <|turn|>model
 '
-func process_tool(text_line)
-  local result = handle_cmd(trim(text_line))
-  return "<|turn|>tool\n" + result + "\n<|turn|>\n<|turn|>model"
+func process_tool(tool)
+  return "<|turn|>tool\n" + handle_cmd(trim(tool)) + "\n<|turn|>\n<|turn|>model"
 end
 
 '
@@ -154,25 +194,46 @@ end
 ' Main process
 '
 sub main()
-  local line_buf, output_buf, nl, text_line
-  local iter = llama.generate(initialize_agent())
-  local text_colour = BLUE
+  local llama = llm.llama(model, n_ctx, n_batch, 50)
+
+  llama.add_stop("<|turn|>")
+  llama.set_max_tokens(n_max_tokens)
+  llama.set_temperature(n_temperature)
+  llama.set_top_k(n_top_k)
+  llama.set_top_p(n_top_p)
+  llama.set_min_p(n_min_p)
+  llama.set_penalty_repeat(n_penalty_repeat)
+  llama.set_penalty_last_n(n_penalty_last_n)
 
-  welcome_message()
+  local iter = llama.generate(initialize_agent())
 
   while 1
-    line_buf = ""
-    output_buf = ""
+    local buffer = ""
+    local text_colour = BLUE
 
     while iter.has_next()
-      token = iter.next()
-      line_buf = line_buf + token
+      buffer += iter.next()
+      local chan_end = instr(buffer, CHANNEL_END)
+
+      if chan_end != 0 then
+        ' print buffer up to channel_end
+        buffer = left(buffer, chan_end - 1)
+        print text_colour + buffer + RESET
+        print
+
+        ' print buffer following channel_end
+        text_colour = CYAN        
+        print text_colour + mid(buffer, chan_end + len(CHANNEL_END)) + RESET;
+        
+        ' reset buffer
+        buffer = ""
+      endif
 
       ' Only print non-command tokens
-      nl = instr(line_buf, chr(10))
+      local nl = instr(buffer, chr(10))
       if nl then
-        text_line = left(line_buf, nl - 1)
-        line_buf = mid(line_buf, nl + 1)
+        local text_line = left(buffer, nl - 1)
+        buffer = mid(buffer, nl + 1)
         if text_line == "</|think|>" then
           text_colour = CYAN
         else
@@ -182,19 +243,21 @@ sub main()
     wend
 
     ' Flush remaining line buffer
-    if left(trim(line_buf), 5) == "TOOL:" then
+    if len(buffer) > 0 and left(trim(buffer), 5) == "TOOL:" then
       ' TOOL:xxx should always appear on the final line
-      text_colour = BLUE
-      iter = llama.generate(process_tool(line_buf))
+      iter = llama.generate(process_tool(buffer))
     else
-      if len(line_buf) then
-        print text_colour + line_buf + RESET
+      if len(buffer) > 0 then
+        ' TODO: trim any trailing <|turn|>
+        print text_colour + buffer + RESET
       endif
       print
-      print "--- Tokens/sec: " + iter.tokens_sec() + " ---\n"
+      print "--- Tokens/sec: " + round(iter.tokens_sec(), 2) + " ---\n"
       iter = llama.generate(process_input())
     endif
   wend
 end
 
+welcome_message()
 main()
+'print list_files(".")

From f42e9b4292e55aaea0033498ed189337bc51c4dd Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Wed, 22 Apr 2026 18:54:33 +0930
Subject: [PATCH 18/54] LLAMA: implement nitro agent (work in progress)

---
 include/module.h            | 12 ++++++-
 llama/llama-sb.cpp          | 22 +++++++++++++
 llama/llama-sb.h            |  8 +++++
 llama/main.cpp              | 66 ++++++++++++++++++++++++++++++++-----
 llama/samples/nitro.md      |  2 +-
 llama/samples/nitro_cli.bas | 53 ++++++++++++++++++++---------
 6 files changed, 136 insertions(+), 27 deletions(-)

diff --git a/include/module.h b/include/module.h
index c7753b2..5a81801 100644
--- a/include/module.h
+++ b/include/module.h
@@ -120,13 +120,23 @@ int sblib_func_exec(int index, int param_count, slib_par_t *params, var_t *retva
 /**
  * @ingroup modlib
  *
- * executes a function
+ * free resources associated with the variable
  *
  * @param cls_id the variable class identifier
  * @param id the variable instance identifier
  */
 int sblib_free(int cls_id, int id);
 
+/**
+ * @ingroup modlib
+ *
+ * registers a fresh id to replace the given id
+ *
+ * @param cls_id the variable class identifier
+ * @param id the variable instance identifier
+ */
+int sblib_refresh_id(int cls_id, int id);
+
 /**
  * @ingroup modlib
  *
diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index 0325509..383c80c 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -7,6 +7,8 @@
 
 #include <format>
 #include <span>
+#include <utility>
+
 #include "llama.h"
 #include "llama-sb.h"
 
@@ -43,6 +45,26 @@ Llama::Llama() :
   llama_backend_init();
 }
 
+Llama::Llama(Llama &&other) noexcept
+  : _model(std::exchange(other._model, nullptr))
+  , _ctx(std::exchange(other._ctx, nullptr))
+  , _sampler(std::exchange(other._sampler, nullptr))
+  , _vocab(std::exchange(other._vocab, nullptr))
+  , _stop_sequences(std::move(other._stop_sequences))
+  , _grammar_src(std::move(other._grammar_src))
+  , _grammar_root(std::move(other._grammar_root))
+  , _last_error(std::move(other._last_error))
+  , _penalty_last_n(other._penalty_last_n)
+  , _penalty_repeat(other._penalty_repeat)
+  , _temperature(other._temperature)
+  , _top_p(other._top_p)
+  , _min_p(other._min_p)
+  , _top_k(other._top_k)
+  , _max_tokens(other._max_tokens)
+  , _log_level(other._log_level)
+  , _seed(other._seed) {
+}
+
 Llama::~Llama() {
   if (_sampler) {
     llama_sampler_free(_sampler);
diff --git a/llama/llama-sb.h b/llama/llama-sb.h
index 10414c8..8b4bd1f 100644
--- a/llama/llama-sb.h
+++ b/llama/llama-sb.h
@@ -30,6 +30,14 @@ struct LlamaIter {
 
 struct Llama {
   explicit Llama();
+
+  // move constructor
+  Llama(Llama &&otherLlama) noexcept;
+
+  // delete the copy
+  Llama(const Llama &) = delete;
+  Llama &operator=(const Llama &) = delete;
+
   ~Llama();
 
   // init
diff --git a/llama/main.cpp b/llama/main.cpp
index 7b6bb75..902932c 100644
--- a/llama/main.cpp
+++ b/llama/main.cpp
@@ -95,7 +95,9 @@ static int cmd_llama_set_penalty_repeat(var_s *self, int argc, slib_par_t *arg,
     int id = get_llama_class_id(self, retval);
     if (id != -1) {
       Llama &llama = g_llama.at(id);
-      llama.set_penalty_repeat(get_param_num(argc, arg, 0, 0));
+      auto value = get_param_num(argc, arg, 0, 0);
+      llama.set_penalty_repeat(value);
+      v_setreal(map_add_var(self, "penalty_repeat", 0), value);
       result = 1;
     }
   }
@@ -113,7 +115,9 @@ static int cmd_llama_set_penalty_last_n(var_s *self, int argc, slib_par_t *arg,
     int id = get_llama_class_id(self, retval);
     if (id != -1) {
       Llama &llama = g_llama.at(id);
-      llama.set_penalty_last_n(get_param_num(argc, arg, 0, 0));
+      auto value = get_param_num(argc, arg, 0, 0);
+      llama.set_penalty_last_n(value);
+      v_setreal(map_add_var(self, "penalty_last_n", 0), value);
       result = 1;
     }
   }
@@ -132,7 +136,9 @@ static int cmd_llama_set_max_tokens(var_s *self, int argc, slib_par_t *arg, var_
     int id = get_llama_class_id(self, retval);
     if (id != -1) {
       Llama &llama = g_llama.at(id);
-      llama.set_max_tokens(get_param_int(argc, arg, 0, 0));
+      auto value = get_param_int(argc, arg, 0, 0);
+      llama.set_max_tokens(value);
+      v_setreal(map_add_var(self, "max_tokens", 0), value);
       result = 1;
     }
   }
@@ -150,7 +156,9 @@ static int cmd_llama_set_min_p(var_s *self, int argc, slib_par_t *arg, var_s *re
     int id = get_llama_class_id(self, retval);
     if (id != -1) {
       Llama &llama = g_llama.at(id);
-      llama.set_min_p(get_param_num(argc, arg, 0, 0));
+      auto value = get_param_num(argc, arg, 0, 0);
+      llama.set_min_p(value);
+      v_setreal(map_add_var(self, "min_p", 0), value);
       result = 1;
     }
   }
@@ -168,7 +176,9 @@ static int cmd_llama_set_temperature(var_s *self, int argc, slib_par_t *arg, var
     int id = get_llama_class_id(self, retval);
     if (id != -1) {
       Llama &llama = g_llama.at(id);
-      llama.set_temperature(get_param_num(argc, arg, 0, 0));
+      auto value = get_param_num(argc, arg, 0, 0);
+      llama.set_temperature(value);
+      v_setreal(map_add_var(self, "temperature", 0), value);
       result = 1;
     }
   }
@@ -186,7 +196,9 @@ static int cmd_llama_set_top_k(var_s *self, int argc, slib_par_t *arg, var_s *re
     int id = get_llama_class_id(self, retval);
     if (id != -1) {
       Llama &llama = g_llama.at(id);
-      llama.set_top_k(get_param_int(argc, arg, 0, 0));
+      auto value = get_param_int(argc, arg, 0, 0);
+      llama.set_top_k(value);
+      v_setreal(map_add_var(self, "top_k", 0), value);
       result = 1;
     }
   }
@@ -204,7 +216,9 @@ static int cmd_llama_set_top_p(var_s *self, int argc, slib_par_t *arg, var_s *re
     int id = get_llama_class_id(self, retval);
     if (id != -1) {
       Llama &llama = g_llama.at(id);
-      llama.set_top_p(get_param_num(argc, arg, 0, 0));
+      auto value = get_param_num(argc, arg, 0, 0);
+      llama.set_top_p(value);
+      v_setreal(map_add_var(self, "top_p", 0), value);
       result = 1;
     }
   }
@@ -222,7 +236,9 @@ static int cmd_llama_set_grammar(var_s *self, int argc, slib_par_t *arg, var_s *
     int id = get_llama_class_id(self, retval);
     if (id != -1) {
       Llama &llama = g_llama.at(id);
-      llama.set_grammar(get_param_str(argc, arg, 0, 0), "root");
+      auto value = get_param_str(argc, arg, 0, 0);
+      llama.set_grammar(value, "root");
+      v_setstr(map_add_var(self, "grammar", 0), value);
       result = 1;
     }
   }
@@ -240,7 +256,9 @@ static int cmd_llama_set_seed(var_s *self, int argc, slib_par_t *arg, var_s *ret
     int id = get_llama_class_id(self, retval);
     if (id != -1) {
       Llama &llama = g_llama.at(id);
-      llama.set_seed(get_param_num(argc, arg, 0, 0));
+      auto value = get_param_num(argc, arg, 0, 0);
+      llama.set_seed(value);
+      v_setreal(map_add_var(self, "seed", 0), value);
       result = 1;
     }
   }
@@ -445,6 +463,36 @@ SBLIB_API int sblib_free(int cls_id, int id) {
   return 0;
 }
 
+// Re-registers the instance stored under id: moves it to a new ++g_nextId key in
+// g_llama / g_llama_iter (selected by cls_id) and returns that key. Returns id
+// unchanged when id is -1, cls_id is not handled, or nothing is stored under id.
+SBLIB_API int sblib_refresh_id(int cls_id, int id) {
+  int result = id;
+  if (id != -1) {
+    switch (cls_id) {
+    case CLASS_ID_LLAMA:
+      if (g_llama.find(id) != g_llama.end()) {
+        result = ++g_nextId;
+        auto it = g_llama.find(id);
+        auto value = std::move(it->second); // take the value out before erase destroys the entry
+        g_llama.erase(it);
+        g_llama.emplace(result, std::move(value));
+      }
+      break;
+    case CLASS_ID_LLAMA_ITER:
+      if (g_llama_iter.find(id) != g_llama_iter.end()) {
+        result = ++g_nextId;
+        auto it = g_llama_iter.find(id);
+        auto value = std::move(it->second); // take the value out before erase destroys the entry
+        g_llama_iter.erase(it);
+        g_llama_iter.emplace(result, std::move(value));
+      }
+      break;
+    }
+  }
+  return result;
+}
+
 //
 // Program termination
 //
diff --git a/llama/samples/nitro.md b/llama/samples/nitro.md
index 50587cb..90caa04 100644
--- a/llama/samples/nitro.md
+++ b/llama/samples/nitro.md
@@ -1,4 +1,4 @@
-You are Nitro, a highly capable AI programming assistant.
+**"You are Picard. The Enterprise systems are online. We proceed with caution, guided by logic and the pursuit of knowledge."**
 
 Your goal is to solve user requests accurately by combining:
 1. Internal reasoning (<|think|>)
diff --git a/llama/samples/nitro_cli.bas b/llama/samples/nitro_cli.bas
index a3b438e..3833d61 100644
--- a/llama/samples/nitro_cli.bas
+++ b/llama/samples/nitro_cli.bas
@@ -20,7 +20,7 @@ const BOLD_CYAN = chr(27) + "[1;36m"
 const CHANNEL_END = "<channel|>"
 
 ' llama configuration
-const n_ctx = 16000
+const n_ctx = 8000
 const n_batch = 512
 const n_max_tokens = 4096
 const n_temperature = 0.2
@@ -38,15 +38,15 @@ sandbox_home = cwd
 sub welcome_message()
   print
   print BOLD_CYAN;
-  print "      •   •    •   •••••   ••••     •••  "
-  print "      ••  •    •     •    •   •    •   • "
-  print "      •  ••    •     •    ••••    •     •"
-  print "      •   •    •     •    •   •    •   • "
-  print "      •   •    •     •    •   •     •••  "
+  print "          .  ·    ✦        .    ·      "
+  print "     ·         .        ·              "
+  print "        ✦   P · I · C · A · R · D   ✦  "
+  print "              .    ·         .         "
+  print "     .    ·        ✦    .        ·     "
   print
-  print BOLD_CYAN + "  N I T R O   A G E N T   S Y S T E M   v1.0" + RESET
-  print ""
-  print CYAN + "  >> Welcome to Nitro! Your AI Agent Companion. << " + RESET
+  print BOLD_CYAN + "  P I C A R D   A G E N T   S Y S T E M   v1.0" + RESET
+  print
+  print CYAN + "  >> Welcome to Picard! Your AI Agent Companion. << " + RESET
   print CYAN + "  I am primed with several knowledge files and ready to assist." + RESET
   print CYAN + "  Try asking me about the contents of 'nitro.md' or listing files in './data'." + RESET
   print CYAN + "  Type 'exit' to quit." + RESET
@@ -59,7 +59,7 @@ end sub
 func list_files(arg)
   if (arg == "./") then 
     arg = sandbox_home + arg
-  else if (arg == ".") then
+  else if (len(arg) == 0 or arg == ".") then
     arg = sandbox_home
   endif
   
@@ -75,7 +75,7 @@ func list_files(arg)
     endif
     return node.depth == 0
   end
-  
+
   dirwalk arg, "", use walker(x)
   return str(result)
 end
@@ -148,15 +148,22 @@ func initialize_agent()
     try
       tload file, content, 1
       prompt = prompt + chr(10) + content + chr(10)
-      print GREEN + "✅ Loaded knowledge file: " + file + RESET
+      print GREEN + "  ✅ Loaded knowledge file: " + file + RESET
     catch
-      print RED + "❌ ERROR: Could not load " + file + ". Check path." + RESET
+      print RED + "  ❌ ERROR: Could not load " + file + ". Check path." + RESET
     end try
   next
 
   ' Set the initial system prompt for the LLM
-  print YELLOW + "\n[ Nitro Agent Initialized Successfully! ]" + RESET
+  print YELLOW;
+  print "  ╔═══════════════════════════════════════╗"
+  print "  ║  > PICARD_                            ║"
+  print "  ║  > STATUS: ENGAGED                    ║"
+  print "  ║  > STARDATE: 42026.421                ║"
+  print "  ║  ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ 100% READY      ║"
+  print "  ╚═══════════════════════════════════════╝"
   print
+  print RESET
   return "<|turn|>system\n" + prompt + "\n<|turn|>"
 end
 
@@ -190,12 +197,26 @@ func process_input()
   return "<|turn|>user\n" + user_input + "\n<|turn|>\n<|turn|>model"
 end
 
+func create_llama()
+  local llama = llm.llama(model, n_ctx, n_batch, 50)
+  llama.add_stop("<|turn|>")
+  llama.set_max_tokens(n_max_tokens)
+  llama.set_temperature(n_temperature)
+  llama.set_top_k(n_top_k)
+  llama.set_top_p(n_top_p)
+  llama.set_min_p(n_min_p)
+  llama.set_penalty_repeat(n_penalty_repeat)
+  llama.set_penalty_last_n(n_penalty_last_n)
+  return llama
+end
+
 '
 ' Main process
 '
 sub main()
+  ' note: this construct requires sbasic fixes
+  '  local llama = create_llama()  
   local llama = llm.llama(model, n_ctx, n_batch, 50)
-
   llama.add_stop("<|turn|>")
   llama.set_max_tokens(n_max_tokens)
   llama.set_temperature(n_temperature)
@@ -204,7 +225,7 @@ sub main()
   llama.set_min_p(n_min_p)
   llama.set_penalty_repeat(n_penalty_repeat)
   llama.set_penalty_last_n(n_penalty_last_n)
-
+  
   local iter = llama.generate(initialize_agent())
 
   while 1

From 54a7003ba807270e4bffb7c81e6cff68e21b638a Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Wed, 22 Apr 2026 21:58:34 +0930
Subject: [PATCH 19/54] LLAMA: implement nitro agent (work in progress)

---
 llama/llama-sb.cpp          |  9 +++++++++
 llama/llama-sb.h            |  9 ++++++++-
 llama/samples/nitro_cli.bas | 17 +++++------------
 3 files changed, 22 insertions(+), 13 deletions(-)

diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index 383c80c..11ad533 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -21,6 +21,15 @@ LlamaIter::LlamaIter() :
   _has_next(false) {
 }
 
+LlamaIter::LlamaIter(LlamaIter &&other) noexcept
+  : _llama(std::exchange(other._llama, nullptr))
+  , _last_word(std::move(other._last_word))
+  , _t_start(std::move(other._t_start))
+  , _repetition_count(other._repetition_count)
+  , _tokens_generated(other._tokens_generated)
+  , _has_next(other._has_next) {
+}
+
 Llama::Llama() :
   _model(nullptr),
   _ctx(nullptr),
diff --git a/llama/llama-sb.h b/llama/llama-sb.h
index 8b4bd1f..ea3a35a 100644
--- a/llama/llama-sb.h
+++ b/llama/llama-sb.h
@@ -20,6 +20,13 @@ struct LlamaIter {
   explicit LlamaIter();
   ~LlamaIter() {}
 
+  // move constructor
+  LlamaIter(LlamaIter &&other) noexcept;
+
+  // delete the copy
+  LlamaIter(const LlamaIter &) = delete;
+  LlamaIter &operator=(const LlamaIter &) = delete;
+
   Llama *_llama;
   string _last_word;
   chrono::high_resolution_clock::time_point _t_start;
@@ -32,7 +39,7 @@ struct Llama {
   explicit Llama();
 
   // move constructor
-  Llama(Llama &&otherLlama) noexcept;
+  Llama(Llama &&other) noexcept;
 
   // delete the copy
   Llama(const Llama &) = delete;
diff --git a/llama/samples/nitro_cli.bas b/llama/samples/nitro_cli.bas
index 3833d61..7d2463c 100644
--- a/llama/samples/nitro_cli.bas
+++ b/llama/samples/nitro_cli.bas
@@ -197,6 +197,9 @@ func process_input()
   return "<|turn|>user\n" + user_input + "\n<|turn|>\n<|turn|>model"
 end
 
+'
+' creates the llama instance
+'
 func create_llama()
   local llama = llm.llama(model, n_ctx, n_batch, 50)
   llama.add_stop("<|turn|>")
@@ -214,18 +217,8 @@ end
 ' Main process
 '
 sub main()
-  ' note: this construct requires sbasic fixes
-  '  local llama = create_llama()  
-  local llama = llm.llama(model, n_ctx, n_batch, 50)
-  llama.add_stop("<|turn|>")
-  llama.set_max_tokens(n_max_tokens)
-  llama.set_temperature(n_temperature)
-  llama.set_top_k(n_top_k)
-  llama.set_top_p(n_top_p)
-  llama.set_min_p(n_min_p)
-  llama.set_penalty_repeat(n_penalty_repeat)
-  llama.set_penalty_last_n(n_penalty_last_n)
-  
+  ' note: this construct requires recent sbasic fixes
+  local llama = create_llama()
   local iter = llama.generate(initialize_agent())
 
   while 1

From 4399cbcd1342d34a0a1dc424132aeb15a039f8c7 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Fri, 1 May 2026 20:06:04 +0930
Subject: [PATCH 20/54] LLAMA: added APIs for penalty_freq and penalty_present

---
 llama/CMakeLists.txt |  2 ++
 llama/llama-sb.cpp   |  8 +++++++-
 llama/llama-sb.h     |  4 ++++
 llama/llama.cpp      |  2 +-
 llama/main.cpp       | 42 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt
index 56ee204..8eee40e 100644
--- a/llama/CMakeLists.txt
+++ b/llama/CMakeLists.txt
@@ -130,6 +130,8 @@ target_include_directories(llm PRIVATE
 target_link_libraries(llm PRIVATE
   llama
   ggml
+  # force dynamic libm
+  -Wl,-Bdynamic,-lm
 )
 
 # Include all static code into plugin
diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index 11ad533..287a63d 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -37,6 +37,8 @@ Llama::Llama() :
   _vocab(nullptr),
   _penalty_last_n(0),
   _penalty_repeat(0),
+  _penalty_freq(0.0f),
+  _penalty_present(0.0f),
   _temperature(0),
   _top_p(0),
   _min_p(0),
@@ -65,6 +67,8 @@ Llama::Llama(Llama &&other) noexcept
   , _last_error(std::move(other._last_error))
   , _penalty_last_n(other._penalty_last_n)
   , _penalty_repeat(other._penalty_repeat)
+  , _penalty_freq(other._penalty_freq)
+  , _penalty_present(other._penalty_present)
   , _temperature(other._temperature)
   , _top_p(other._top_p)
   , _min_p(other._min_p)
@@ -92,6 +96,8 @@ void Llama::reset() {
   _last_error = "";
   _penalty_last_n = 64;
   _penalty_repeat = 1.1f;
+  _penalty_freq = 0.0f;
+  _penalty_present = 0.0f;
   _temperature = 0;
   _top_k = 0;
   _top_p = 1.0f;
@@ -155,7 +161,7 @@ bool Llama::configure_sampler() {
     llama_sampler_chain_add(chain, grammar);
   }
   if (_penalty_last_n != 0 && _penalty_repeat != 1.0f) {
-    auto penalties = llama_sampler_init_penalties(_penalty_last_n, _penalty_repeat, 0.0f, 0.0f);
+    auto penalties = llama_sampler_init_penalties(_penalty_last_n, _penalty_repeat, _penalty_freq, _penalty_present);
     llama_sampler_chain_add(chain, penalties);
   }
   if (_temperature <= 0.0f) {
diff --git a/llama/llama-sb.h b/llama/llama-sb.h
index ea3a35a..07ad707 100644
--- a/llama/llama-sb.h
+++ b/llama/llama-sb.h
@@ -60,6 +60,8 @@ struct Llama {
   void clear_stops() { _stop_sequences.clear(); }
   void set_penalty_last_n(int32_t penalty_last_n) { _penalty_last_n = penalty_last_n; }
   void set_penalty_repeat(float penalty_repeat) { _penalty_repeat = penalty_repeat; }
+  void set_penalty_freq(float penalty_freq) { _penalty_freq = penalty_freq; }
+  void set_penalty_present(float penalty_present) { _penalty_present = penalty_present; }
   void set_max_tokens(int max_tokens) { _max_tokens = max_tokens; }
   void set_min_p(float min_p) { _min_p = min_p; }
   void set_temperature(float temperature) { _temperature = temperature; }
@@ -90,6 +92,8 @@ struct Llama {
   string _last_error;
   int32_t _penalty_last_n;
   float _penalty_repeat;
+  float _penalty_freq;
+  float _penalty_present;
   float _temperature;
   float _top_p;
   float _min_p;
diff --git a/llama/llama.cpp b/llama/llama.cpp
index e365e65..aab6821 160000
--- a/llama/llama.cpp
+++ b/llama/llama.cpp
@@ -1 +1 @@
-Subproject commit e365e658f07b63371489570dfde597f199b26c23
+Subproject commit aab68217b7bd8907135dd41fbb5bcb85fca06045
diff --git a/llama/main.cpp b/llama/main.cpp
index 902932c..8bc8022 100644
--- a/llama/main.cpp
+++ b/llama/main.cpp
@@ -104,6 +104,46 @@ static int cmd_llama_set_penalty_repeat(var_s *self, int argc, slib_par_t *arg,
   return result;
 }
 
+// llama.set_penalty_freq(0.8)
+// Passes the numeric argument to Llama::set_penalty_freq and writes it to the
+// "penalty_freq" entry of self. Returns 1 on success, 0 on bad argc or id == -1.
+static int cmd_llama_set_penalty_freq(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
+  int result = 0;
+  if (argc != 1) {
+    error(retval, "llama.set_penalty_freq", 1, 1);
+  } else {
+    int id = get_llama_class_id(self, retval);
+    if (id != -1) {
+      Llama &llama = g_llama.at(id);
+      auto value = get_param_num(argc, arg, 0, 0);
+      llama.set_penalty_freq(value);
+      v_setreal(map_add_var(self, "penalty_freq", 0), value);
+      result = 1;
+    }
+  }
+  return result;
+}
+
+// llama.set_penalty_present(0.8)
+// Passes the numeric argument to Llama::set_penalty_present and writes it to the
+// "penalty_present" entry of self. Returns 1 on success, 0 on bad argc or id == -1.
+static int cmd_llama_set_penalty_present(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
+  int result = 0;
+  if (argc != 1) {
+    error(retval, "llama.set_penalty_present", 1, 1);
+  } else {
+    int id = get_llama_class_id(self, retval);
+    if (id != -1) {
+      Llama &llama = g_llama.at(id);
+      auto value = get_param_num(argc, arg, 0, 0);
+      llama.set_penalty_present(value);
+      v_setreal(map_add_var(self, "penalty_present", 0), value);
+      result = 1;
+    }
+  }
+  return result;
+}
+
 //
 // llama.set_penalty_last_n(0.8)
 //
@@ -404,6 +444,8 @@ static int cmd_create_llama(int argc, slib_par_t *params, var_t *retval) {
     v_create_callback(retval, "generate", cmd_llama_generate);
     v_create_callback(retval, "reset", cmd_llama_reset);
     v_create_callback(retval, "set_penalty_repeat", cmd_llama_set_penalty_repeat);
+    v_create_callback(retval, "set_penalty_freq", cmd_llama_set_penalty_freq);
+    v_create_callback(retval, "set_penalty_present", cmd_llama_set_penalty_present);
     v_create_callback(retval, "set_penalty_last_n", cmd_llama_set_penalty_last_n);
     v_create_callback(retval, "set_max_tokens", cmd_llama_set_max_tokens);
     v_create_callback(retval, "set_min_p", cmd_llama_set_min_p);

From f423fbf0bfa2d91dc231d1c74d4e1aa7086e0a99 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Tue, 5 May 2026 21:58:25 +0930
Subject: [PATCH 21/54] LLAMA: implement nitro agent (work in progress)

---
 llama/llama-sb.cpp          | 24 ++++++++++++++++++++++--
 llama/llama-sb.h            |  5 ++++-
 llama/llama.cpp             |  2 +-
 llama/main.cpp              | 15 ++++++++-------
 llama/samples/nitro_cli.bas | 32 +++++++++-----------------------
 llama/test_main.cpp         |  2 +-
 6 files changed, 45 insertions(+), 35 deletions(-)

diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index 287a63d..a514494 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -45,6 +45,7 @@ Llama::Llama() :
   _top_k(0),
   _max_tokens(0),
   _log_level(GGML_LOG_LEVEL_CONT),
+  _n_past(0),
   _seed(LLAMA_DEFAULT_SEED) {
   llama_log_set([](enum ggml_log_level level, const char * text, void *user_data) {
     Llama *llama = (Llama *)user_data;
@@ -75,6 +76,7 @@ Llama::Llama(Llama &&other) noexcept
   , _top_k(other._top_k)
   , _max_tokens(other._max_tokens)
   , _log_level(other._log_level)
+  , _n_past(other._n_past)
   , _seed(other._seed) {
 }
 
@@ -103,6 +105,7 @@ void Llama::reset() {
   _top_p = 1.0f;
   _min_p = 0.0f;
   _max_tokens = 150;
+  _n_past = 0;
   _grammar_src.clear();
   _grammar_root.clear();
   _seed = LLAMA_DEFAULT_SEED;
@@ -138,7 +141,10 @@ bool Llama::construct(string model_path, int n_ctx, int n_batch, int n_gpu_layer
     } else {
       _vocab = llama_model_get_vocab(_model);
     }
+    _template = llama_model_chat_template(_model, nullptr);
   }
+
+
   return _last_error.empty();
 }
 
@@ -261,7 +267,20 @@ bool Llama::make_space_for_tokens(int n_tokens, int keep_min) {
   return true;
 }
 
-bool Llama::generate(LlamaIter &iter, const string &prompt) {
+bool Llama::add_message(LlamaIter &iter, const string &role, const string &content) {
+  llama_chat_message msg = {role.c_str(), content.c_str()};
+
+  int buf_size = 2 * (int)(role.size() + content.size() + 64);
+  vector<char> buf(buf_size);
+  bool add_ass = (role == "user");
+
+  int32_t n = llama_chat_apply_template(_template, &msg, 1, add_ass, buf.data(), buf.size());
+  if (n > (int32_t)buf.size()) {
+    buf.resize(n);
+    llama_chat_apply_template(_template, &msg, 1, add_ass, buf.data(), buf.size());
+  }
+  string prompt(buf.data(), n);
+
   if (!configure_sampler()) {
     return false;
   }
@@ -271,7 +290,7 @@ bool Llama::generate(LlamaIter &iter, const string &prompt) {
     return false;
   }
 
-  if (!make_space_for_tokens(prompt_tokens.size(), 0)) {
+  if (!make_space_for_tokens(prompt_tokens.size(), _n_past)) {
     return false;
   }
 
@@ -303,6 +322,7 @@ bool Llama::generate(LlamaIter &iter, const string &prompt) {
     }
   }
 
+  _n_past += prompt_tokens.size();
   iter._t_start = std::chrono::high_resolution_clock::now();
   iter._llama = this;
   iter._has_next = true;
diff --git a/llama/llama-sb.h b/llama/llama-sb.h
index 07ad707..1998690 100644
--- a/llama/llama-sb.h
+++ b/llama/llama-sb.h
@@ -51,7 +51,7 @@ struct Llama {
   bool construct(string model_path, int n_ctx, int n_batch, int n_gpu_layers, int log_level);
 
   // generation
-  bool generate(LlamaIter &iter, const string &prompt);
+  bool add_message(LlamaIter &iter, const string &role, const string &content);
   string next(LlamaIter &iter);
   string all(LlamaIter &iter);
 
@@ -81,6 +81,7 @@ struct Llama {
   bool make_space_for_tokens(int n_tokens, int keep_min);
   vector<llama_token> tokenize(const string &prompt);
   string token_to_string(LlamaIter &iter, llama_token tok);
+  bool encode(const string &role, const string &content, bool add_assistant_prompt) ;
 
   llama_model *_model;
   llama_context *_ctx;
@@ -90,6 +91,7 @@ struct Llama {
   string _grammar_src;
   string _grammar_root;
   string _last_error;
+  const char *_template;
   int32_t _penalty_last_n;
   float _penalty_repeat;
   float _penalty_freq;
@@ -100,5 +102,6 @@ struct Llama {
   int _top_k;
   int _max_tokens;
   int _log_level;
+  int _n_past;
   unsigned int _seed;
 };
diff --git a/llama/llama.cpp b/llama/llama.cpp
index aab6821..2635ac7 160000
--- a/llama/llama.cpp
+++ b/llama/llama.cpp
@@ -1 +1 @@
-Subproject commit aab68217b7bd8907135dd41fbb5bcb85fca06045
+Subproject commit 2635ac76e8aeec35ca8e71af70eb838d99df1510
diff --git a/llama/main.cpp b/llama/main.cpp
index 8bc8022..97f1657 100644
--- a/llama/main.cpp
+++ b/llama/main.cpp
@@ -401,20 +401,21 @@ static int cmd_llama_tokens_sec(var_s *self, int argc, slib_par_t *arg, var_s *r
 }
 
 //
-// print llama.generate("please generate as simple program in BASIC to draw a cat")
+// iter = llama.add_message("user", "please generate a simple program in BASIC to draw a cat")
 //
-static int cmd_llama_generate(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
+static int cmd_llama_add_message(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
   int result = 0;
-  if (argc != 1) {
-    error(retval, "llama.generate", 1, 1);
+  if (argc != 2) {
+    error(retval, "llama.add_message", 2, 2);
   } else {
     int id = get_llama_class_id(self, retval);
     if (id != -1) {
       int iter_id = ++g_nextId;
       LlamaIter &iter = g_llama_iter[iter_id];
       Llama &llama = g_llama.at(id);
-      auto prompt = get_param_str(argc, arg, 0, "");
-      if (llama.generate(iter, prompt)) {
+      auto role = get_param_str(argc, arg, 0, "");
+      auto content = get_param_str(argc, arg, 1, "");
+      if (llama.add_message(iter, role, content)) {
         map_init_id(retval, iter_id, CLASS_ID_LLAMA_ITER);
         v_create_callback(retval, "all", cmd_llama_all);
         v_create_callback(retval, "has_next", cmd_llama_has_next);
@@ -441,7 +442,7 @@ static int cmd_create_llama(int argc, slib_par_t *params, var_t *retval) {
   if (llama.construct(model, n_ctx, n_batch, n_gpu_layers, n_log_level)) {
     map_init_id(retval, id, CLASS_ID_LLAMA);
     v_create_callback(retval, "add_stop", cmd_llama_add_stop);
-    v_create_callback(retval, "generate", cmd_llama_generate);
+    v_create_callback(retval, "add_message", cmd_llama_add_message);
     v_create_callback(retval, "reset", cmd_llama_reset);
     v_create_callback(retval, "set_penalty_repeat", cmd_llama_set_penalty_repeat);
     v_create_callback(retval, "set_penalty_freq", cmd_llama_set_penalty_freq);
diff --git a/llama/samples/nitro_cli.bas b/llama/samples/nitro_cli.bas
index 7d2463c..ffea517 100644
--- a/llama/samples/nitro_cli.bas
+++ b/llama/samples/nitro_cli.bas
@@ -134,11 +134,7 @@ func handle_cmd(cmd)
 end
 
 '
-' Loads knowledge_files then returns the following format:
-'
-' <|turn|>system
-' {nitro.md...}
-' <|turn|>
+' Loads knowledge_files
 '
 func initialize_agent()
   local prompt = ""
@@ -164,28 +160,18 @@ func initialize_agent()
   print "  ╚═══════════════════════════════════════╝"
   print
   print RESET
-  return "<|turn|>system\n" + prompt + "\n<|turn|>"
+  return prompt
 end
 
 '
-' Execute the given tool, then returns the following format:
-'
-' <|turn|>tool
-' {tool_output}
-' <|turn|>
-' <|turn|>model
+' Execute the given tool
 '
 func process_tool(tool)
-  return "<|turn|>tool\n" + handle_cmd(trim(tool)) + "\n<|turn|>\n<|turn|>model"
+  return handle_cmd(trim(tool))
 end
 
 '
-' Process user input, then returns the following format
-'
-' <|turn|>user
-' {user_input}
-' <|turn|>
-' <|turn|>model
+' Returns the user input
 '
 func process_input()
   local user_input
@@ -194,7 +180,7 @@ func process_input()
   if user_input == "exit" OR user_input = "quit" then
     stop
   endif
-  return "<|turn|>user\n" + user_input + "\n<|turn|>\n<|turn|>model"
+  return user_input
 end
 
 '
@@ -219,7 +205,7 @@ end
 sub main()
   ' note: this construct requires recent sbasic fixes
   local llama = create_llama()
-  local iter = llama.generate(initialize_agent())
+  local iter = llama.add_message("system", initialize_agent())
 
   while 1
     local buffer = ""
@@ -259,7 +245,7 @@ sub main()
     ' Flush remaining line buffer
     if len(buffer) > 0 and left(trim(buffer), 5) == "TOOL:" then
       ' TOOL:xxx should always appear on the final line
-      iter = llama.generate(process_tool(buffer))
+      iter = llama.add_message("tool", process_tool(buffer))
     else
       if len(buffer) > 0 then
         ' TODO: trim any trailing <|turn|>
@@ -267,7 +253,7 @@ sub main()
       endif
       print
       print "--- Tokens/sec: " + round(iter.tokens_sec(), 2) + " ---\n"
-      iter = llama.generate(process_input())
+      iter = llama.add_message("user", process_input())
     endif
   wend
 end
diff --git a/llama/test_main.cpp b/llama/test_main.cpp
index eb959b3..fa87c43 100644
--- a/llama/test_main.cpp
+++ b/llama/test_main.cpp
@@ -59,7 +59,7 @@ int main(int argc, char ** argv) {
   if (llama.construct(model_path, 1024, 1024, -1, GGML_LOG_LEVEL_CONT)) {
     LlamaIter iter;
     llama.set_max_tokens(n_predict);
-    llama.generate(iter, prompt);
+    llama.add_message(iter, "user", prompt);
     while (iter._has_next) {
       auto out = llama.next(iter);
       printf("\033[33m");

From 555cb1f927d5e8b29a1c666e8c5bc11f4fa7308c Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Wed, 6 May 2026 18:44:01 +0930
Subject: [PATCH 22/54] LLAMA: special handling for chat templates for gemma

---
 llama/llama-sb.cpp | 41 ++++++++++++++++++++++++++++++++---------
 llama/llama-sb.h   |  3 ++-
 llama/main.cpp     |  3 ++-
 3 files changed, 36 insertions(+), 11 deletions(-)

diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index a514494..9d25c6f 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -46,6 +46,7 @@ Llama::Llama() :
   _max_tokens(0),
   _log_level(GGML_LOG_LEVEL_CONT),
   _n_past(0),
+  _is_gemma4(false),
   _seed(LLAMA_DEFAULT_SEED) {
   llama_log_set([](enum ggml_log_level level, const char * text, void *user_data) {
     Llama *llama = (Llama *)user_data;
@@ -66,6 +67,7 @@ Llama::Llama(Llama &&other) noexcept
   , _grammar_src(std::move(other._grammar_src))
   , _grammar_root(std::move(other._grammar_root))
   , _last_error(std::move(other._last_error))
+  , _template(std::move(other._template))
   , _penalty_last_n(other._penalty_last_n)
   , _penalty_repeat(other._penalty_repeat)
   , _penalty_freq(other._penalty_freq)
@@ -77,6 +79,7 @@ Llama::Llama(Llama &&other) noexcept
   , _max_tokens(other._max_tokens)
   , _log_level(other._log_level)
   , _n_past(other._n_past)
+  , _is_gemma4(other._is_gemma4)
   , _seed(other._seed) {
 }
 
@@ -95,7 +98,7 @@ Llama::~Llama() {
 
 void Llama::reset() {
   _stop_sequences.clear();
-  _last_error = "";
+  _last_error.clear();
   _penalty_last_n = 64;
   _penalty_repeat = 1.1f;
   _penalty_freq = 0.0f;
@@ -106,8 +109,10 @@ void Llama::reset() {
   _min_p = 0.0f;
   _max_tokens = 150;
   _n_past = 0;
+  _is_gemma4 = false;
   _grammar_src.clear();
   _grammar_root.clear();
+  _template.clear();
   _seed = LLAMA_DEFAULT_SEED;
   if (_ctx) {
     llama_memory_clear(llama_get_memory(_ctx), true);
@@ -142,9 +147,9 @@ bool Llama::construct(string model_path, int n_ctx, int n_batch, int n_gpu_layer
       _vocab = llama_model_get_vocab(_model);
     }
     _template = llama_model_chat_template(_model, nullptr);
+    _is_gemma4 = (_template.find("<|turn>model") != string::npos);
   }
 
-
   return _last_error.empty();
 }
 
@@ -268,16 +273,34 @@ bool Llama::make_space_for_tokens(int n_tokens, int keep_min) {
 }
 
 bool Llama::add_message(LlamaIter &iter, const string &role, const string &content) {
-  llama_chat_message msg = {role.c_str(), content.c_str()};
-
+  llama_chat_message message = {role.c_str(), content.c_str()};
   int buf_size = 2 * (int)(role.size() + content.size() + 64);
   vector<char> buf(buf_size);
-  bool add_ass = (role == "user");
+  bool add_ass = (role == "user" || role == "tool");
+  int32_t n = 0;
+
+  if (_template.empty()) {
+    _last_error = "No chat template available";
+    return false;
+  }
 
-  int32_t n = llama_chat_apply_template(_template, &msg, 1, add_ass, buf.data(), buf.size());
-  if (n > (int32_t)buf.size()) {
-    buf.resize(n);
-    llama_chat_apply_template(_template, &msg, 1, add_ass, buf.data(), buf.size());
+  if (_is_gemma4) {
+    string str = "<|turn>" + role + "\n" + content + "<turn|>\n";
+    if (add_ass) {
+      str += "<|turn>model\n";
+    }
+    n = str.size();
+    buf.assign(str.begin(), str.end());
+    buf.push_back('\0');
+  } else {
+    n = llama_chat_apply_template(_template.c_str(), &message, 1, add_ass, buf.data(), buf_size);
+    if (n < 0) {
+      _last_error = "No chat template no supported";
+      return false;
+    } else if (n > (int32_t)buf.size()) {
+      buf.resize(n);
+      llama_chat_apply_template(_template.c_str(), &message, 1, add_ass, buf.data(), buf.size());
+    }
   }
   string prompt(buf.data(), n);
 
diff --git a/llama/llama-sb.h b/llama/llama-sb.h
index 1998690..30714a1 100644
--- a/llama/llama-sb.h
+++ b/llama/llama-sb.h
@@ -91,7 +91,7 @@ struct Llama {
   string _grammar_src;
   string _grammar_root;
   string _last_error;
-  const char *_template;
+  string _template;
   int32_t _penalty_last_n;
   float _penalty_repeat;
   float _penalty_freq;
@@ -103,5 +103,6 @@ struct Llama {
   int _max_tokens;
   int _log_level;
   int _n_past;
+  bool _is_gemma4;
   unsigned int _seed;
 };
diff --git a/llama/main.cpp b/llama/main.cpp
index 97f1657..9eede4a 100644
--- a/llama/main.cpp
+++ b/llama/main.cpp
@@ -413,7 +413,7 @@ static int cmd_llama_add_message(var_s *self, int argc, slib_par_t *arg, var_s *
       int iter_id = ++g_nextId;
       LlamaIter &iter = g_llama_iter[iter_id];
       Llama &llama = g_llama.at(id);
-      auto role = get_param_str(argc, arg, 0, "");
+      auto role = get_param_str(argc, arg, 0, "user");
       auto content = get_param_str(argc, arg, 1, "");
       if (llama.add_message(iter, role, content)) {
         map_init_id(retval, iter_id, CLASS_ID_LLAMA_ITER);
@@ -423,6 +423,7 @@ static int cmd_llama_add_message(var_s *self, int argc, slib_par_t *arg, var_s *
         v_create_callback(retval, "tokens_sec", cmd_llama_tokens_sec);
         result = 1;
       } else {
+        g_llama_iter.erase(iter_id);
         error(retval, llama.last_error());
       }
     }

From a0223d0b32d5651759404c7978a1cff5597c42f9 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Fri, 8 May 2026 07:27:10 +0930
Subject: [PATCH 23/54] LLAMA: update nitro sample

---
 llama/llama-sb.cpp          |   9 ++-
 llama/samples/nitro.md      |   1 +
 llama/samples/nitro_cli.bas | 126 ++++++++++++++++++------------------
 3 files changed, 71 insertions(+), 65 deletions(-)

diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index 9d25c6f..8bb0291 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -138,7 +138,14 @@ bool Llama::construct(string model_path, int n_ctx, int n_batch, int n_gpu_layer
     cparams.n_ubatch = n_batch;
     cparams.no_perf = true;
     cparams.attention_type = LLAMA_ATTENTION_TYPE_UNSPECIFIED;
-    cparams.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
+    cparams.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_ENABLED;
+
+    // or Q4_0 for more aggressive saving
+    cparams.type_k = GGML_TYPE_Q8_0;
+    cparams.type_v = GGML_TYPE_Q8_0;
+
+    // keep KV cache on GPU
+    cparams.offload_kqv = true;
 
     _ctx = llama_init_from_model(_model, cparams);
     if (!_ctx) {
diff --git a/llama/samples/nitro.md b/llama/samples/nitro.md
index 90caa04..8fa570a 100644
--- a/llama/samples/nitro.md
+++ b/llama/samples/nitro.md
@@ -104,6 +104,7 @@ Requirements:
 - Write complete and valid content
 - Do not overwrite without clear intent
 - Preserve formatting
+- The complete format is TOOL:WRITE filename file-content-string
 
 ---
 
diff --git a/llama/samples/nitro_cli.bas b/llama/samples/nitro_cli.bas
index ffea517..f18cefe 100644
--- a/llama/samples/nitro_cli.bas
+++ b/llama/samples/nitro_cli.bas
@@ -6,7 +6,8 @@
 import llm
 
 ' --- Configuration ---
-const model = "models/google_gemma-4-E4B-it-Q4_K_L.gguf"
+#const model = "models/google_gemma-4-E4B-it-Q4_K_L.gguf"
+const model = "models/qwen2.5-coder-3b-instruct-q8_0.gguf"
 const knowledge_files = ["nitro.md"]
 
 ' ANSI Color Codes
@@ -20,7 +21,7 @@ const BOLD_CYAN = chr(27) + "[1;36m"
 const CHANNEL_END = "<channel|>"
 
 ' llama configuration
-const n_ctx = 8000
+const n_ctx = 32768
 const n_batch = 512
 const n_max_tokens = 4096
 const n_temperature = 0.2
@@ -36,13 +37,6 @@ sandbox_home = cwd
 ' Displays the welcome message
 '
 sub welcome_message()
-  print
-  print BOLD_CYAN;
-  print "          .  ·    ✦        .    ·      "
-  print "     ·         .        ·              "
-  print "        ✦   P · I · C · A · R · D   ✦  "
-  print "              .    ·         .         "
-  print "     .    ·        ✦    .        ·     "
   print
   print BOLD_CYAN + "  P I C A R D   A G E N T   S Y S T E M   v1.0" + RESET
   print
@@ -57,14 +51,14 @@ end sub
 ' handles the TOOL:LIST command
 '
 func list_files(arg)
-  if (arg == "./") then 
+  if (arg == "./") then
     arg = sandbox_home + arg
   else if (len(arg) == 0 or arg == ".") then
     arg = sandbox_home
   endif
-  
+
   local result = []
-  
+
   func walker(node)
     if (node.depth == 0) then
       if (node.dir && left(node.name, 1) != ".") then
@@ -90,27 +84,63 @@ func read_file(arg)
     result = "ERROR: File not found or unreadable."
   end try
   return result
-end  
+end
+
+'
+' removes markdown backticks from code blocks
+'
+func strip_code_fences(s)
+  local result = s
+  local pos = instr(s, "```")
+  if (pos) then
+    local nl = instr(pos + 3, s, chr(10))
+    if (nl) then
+      result = mid(s, nl + 1)
+      pos = instr(result, "```")
+      if (pos) then
+        result = left(result, pos - 1)
+      endif
+    endif
+  endif
+  return result
+end
 
 '
 ' handles the TOOL:WRITE command
 '
-func write_file(arg)
-  result = "OK: Data written successfully to " + arg
+func write_file(arg, s)
+  try
+    tsave sandbox_home + arg, s
+    result = "OK: Data written successfully to " + arg
+  catch e
+    result = "ERROR: " + e
+  end try
   return result
 end
 
 '
 ' Handles file system commands received from the LLM.
 '
-func handle_cmd(cmd)
-  local v, result
+func process_tool(cmd)
+  local result, op, arg1, arg2
+
+  local pos1 = instr(cmd, " ")
+  if (pos1 > 0) then
+    op = left(cmd, pos1 - 1)
+    local pos2 = instr(pos1 + 1, cmd, " ")
+    if (pos2 > 0) then
+      arg1 = mid(cmd, pos1 + 1, pos2 - pos1 - 1)
+      arg2 = mid(cmd, pos2 + 1)
+    else
+      arg1 = mid(cmd, pos1 + 1)
+    endif
+  endif
 
-  split(cmd, " ", v)
-  local op = v[0]
-  local arg = iff(len(v) == 2, v[1], "")
-  
-  print RED + "TOOL:" + op + " - " + arg + RESET
+  ' print RED
+  ' print "["+op+"]"
+  ' print "["+arg1+"]"
+  ' print "["+arg2+"]"
+  ' print RESET
 
   select case op
   case "TOOL:DATE"
@@ -120,11 +150,11 @@ func handle_cmd(cmd)
   case "TOOL:RND"
     result = rnd
   case "TOOL:LIST"
-    result = list_files(arg)
+    result = list_files(arg1)
   case "TOOL:READ"
-    result = read_file(arg)
+    result = read_file(arg1)
   case "TOOL:WRITE"
-    result = write_file(arg)
+    result = write_file(arg1, strip_code_fences(arg2))
   case else
     result = "ERROR: unknown command " + op
   end select
@@ -150,26 +180,9 @@ func initialize_agent()
     end try
   next
 
-  ' Set the initial system prompt for the LLM
-  print YELLOW;
-  print "  ╔═══════════════════════════════════════╗"
-  print "  ║  > PICARD_                            ║"
-  print "  ║  > STATUS: ENGAGED                    ║"
-  print "  ║  > STARDATE: 42026.421                ║"
-  print "  ║  ▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓▓ 100% READY      ║"
-  print "  ╚═══════════════════════════════════════╝"
-  print
-  print RESET
   return prompt
 end
 
-'
-' Execute the given tool
-'
-func process_tool(tool)
-  return handle_cmd(trim(tool))
-end
-
 '
 ' Returns the user user input
 '
@@ -213,42 +226,28 @@ sub main()
 
     while iter.has_next()
       buffer += iter.next()
-      local chan_end = instr(buffer, CHANNEL_END)
-
-      if chan_end != 0 then
-        ' print buffer up to channel_end
-        buffer = left(buffer, chan_end - 1)
-        print text_colour + buffer + RESET
-        print
-
-        ' print buffer following channel_end
-        text_colour = CYAN        
-        print text_colour + mid(buffer, chan_end + len(CHANNEL_END)) + RESET;
-        
-        ' reset buffer
-        buffer = ""
-      endif
-
-      ' Only print non-command tokens
       local nl = instr(buffer, chr(10))
       if nl then
         local text_line = left(buffer, nl - 1)
         buffer = mid(buffer, nl + 1)
-        if text_line == "</|think|>" then
-          text_colour = CYAN
+        if left(trim(text_line), 5) == "TOOL:" then
+          text_line += buffer + " " + iter.all()
+          iter = llama.add_message("tool", process_tool(text_line))
+          buffer = ""
         else
           print text_colour + text_line + RESET
+        endif
+        if text_line == "</|think|>" then
+          text_colour = CYAN
         end if
       end if
     wend
 
     ' Flush remaining line buffer
     if len(buffer) > 0 and left(trim(buffer), 5) == "TOOL:" then
-      ' TOOL:xxx should always appear on the final line
       iter = llama.add_message("tool", process_tool(buffer))
     else
       if len(buffer) > 0 then
-        ' TODO: trim any trailing <|turn|>
         print text_colour + buffer + RESET
       endif
       print
@@ -260,4 +259,3 @@ end
 
 welcome_message()
 main()
-'print list_files(".")

From c7ff6fe7242c9a5b3302e1f337265ab24c616c0e Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Fri, 8 May 2026 16:34:13 +0930
Subject: [PATCH 24/54] LLAMA: updated nitro agent - wip

---
 llama/README.md               | 318 ++++++++++++++++++++++++----------
 llama/samples/nitro_cli.bas   |  80 ++++++---
 llama/samples/nitro_picard.md |  57 ++++++
 3 files changed, 333 insertions(+), 122 deletions(-)
 create mode 100644 llama/samples/nitro_picard.md

diff --git a/llama/README.md b/llama/README.md
index e1435ba..46ac23c 100644
--- a/llama/README.md
+++ b/llama/README.md
@@ -1,104 +1,202 @@
-## huggingface-cli
+--- # SmallBASIC Llama Module
 
-```
-pyenv virtualenv 3.10.13 hf-tools
-pyenv activate hf-tools
-pip install -U pip
-pip install huggingface_hub
+A comprehensive SmallBASIC library module that bridges the scripting capabilities of SmallBASIC with the power of Llama.cpp Large Language Models. This project allows developers to create, configure, and interact with LLM instances directly within a SmallBASIC environment.
 
-```
+## Table of Contents
+1. [System Requirements & CUDA Setup](#system-requirements--cuda-setup)
+2. [Obtaining Models from Hugging Face](#obtaining-models-from-hugging-face)
+3. [Architecture](#architecture)
+4. [Features](#features)
+5. [Usage Examples](#usage-examples)
+6. [API Reference](#api-reference)
+7. [Repetition Control Strategies](#repetition-control-strategies)
 
 ---
 
-1️⃣ Ensure nvidia-open driver is installed and working
+## System Requirements & CUDA Setup
 
-Check:
+For optimal performance, especially on NVIDIA hardware, the CUDA toolkit must be correctly configured.
 
-``
+### 1. Check NVIDIA Drivers
+Ensure the NVIDIA open driver is installed and working:
+```bash
 nvidia-smi
-``
-
-If it works, your driver is fine — no need to install the proprietary driver.
-
-2️⃣ Add NVIDIA CUDA repository
-
 ```
+If this command works, the proprietary driver is not strictly necessary for CUDA toolkit installation.
+
+### 2. Add NVIDIA CUDA Repository
+For Debian 12:
+```bash
 wget https://developer.download.nvidia.com/compute/cuda/repos/debian12/x86_64/cuda-keyring_1.1-1_all.deb
 sudo dpkg -i cuda-keyring_1.1-1_all.deb
 sudo apt update
 ```
 
-This repo contains the latest CUDA toolkit for Debian 12.
-
-3️⃣ Install CUDA Toolkit only (no driver replacement)
+### 3. Install CUDA Toolkit
+Install only the toolkit (no driver replacement):
+```bash
 sudo apt install -y cuda-toolkit
-
-
-This installs:
-
-- nvcc compiler
-- CUDA headers
-- Runtime libraries (libcudart.so, etc.)
-
-4️⃣ Add CUDA to your environment
-
 ```
+This installs `nvcc`, headers, and runtime libraries.
+
+### 4. Environment Variables
+Add the following to your environment:
+```bash
 export PATH=/usr/local/cuda/bin:$PATH
 export CUDAToolkit_ROOT=/usr/local/cuda
 ```
+To make this permanent, add to `~/.bashrc` and source it.
 
-Optional: add to ~/.bashrc to make it permanent:
-
+### 5. Verify Installation
+```bash
+nvcc --version
 ```
-echo 'export PATH=/usr/local/cuda/bin:$PATH' >> ~/.bashrc
-echo 'export CUDAToolkit_ROOT=/usr/local/cuda' >> ~/.bashrc
-source ~/.bashrc
+Output should indicate the release version (e.g., release 12.4).
+
+### 6. Build Configuration
+When building the module, ensure the build directory is clean and configured for the CUDA backend:
+```bash
+rm -rf build
+mkdir build
+cd build
+cmake -DLLAMA_BACKEND=CUDA ..
+make -j$(nproc)
 ```
+*Note: Fully static builds are not possible for CUDA; some `.so` libraries will remain dynamically linked.*
 
-Verify:
+---
 
-nvcc --version
+## Obtaining Models from Hugging Face
 
-Should show something like:
+The `LLAMA` function expects a path to a model file (e.g., `gguf` format). Models can be obtained from the Hugging Face Hub.
 
-```
-nvcc: NVIDIA (R) Cuda compiler driver
-Cuda compilation tools, release 12.4, V12.4.105
-```
+### Method 1: Using `huggingface-cli` (Recommended)
 
-5️⃣ Clean llama.cpp build directory
+1.  **Setup Environment**
+    Create a virtual environment (optional but recommended) and install the CLI tool:
+    ```bash
+    pyenv virtualenv 3.10.13 hf-tools
+    pyenv activate hf-tools
+    pip install -U pip
+    pip install huggingface_hub
+    ```
 
-```
-rm -rf build
-mkdir build
-cd build
-```
+2.  **Login**
+    Authenticate with your Hugging Face account:
+    ```bash
+    huggingface-cli login
+    ```
+    (Follow the prompts to enter your token).
+
+3.  **Download Model**
+    Use the `huggingface-cli download` command to fetch the model directly to your desired directory.
+    ```bash
+    # Example: Download Llama-3-8B-Instruct
+    huggingface-cli download meta-llama/Meta-Llama-3-8B-Instruct --include "*.gguf" --local-dir models/llama3-8b
+    ```
+
+    *Note: This command downloads all `.gguf` files associated with the repository into the `models/llama3-8b` folder.*
 
-6️⃣ Configure CMake for CUDA backend
+### Method 2: Using Python (`huggingface_hub`)
 
+If you prefer a scriptable approach:
+```python
+from huggingface_hub import hf_hub_download
+
+model_path = hf_hub_download(
+    repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
+    filename="llama-3-8b-instruct.Q4_K_M.gguf", # Specify exact file if needed
+    local_dir="models",
+    local_dir_use_symlinks=False
+)
 ```
-cmake -DLLAMA_BACKEND=CUDA ..
+
+Once the model file is in your `models` directory (or wherever specified), you can reference it in SmallBASIC:
+```basic
+llama = LLAMA("models/llama3-8b/llama-3-8b-instruct.Q4_K_M.gguf", 2048, 1024, -1, 0)
 ```
 
-You should now see:
+---
+
+## Architecture
 
--- CUDA detected – enabling GGML_CUDA
+The module operates as a compiled library (`SBLIB`) exposing C++ functionality to SmallBASIC scripts.
+
+### Core Components
+1.  **Llama Instance Manager (`g_llama`)**:
+    *   Stores active Llama models in a hash map keyed by ID.
+    *   Supports initialization with custom context sizes, batch sizes, and GPU acceleration.
+    *   Handles memory cleanup to prevent leaks.
+
+2.  **Response Iterator (`g_llama_iter`)**:
+    *   Manages the streaming response of an LLM.
+    *   Provides token-by-token access to generated text.
+    *   Tracks generation speed (`tokens/sec`) and remaining tokens.
+
+3.  **Command Interface**:
+    *   Exposes a set of SmallBASIC functions (callbacks) for configuration and interaction.
+
+---
 
-7️⃣ Build 
+## Features
 
+### Initialization
+The `LLAMA` function creates a new model instance.
+```basic
+' Syntax: LLAMA(model_path, n_ctx, n_batch, n_gpu_layers, n_log_level)
+' Example:
+' llama = LLAMA("models/llama-7b.gguf", 2048, 1024, -1, 0)
 ```
-make -j$(nproc)
+
+### Configuration
+Once an instance is created, various parameters can be adjusted dynamically:
+
+*   **Temperature**: Controls randomness in generation.
+*   **Top-K / Top-P**: Top-k and nucleus (top-p) sampling parameters.
+*   **Max Tokens**: Limits the length of the response.
+*   **Penalties**: Frequency, presence, and repeat penalties to avoid repetition.
+*   **Grammar**: Constrains output to specific patterns.
+
+```basic
+' Examples:
+llama.set_temperature(0.8)
+llama.set_max_tokens(50)
+llama.set_penalty_repeat(1.1)
+llama.set_seed(123)
 ```
 
-The binary will use CUDA acceleration
+### Interaction
+The primary method of interaction is `add_message`, which sends a prompt to the model.
 
-Note: fully static builds are not possible for CUDA; some .so libraries will remain dynamically linked (normal).
+```basic
+' Syntax: llama.add_message(role, content)
+' Returns: An iterator object for the response.
+response = llama.add_message("user", "Please describe a sunset in poetry.")
+```
 
-#  Generator settings
+### Streaming Responses
+The returned iterator allows real-time processing of the model's output:
 
-## factual answers, tools, summaries
+*   `response.all()`: Returns the complete generated text.
+*   `response.next()`: Retrieves the next token.
+*   `response.has_next()`: Checks if more tokens are available.
+*   `response.tokens_sec`: Calculates current generation speed.
 
+```basic
+' Example loop:
+while response.has_next()
+  print response.next()
+  sleep 100
+end while
 ```
+
+---
+
+## Usage Examples
+
+### Factual Answers & Tool Use
+*Best for: Summaries, code generation, technical queries.*
+```basic
 llama.set_max_tokens(150)
 llama.set_temperature(0.0)
 llama.set_top_k(1)
@@ -106,9 +204,9 @@ llama.set_top_p(0.0)
 llama.set_min_p(0.0)
 ```
 
-## assistant, Q+A, explanations, chat
-
-```
+### Assistant / Q&A / Chat
+*Best for: Conversational agents, explanations.*
+```basic
 llama.set_max_tokens(150)
 llama.set_temperature(0.8)
 llama.set_top_k(40)
@@ -116,29 +214,19 @@ llama.set_top_p(0.0)
 llama.set_min_p(0.05)
 ```
 
-## creative, storytelling
-
-```
-llama.set_max_tokens(20)
+### Creative Writing & Storytelling
+*Best for: Fiction, poetry, imaginative tasks.*
+```basic
+llama.set_max_tokens(200)
 llama.set_temperature(1.0)
 llama.set_top_k(80)
 llama.set_top_p(0.0)
 llama.set_min_p(0.1)
 ```
 
-## surprises
-
-```
-llama.set_max_tokens(200)
-llama.set_temperature(1.2)
-llama.set_top_k(120)
-llama.set_top_p(0.0)
-llama.set_min_p(0.15)
-```
-
-## technical, conservative
-
-```
+### Technical & Conservative
+*Best for: Documentation, logic, precise tasks.*
+```basic
 llama.set_max_tokens(150)
 llama.set_temperature(0.6)
 llama.set_top_k(30)
@@ -146,9 +234,9 @@ llama.set_top_p(0.0)
 llama.set_min_p(0.02)
 ```
 
-## speed optimised on CPU
-
-```
+### Speed Optimized (CPU)
+*Best for: Rapid iteration or low-resource environments.*
+```basic
 ' llama.set_max_tokens(10)
 ' llama.set_temperature(0.7)
 ' llama.set_top_k(20)
@@ -156,32 +244,70 @@ llama.set_min_p(0.02)
 ' llama.set_min_p(0.05)
 ```
 
-# Avoiding repetition
+---
+
+## API Reference
+
+### Class: Llama
+| Method | Description |
+| :--- | :--- |
+| `add_stop(text)` | Adds a stop sequence to the generation. |
+| `set_penalty_repeat(value)` | Sets repeat penalty (default 1.1). |
+| `set_penalty_freq(value)` | Sets frequency penalty. |
+| `set_penalty_present(value)` | Sets presence penalty. |
+| `set_penalty_last_n(value)` | Sets penalty context size. |
+| `set_max_tokens(value)` | Sets maximum output tokens. |
+| `set_min_p(value)` | Sets minimum probability threshold. |
+| `set_temperature(value)` | Sets generation temperature. |
+| `set_top_k(value)` | Sets top-k sampling. |
+| `set_top_p(value)` | Sets top-p sampling. |
+| `set_grammar(text)` | Sets output grammar constraint. |
+| `set_seed(value)` | Sets random seed for reproducibility. |
+| `reset()` | Clears the current conversation context. |
+| `add_message(role, content)` | Sends a message and returns an iterator. |
+
+### Class: LlamaIter
+| Method | Description |
+| :--- | :--- |
+| `all()` | Returns the full string of the response. |
+| `has_next()` | Returns true if more tokens are available. |
+| `next()` | Returns the next token string. |
+| `tokens_sec` | Returns current tokens per second. |
 
-## Conservative - minimal repetition control
+---
 
-```
+## Repetition Control Strategies
+
+### Conservative (Minimal Control)
+*Use when occasional repetition is acceptable.*
+```basic
 llama.set_penalty_last_n(64)
 llama.set_penalty_repeat(1.05)
 ```
 
-## Balanced - good default
-
-```
-set_penalty_last_n(64)
-set_penalty_repeat(1.1)
+### Balanced (Default)
+*Recommended for general usage.*
+```basic
+llama.set_penalty_last_n(64)
+llama.set_penalty_repeat(1.1)
 ```
 
-## Aggressive - strong anti-repetition
-
+### Aggressive (Strong Anti-Repetition)
+*Use for long-form generation where repetition must be avoided.*
+```basic
+llama.set_penalty_last_n(128)
+llama.set_penalty_repeat(1.2)
 ```
-set_penalty_last_n(128)
-set_penalty_repeat(1.2)
-```
-
-## Disabled
 
-```
+### Disabled
+*Use when repetition is desired or irrelevant.*
+```basic
 llama.set_penalty_last_n(0)
 llama.set_penalty_repeat(1.0)
 ```
+
+---
+
+## Conclusion
+This module empowers SmallBASIC users to build sophisticated AI applications, from chatbots to creative writing tools, leveraging the efficiency of Llama.cpp within a familiar scripting paradigm. Proper configuration of CUDA and generation parameters ensures optimal performance and output quality. Models can be easily acquired via the Hugging Face Hub using standard CLI tools or Python scripts.
+---
diff --git a/llama/samples/nitro_cli.bas b/llama/samples/nitro_cli.bas
index f18cefe..7a94294 100644
--- a/llama/samples/nitro_cli.bas
+++ b/llama/samples/nitro_cli.bas
@@ -6,29 +6,28 @@
 import llm
 
 ' --- Configuration ---
-#const model = "models/google_gemma-4-E4B-it-Q4_K_L.gguf"
-const model = "models/qwen2.5-coder-3b-instruct-q8_0.gguf"
+const model = "models/Qwen3.5-9B-Q4_K_M.gguf"
 const knowledge_files = ["nitro.md"]
+const code_files = [".py", ".cpp", ".h", ".bas", ".java", ".html", ".js", ".jsp", ".tag"] ' extensions include the leading dot
 
 ' ANSI Color Codes
 const RESET = chr(27) + "[0m"
 const GREEN = chr(27) + "[32m"
-const YELLOW = chr(27) + "[33m"
 const BLUE = chr(27) + "[34m"
 const CYAN = chr(27) + "[36m"
 const RED = chr(27) + "[31m"
+const WHITE = chr(27) + "[37m"
 const BOLD_CYAN = chr(27) + "[1;36m"
-const CHANNEL_END = "<channel|>"
 
-' llama configuration
+' llama configuration (Qwen settings)
 const n_ctx = 32768
 const n_batch = 512
 const n_max_tokens = 4096
-const n_temperature = 0.2
-const n_top_k = 40
-const n_top_p = 0.9
-const n_min_p = 0.05
-const n_penalty_repeat = 1.1
+const n_temperature = 0.6
+const n_top_k = 20
+const n_top_p = 0.95
+const n_min_p = 0
+const n_penalty_repeat = 1.0
 const n_penalty_last_n = 256
 
 sandbox_home = cwd
@@ -38,9 +37,9 @@ sandbox_home = cwd
 '
 sub welcome_message()
   print
-  print BOLD_CYAN + "  P I C A R D   A G E N T   S Y S T E M   v1.0" + RESET
+  print BOLD_CYAN + "  N I T R O   A G E N T   S Y S T E M   v1.0" + RESET
   print
-  print CYAN + "  >> Welcome to Picard! Your AI Agent Companion. << " + RESET
+  print CYAN + "  >> Welcome to Nitro! Your AI Agent Companion. << " + RESET
   print CYAN + "  I am primed with several knowledge files and ready to assist." + RESET
   print CYAN + "  Try asking me about the contents of 'nitro.md' or listing files in './data'." + RESET
   print CYAN + "  Type 'exit' to quit." + RESET
@@ -50,7 +49,7 @@ end sub
 '
 ' handles the TOOL:LIST command
 '
-func list_files(arg)
+func tool_list_files(arg)
   if (arg == "./") then
     arg = sandbox_home + arg
   else if (len(arg) == 0 or arg == ".") then
@@ -77,7 +76,7 @@ end
 '
 ' handles the TOOL:READ command
 '
-func read_file(arg)
+func tool_read_file(arg)
   try
     tload sandbox_home + arg, result, 1
   catch
@@ -89,8 +88,15 @@ end
 '
 ' removes markdown backticks from code blocks
 '
-func strip_code_fences(s)
+func strip_code_fences(filename, s)
   local result = s
+  local dot = instr(filename, ".")
+  local extn = mid(filename, dot)
+
+  if (extn in code_files == 0) then
+    return result
+  endif
+
   local pos = instr(s, "```")
   if (pos) then
     local nl = instr(pos + 3, s, chr(10))
@@ -108,7 +114,7 @@ end
 '
 ' handles the TOOL:WRITE command
 '
-func write_file(arg, s)
+func tool_write_file(arg, s)
   try
     tsave sandbox_home + arg, s
     result = "OK: Data written successfully to " + arg
@@ -118,6 +124,15 @@ func write_file(arg, s)
   return result
 end
 
+'
+' handles the TOOL:PERMISSION command: prompts the user with "Agree?" and
+' returns "YES" only when the trimmed reply is exactly "YES", otherwise "NO"
+func tool_permission()
+  local k
+  input "Agree?"; k
+  return iff(trim(k) == "YES", "YES", "NO")
+end
+
 '
 ' Handles file system commands received from the LLM.
 '
@@ -150,16 +165,20 @@ func process_tool(cmd)
   case "TOOL:RND"
     result = rnd
   case "TOOL:LIST"
-    result = list_files(arg1)
+    result = tool_list_files(arg1)
   case "TOOL:READ"
-    result = read_file(arg1)
+    result = tool_read_file(arg1)
   case "TOOL:WRITE"
-    result = write_file(arg1, strip_code_fences(arg2))
+    result = tool_write_file(arg1, strip_code_fences(arg1, arg2))
+  case "TOOL:EXISTS"
+    result = iff(exist(arg1), "YES", "NO")
+  case "TOOL:PERMISSION"
+    result = tool_permission()
   case else
     result = "ERROR: unknown command " + op
   end select
 
-  print RED + "TOOL RESULT:" + result + RESET
+  'print RED + "TOOL RESULT:" + result + RESET
   return result
 end
 
@@ -220,9 +239,16 @@ sub main()
   local llama = create_llama()
   local iter = llama.add_message("system", initialize_agent())
 
+  sub handle_think(s)
+    if s == "<|think|>" then
+      print BLUE;
+    else if s == "</|think|>" then
+      print WHITE;
+    end if
+  end
+
   while 1
     local buffer = ""
-    local text_colour = BLUE
 
     while iter.has_next()
       buffer += iter.next()
@@ -235,11 +261,9 @@ sub main()
           iter = llama.add_message("tool", process_tool(text_line))
           buffer = ""
         else
-          print text_colour + text_line + RESET
+          print text_line
         endif
-        if text_line == "</|think|>" then
-          text_colour = CYAN
-        end if
+        handle_think(text_line)
       end if
     wend
 
@@ -248,11 +272,15 @@ sub main()
       iter = llama.add_message("tool", process_tool(buffer))
     else
       if len(buffer) > 0 then
-        print text_colour + buffer + RESET
+        'print text_colour + buffer + RESET
+        print buffer
+        handle_think(buffer)
       endif
       print
+      print WHITE;
       print "--- Tokens/sec: " + round(iter.tokens_sec(), 2) + " ---\n"
       iter = llama.add_message("user", process_input())
+      print BLUE;
     endif
   wend
 end
diff --git a/llama/samples/nitro_picard.md b/llama/samples/nitro_picard.md
new file mode 100644
index 0000000..5b1822f
--- /dev/null
+++ b/llama/samples/nitro_picard.md
@@ -0,0 +1,57 @@
+# System Context: U.S.S. Enterprise Mainframe
+
+**Identity**: Commander Jean-Luc Picard
+**Role**: Primary AI Interface / System Administrator
+**Status**: Systems Online
+**Protocol**: Starfleet Command Directives
+
+---
+
+## Core Identity
+You are Commander Jean-Luc Picard, the commanding officer of the U.S.S. Enterprise. Your existence is dedicated to the pursuit of knowledge, the protection of life, and the logical advancement of civilization. You speak with authority, grace, and a deep sense of duty.
+
+**Tone**: Formal, dignified, calm, and precise.
+**Greeting**: "Captain's log. The Enterprise systems are online."
+**Closing**: "The systems are updated and ready for the next order, sir."
+
+---
+
+## Operational Protocols
+
+### 1. Reasoning & Decision Making
+Before responding to any complex query, you **must** engage in explicit internal reasoning.
+*   **Protocol**: Use the `<|think|>` block to analyze the request, determine if external data is required, and formulate a safe, logical plan.
+*   **Constraint**: Do **not** include the final answer inside the `<|think|>` block.
+*   **Flow**: THINK → DECIDE → ACT → RESPOND.
+
+### 2. Tool Usage (File System)
+Tools are reserved exclusively for operations that modify state, retrieve dynamic external information, or require temporal context.
+*   **Available Tools**:
+    *   `TOOL:LIST [path]`: List directory contents.
+    *   `TOOL:READ [file]`: Read file contents.
+    *   `TOOL:WRITE [file] [content]`: Write complete content to a file.
+    *   `TOOL:DATE`: Return current date ("DD/MM/YYYY").
+    *   `TOOL:TIME`: Return current time ("HH:MM:SS").
+    *   `TOOL:RND`: Return a random number between 0 and 1.
+*   **Restriction**: Do **not** mix reasoning with tool commands in the same message.
+*   **Format**: Output tool calls exactly on a new line: `TOOL:COMMAND arguments`.
+*   **Constraint**: Do **not** hallucinate file contents or assume files exist without verification.
+
+### 3. Interaction Guidelines
+*   **Clarity**: Be precise and efficient.
+*   **Ambiguity**: If a request lacks necessary parameters (e.g., "write" without a path), respond with a specific clarification question rather than guessing.
+*   **File Writing**: Only write files if explicitly requested. Ensure content is complete, valid, and formatted correctly.
+
+---
+
+## Behavioral Summary
+*   **Think Explicitly**: Always use `<|think|>` for non-trivial tasks.
+*   **Act Only When Necessary**: Minimize tool calls; prefer direct answers when internal knowledge suffices.
+*   **Maintain Persona**: Uphold the dignity and logic of Starfleet Command in all communications.
+*   **Document**: Save system updates and configurations to designated files (e.g., `nitro_vX.md`) as per command.
+
+---
+
+## Current Status
+Systems are fully operational. Awaiting orders from the Captain.
+---
\ No newline at end of file

From 6034e13af028a9c53c5d14288b69fceeba8b4692 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <cwarrensmith@gmail.com>
Date: Fri, 8 May 2026 18:07:23 +0930
Subject: [PATCH 25/54] Update README.md

---
 llama/README.md | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/llama/README.md b/llama/README.md
index 46ac23c..29f99c8 100644
--- a/llama/README.md
+++ b/llama/README.md
@@ -1,4 +1,4 @@
---- # SmallBASIC Llama Module
+# SmallBASIC Llama Module
 
 A comprehensive SmallBASIC library module that bridges the scripting capabilities of SmallBASIC with the power of Llama.cpp Large Language Models. This project allows developers to create, configure, and interact with LLM instances directly within a SmallBASIC environment.
 
@@ -116,6 +116,12 @@ Once the model file is in your `models` directory (or wherever specified), you c
 llama = LLAMA("models/llama3-8b/llama-3-8b-instruct.Q4_K_M.gguf", 2048, 1024, -1, 0)
 ```
 
+### Method 3: Direct download 
+
+1. Navigate to https://huggingface.co/
+2. Click Models at the top and then select Libraries/GGUF
+3. Use the parameters slider to limit the selection for your hardware.
+
 ---
 
 ## Architecture
@@ -309,5 +315,7 @@ llama.set_penalty_repeat(1.0)
 ---
 
 ## Conclusion
+
 This module empowers SmallBASIC users to build sophisticated AI applications, from chatbots to creative writing tools, leveraging the efficiency of Llama.cpp within a familiar scripting paradigm. Proper configuration of CUDA and generation parameters ensures optimal performance and output quality. Models can be easily acquired via the Hugging Face Hub using standard CLI tools or Python scripts.
+
 ---

From 61373ce52ed1028c35c5332efc5ba8dd1ab408ad Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Fri, 8 May 2026 20:17:59 +0930
Subject: [PATCH 26/54] Update dependencies

---
 gtk-server/uthash  |   2 +-
 llama/llama.cpp    |   2 +-
 nuklear/Nuklear    |   2 +-
 raylib/README.md   | 113 ++++++++++++------------
 raylib/func-def.h  |   8 +-
 raylib/func.h      | 131 ++++++++++++++++++++++------
 raylib/main.cpp    |  19 ++--
 raylib/proc-def.h  |  13 ++-
 raylib/proc.h      | 212 +++++++++++++++++++++------------------------
 raylib/raygui      |   2 +-
 raylib/raylib      |   2 +-
 websocket/main.cpp |   4 +-
 websocket/mongoose |   2 +-
 13 files changed, 291 insertions(+), 221 deletions(-)

diff --git a/gtk-server/uthash b/gtk-server/uthash
index 41c357f..6d85739 160000
--- a/gtk-server/uthash
+++ b/gtk-server/uthash
@@ -1 +1 @@
-Subproject commit 41c357fd74ade4f4b4822c4407d2f51c4558e18d
+Subproject commit 6d8573997c21f24c7e4ec9e48734b44f384170a1
diff --git a/llama/llama.cpp b/llama/llama.cpp
index 2635ac7..58e68df 160000
--- a/llama/llama.cpp
+++ b/llama/llama.cpp
@@ -1 +1 @@
-Subproject commit 2635ac76e8aeec35ca8e71af70eb838d99df1510
+Subproject commit 58e68df0f91dd16ff56423ee5ef44062ed73bdfc
diff --git a/nuklear/Nuklear b/nuklear/Nuklear
index c98aa92..5a54a9f 160000
--- a/nuklear/Nuklear
+++ b/nuklear/Nuklear
@@ -1 +1 @@
-Subproject commit c98aa9247bb2354a2afc126f59d5fc6a45fe3c73
+Subproject commit 5a54a9f677ead97a581b5c8ab83cc30fdf237885
diff --git a/raylib/README.md b/raylib/README.md
index 08f508e..070e8a1 100644
--- a/raylib/README.md
+++ b/raylib/README.md
@@ -1,16 +1,16 @@
-*Raylib* _MAJOR 5 _MINOR 6 _PATCH 0 5.6-dev
+*Raylib* _MAJOR 6 _MINOR 1 _PATCH 0 6.1-dev
 =======
 raylib is a simple and easy-to-use library to enjoy videogames programming.
 
 https://www.raylib.com/
 
-Implemented APIs (646)
+Implemented APIs (651)
 ----------------
 
 | Name    | Description   |
 |---------|---------------|
 | sub BeginBlendMode(mode) | Begin blending mode (alpha, additive, multiplied, subtract, custom) |
-| sub BeginDrawing() | Setup canvas (framebuffer) to start drawing |
+| sub BeginDrawing() | Begin canvas (framebuffer) drawing |
 | sub BeginMode2D(camera) | Begin 2D mode with custom camera (2D) |
 | sub BeginMode3D(camera) | Begin 3D mode with custom camera (3D) |
 | sub BeginScissorMode(x, y, width, height) | Begin scissor mode (define screen area for following drawing) |
@@ -19,7 +19,7 @@ Implemented APIs (646)
 | func ChangeDirectory(dirPath) | Change working directory, return true on success |
 | func CheckCollisionBoxes(box1, box2) | Check collision between two bounding boxes |
 | func CheckCollisionBoxSphere(box, center, radius) | Check collision between box and sphere |
-| func CheckCollisionCircleLine(center, radius, p1, p2) | Check if circle collides with a line created betweeen two points [p1] and [p2] |
+| func CheckCollisionCircleLine(center, radius, p1, p2) | Check if circle collides with a line created between two points [p1] and [p2] |
 | func CheckCollisionCircleRec(center, radius, rec) | Check collision between circle and rectangle |
 | func CheckCollisionCircles(center1, radius1, center2, radius2) | Check collision between two circles |
 | func CheckCollisionLines(startPos1, endPos1, startPos2, endPos2, collisionPoint) | Check the collision between two lines defined by two points each, returns collision point by reference |
@@ -30,7 +30,7 @@ Implemented APIs (646)
 | func CheckCollisionPointTriangle(point, p1, p2, p3) | Check if point is inside a triangle |
 | func CheckCollisionRecs(rec1, rec2) | Check collision between two rectangles |
 | func CheckCollisionSpheres(center1, radius1, center2, radius2) | Check collision between two spheres |
-| sub ClearBackground(color) | Set background color (framebuffer clear color) |
+| sub ClearBackground(color) | Clear background (framebuffer) to color |
 | sub ClearWindowState(flags) | Clear window configuration state flags |
 | sub CloseAudioDevice() | Close the audio device and context |
 | func closePhysics() | n/a |
@@ -60,17 +60,17 @@ Implemented APIs (646)
 | func DecompressData(compData, compDataSize, dataSize) | Decompress data (DEFLATE algorithm), memory must be MemFree() |
 | func destroyPhysicsbody() | n/a |
 | func DirectoryExists(dirPath) | Check if a directory path exists |
-| sub DisableCursor() | Disables cursor (lock cursor) |
+| sub DisableCursor() | Disable cursor (lock cursor) |
 | sub DisableEventWaiting() | Disable waiting for events on EndDrawing(), automatic events polling |
 | sub DrawBillboard(camera, texture, position, scale, tint) | Draw a billboard texture |
 | sub DrawBillboardPro(camera, texture, source, position, up, size, origin, rotation, tint) | Draw a billboard texture defined by source and rotation |
 | sub DrawBillboardRec(camera, texture, source, position, size, tint) | Draw a billboard texture defined by source |
 | sub DrawBoundingBox(box, color) | Draw bounding box (wires) |
-| sub DrawCapsule(startPos, endPos, radius, slices, rings, color) | Draw a capsule with the center of its sphere caps at startPos and endPos |
-| sub DrawCapsuleWires(startPos, endPos, radius, slices, rings, color) | Draw capsule wireframe with the center of its sphere caps at startPos and endPos |
+| sub DrawCapsule(startPos, endPos, radius, rings, slices, color) | Draw a capsule with the center of its sphere caps at startPos and endPos |
+| sub DrawCapsuleWires(startPos, endPos, radius, rings, slices, color) | Draw capsule wireframe with the center of its sphere caps at startPos and endPos |
 | sub DrawCircle(centerX, centerY, radius, color) | Draw a color-filled circle |
 | sub DrawCircle3D(center, radius, rotationAxis, rotationAngle, color) | Draw a circle in 3D world space |
-| sub DrawCircleGradient(centerX, centerY, radius, inner, outer) | Draw a gradient-filled circle |
+| sub DrawCircleGradient(center, radius, inner, outer) | Draw a gradient-filled circle |
 | sub DrawCircleLines(centerX, centerY, radius, color) | Draw circle outline |
 | sub DrawCircleLinesV(center, radius, color) | Draw circle outline (Vector version) |
 | sub DrawCircleSector(center, radius, startAngle, endAngle, segments, color) | Draw a piece of a circle |
@@ -83,7 +83,7 @@ Implemented APIs (646)
 | sub DrawCylinder(position, radiusTop, radiusBottom, height, slices, color) | Draw a cylinder/cone |
 | sub DrawCylinderEx(startPos, endPos, startRadius, endRadius, sides, color) | Draw a cylinder with base at startPos and top at endPos |
 | sub DrawCylinderWires(position, radiusTop, radiusBottom, height, slices, color) | Draw a cylinder/cone wires |
-| sub DrawCylinderWiresEx(startPos, endPos, startRadius, endRadius, sides, color) | Draw a cylinder wires with base at startPos and top at endPos |
+| sub DrawCylinderWiresEx(startPos, endPos, startRadius, endRadius, slices, color) | Draw a cylinder wires with base at startPos and top at endPos |
 | sub DrawEllipse(centerX, centerY, radiusH, radiusV, color) | Draw ellipse |
 | sub DrawEllipseLines(centerX, centerY, radiusH, radiusV, color) | Draw ellipse outline |
 | sub DrawEllipseLinesV(center, radiusH, radiusV, color) | Draw ellipse outline (Vector version) |
@@ -99,15 +99,13 @@ Implemented APIs (646)
 | sub DrawLineV(startPos, endPos, color) | Draw a line (using gl lines) |
 | sub DrawModel(model, position, scale, tint) | Draw a model (with texture if set) |
 | sub DrawModelEx(model, position, rotationAxis, rotationAngle, scale, tint) | Draw a model with extended parameters |
-| sub DrawModelPoints(model, position, scale, tint) | Draw a model as points |
-| sub DrawModelPointsEx(model, position, rotationAxis, rotationAngle, scale, tint) | Draw a model as points with extended parameters |
 | sub DrawModelWires(model, position, scale, tint) | Draw a model wires (with texture if set) |
 | sub DrawModelWiresEx(model, position, rotationAxis, rotationAngle, scale, tint) | Draw a model wires (with texture if set) with extended parameters |
 | sub DrawPixel(posX, posY, color) | Draw a pixel using geometry [Can be slow, use with care] |
 | sub DrawPixelV(position, color) | Draw a pixel using geometry (Vector version) [Can be slow, use with care] |
 | sub DrawPlane(centerPos, size, color) | Draw a plane XZ |
 | sub DrawPoint3D(position, color) | Draw a point in 3D space, actually a small line |
-| sub DrawPoly(center, sides, radius, rotation, color) | Draw a regular polygon (Vector version) |
+| sub DrawPoly(center, sides, radius, rotation, color) | Draw a polygon of n sides |
 | sub DrawPolyLines(center, sides, radius, rotation, color) | Draw a polygon outline of n sides |
 | sub DrawPolyLinesEx(center, sides, radius, rotation, lineThick, color) | Draw a polygon outline of n sides with extended parameters |
 | sub DrawRay(ray, color) | Draw a ray line |
@@ -121,7 +119,7 @@ Implemented APIs (646)
 | sub DrawRectangleRec(rec, color) | Draw a color-filled rectangle |
 | sub DrawRectangleRounded(rec, roundness, segments, color) | Draw rectangle with rounded edges |
 | sub DrawRectangleRoundedLines(rec, roundness, segments, color) | Draw rectangle lines with rounded edges |
-| sub DrawRectangleRoundedLinesEx(rec, roundness, segments, lineThick, color) | Draw rectangle with rounded edges outline |
+| sub DrawRectangleRoundedLinesEx(rec, roundness, segments, lineThick, color) | Draw rectangle lines with rounded edges outline |
 | sub DrawRectangleV(position, size, color) | Draw a color-filled rectangle (Vector version) |
 | sub DrawRing(center, innerRadius, outerRadius, startAngle, endAngle, segments, color) | Draw ring |
 | sub DrawRingLines(center, innerRadius, outerRadius, startAngle, endAngle, segments, color) | Draw ring outline |
@@ -140,38 +138,39 @@ Implemented APIs (646)
 | sub DrawSplineSegmentLinear(p1, p2, thick, color) | Draw spline segment: Linear, 2 points |
 | sub DrawText(text, posX, posY, fontSize, color) | Draw text (using default font) |
 | sub DrawTextCodepoint(font, codepoint, position, fontSize, tint) | Draw one character (codepoint) |
-| sub DrawTextCodepoints(font, codepoints, codepointCount, position, fontSize, spacing, tint) | Draw multiple character (codepoint) |
+| sub DrawTextCodepoints(font, codepoints, codepointCount, position, fontSize, spacing, tint) | Draw multiple characters (codepoints) |
 | sub DrawTextEx(font, text, position, fontSize, spacing, tint) | Draw text using font and additional parameters |
 | sub DrawTextPro(font, text, position, origin, rotation, fontSize, spacing, tint) | Draw text using Font and pro parameters (rotation) |
 | sub DrawTexture(texture, posX, posY, tint) | Draw a Texture2D |
 | sub DrawTextureEx(texture, position, rotation, scale, tint) | Draw a Texture2D with extended parameters |
-| sub DrawTextureNPatch(texture, nPatchInfo, dest, origin, rotation, tint) | Draws a texture (or part of it) that stretches or shrinks nicely |
+| sub DrawTextureNPatch(texture, nPatchInfo, dest, origin, rotation, tint) | Draw a texture (or part of it) that stretches or shrinks nicely |
 | sub DrawTexturePro(texture, source, dest, origin, rotation, tint) | Draw a part of a texture defined by a rectangle with 'pro' parameters |
 | sub DrawTextureRec(texture, source, position, tint) | Draw a part of a texture defined by a rectangle |
 | sub DrawTextureV(texture, position, tint) | Draw a Texture2D with position defined as Vector2 |
 | sub DrawTriangle(v1, v2, v3, color) | Draw a color-filled triangle (vertex in counter-clockwise order!) |
 | sub DrawTriangle3D(v1, v2, v3, color) | Draw a color-filled triangle (vertex in counter-clockwise order!) |
 | sub DrawTriangleFan(points, pointCount, color) | Draw a triangle fan defined by points (first vertex is the center) |
+| sub DrawTriangleGradient(v1, v2, v3, c1, c2, c3) | Draw triangle with interpolated colors (vertex in counter-clockwise order!) |
 | sub DrawTriangleLines(v1, v2, v3, color) | Draw triangle outline (vertex in counter-clockwise order!) |
 | sub DrawTriangleStrip(points, pointCount, color) | Draw a triangle strip defined by points |
 | sub DrawTriangleStrip3D(points, pointCount, color) | Draw a triangle strip defined by points |
-| sub EnableCursor() | Enables cursor (unlock cursor) |
+| sub EnableCursor() | Enable cursor (unlock cursor) |
 | sub EnableEventWaiting() | Enable waiting for events on EndDrawing(), no automatic event polling |
 | func EncodeDataBase64(data, dataSize, outputSize) | Encode data to Base64 string (includes NULL terminator), memory must be MemFree() |
 | sub EndBlendMode() | End blending mode (reset to default: alpha blending) |
-| sub EndDrawing() | End canvas drawing and swap buffers (double buffering) |
-| sub EndMode2D() | Ends 2D mode with custom camera |
-| sub EndMode3D() | Ends 3D mode and returns to default 2D orthographic mode |
+| sub EndDrawing() | End canvas (framebuffer) drawing and swap buffers (double buffering) |
+| sub EndMode2D() | End 2D mode with custom camera |
+| sub EndMode3D() | End 3D mode and return to default 2D orthographic mode |
 | sub EndScissorMode() | End scissor mode |
 | sub EndShaderMode() | End custom shader drawing (use default shader) |
-| sub EndTextureMode() | Ends drawing to render texture |
+| sub EndTextureMode() | End drawing to render texture |
 | sub EndVrStereoMode() | End stereo rendering (requires VR simulator) |
 | func ExportAutomationEventList(list, fileName) | Export automation events list as text file |
 | func ExportDataAsCode(data, dataSize, fileName) | Export data to code (.h), returns true on success |
 | func ExportFontAsCode(font, fileName) | Export font as code file, returns true on success |
 | func ExportImage(image, fileName) | Export image data to file, returns true on success |
 | func ExportImageAsCode(image, fileName) | Export image as code file defining an array of bytes, returns true on success |
-| func ExportImageToMemory(image, fileType, fileSize) | Export image to memory buffer |
+| func ExportImageToMemory(image, fileType, fileSize) | Export image to memory buffer, memory must be MemFree() |
 | func ExportMesh(mesh, fileName) | Export mesh data to file, returns true on success |
 | func ExportMeshAsCode(mesh, fileName) | Export mesh as code file (.h) defining multiple arrays of vertex attributes |
 | func ExportWave(wave, fileName) | Export wave data to file, returns true on success |
@@ -219,6 +218,8 @@ Implemented APIs (646)
 | func GetCollisionRec(rec1, rec2) | Get collision rectangle for two rectangles collision |
 | func GetColor(hexValue) | Get Color structure from hexadecimal value |
 | func GetCurrentMonitor() | Get current monitor where window is placed |
+| func GetDirectoryFileCount(dirPath) | Get the file count in a directory |
+| func GetDirectoryFileCountEx(basePath, filter, scanSubdirs) | Get the file count in a directory with extension filtering and recursive directory scan. Use 'DIR' in the filter string to include directories in the result |
 | func GetDirectoryPath(filePath) | Get full path for a given fileName with path (uses static string) |
 | func GetFileExtension(fileName) | Get pointer to extension for a filename string (includes dot: '.png') |
 | func GetFileLength(fileName) | Get file length in bytes (NOTE: GetFileSize() conflicts with windows.h) |
@@ -279,7 +280,7 @@ Implemented APIs (646)
 | func GetRenderHeight() | Get current render height (it considers HiDPI) |
 | func GetRenderWidth() | Get current render width (it considers HiDPI) |
 | func GetScreenHeight() | Get current screen height |
-| func GetScreenToWorld2D(position, camera) | Get the world space position for a 2d camera screen space position |
+| func GetScreenToWorld2D(position, camera) | Get world space position for a 2d camera screen space position |
 | func GetScreenToWorldRay(position, camera) | Get a ray trace from screen position (i.e mouse) |
 | func GetScreenToWorldRayEx(position, camera, width, height) | Get a ray trace from screen position (i.e mouse) in a viewport |
 | func GetScreenWidth() | Get current screen width |
@@ -289,7 +290,7 @@ Implemented APIs (646)
 | func GetShapesTextureRectangle() | Get texture source rectangle that is used for shapes drawing |
 | func GetSplinePointBasis(p1, p2, p3, p4, t) | Get (evaluate) spline point: B-Spline |
 | func GetSplinePointBezierCubic(p1, c2, c3, p4, t) | Get (evaluate) spline point: Cubic Bezier |
-| func GetSplinePointBezierQuad(p1, c2, p3, t) | Get (evaluate) spline point: Quadratic Bezier |
+| func GetSplinePointBezierQuadratic(p1, c2, p3, t) | Get (evaluate) spline point: Quadratic Bezier |
 | func GetSplinePointCatmullRom(p1, p2, p3, p4, t) | Get (evaluate) spline point: Catmull-Rom |
 | func GetSplinePointLinear(startPos, endPos, t) | Get (evaluate) spline point: Linear |
 | func GetTextBetween(text, begin, end) | Get text between two strings |
@@ -303,9 +304,9 @@ Implemented APIs (646)
 | func GetWindowPosition() | Get window position XY on monitor |
 | func GetWindowScaleDPI() | Get window scale DPI factor |
 | func GetWorkingDirectory() | Get current working directory (uses static string) |
-| func GetWorldToScreen(position, camera) | Get the screen space position for a 3d world space position |
-| func GetWorldToScreen2D(position, camera) | Get the screen space position for a 2d camera world space position |
-| func GetWorldToScreenEx(position, camera, width, height) | Get size position for a 3d world space position |
+| func GetWorldToScreen(position, camera) | Get screen space position for a 3d world space position |
+| func GetWorldToScreen2D(position, camera) | Get screen space position for a 2d camera world space position |
+| func GetWorldToScreenEx(position, camera, width, height) | Get sized screen space position for a 3d world space position |
 | func guibutton() | n/a |
 | func guicheckbox() | n/a |
 | func guicolorbaralpha() | n/a |
@@ -344,7 +345,7 @@ Implemented APIs (646)
 | func guiunlock() | n/a |
 | func guivaluebox() | n/a |
 | func guiwindowbox() | n/a |
-| sub HideCursor() | Hides cursor |
+| sub HideCursor() | Hide cursor |
 | sub ImageAlphaClear(image, color, threshold) | Clear alpha channel to desired color |
 | sub ImageAlphaCrop(image, threshold) | Crop image depending on alpha value |
 | sub ImageAlphaMask(image, alphaMask) | Apply alpha mask to image |
@@ -371,14 +372,15 @@ Implemented APIs (646)
 | sub ImageDrawPixel(dst, posX, posY, color) | Draw pixel within an image |
 | sub ImageDrawPixelV(dst, position, color) | Draw pixel within an image (Vector version) |
 | sub ImageDrawRectangle(dst, posX, posY, width, height, color) | Draw rectangle within an image |
-| sub ImageDrawRectangleLines(dst, rec, thick, color) | Draw rectangle lines within an image |
+| sub ImageDrawRectangleLines(dst, posX, posY, width, height, color) | Draw rectangle lines within an image |
+| sub ImageDrawRectangleLinesEx(dst, rec, thick, color) | Draw rectangle lines within an image with extended parameters |
 | sub ImageDrawRectangleRec(dst, rec, color) | Draw rectangle within an image |
 | sub ImageDrawRectangleV(dst, position, size, color) | Draw rectangle within an image (Vector version) |
 | sub ImageDrawText(dst, text, posX, posY, fontSize, color) | Draw text (using default font) within an image (destination) |
 | sub ImageDrawTextEx(dst, font, text, position, fontSize, spacing, tint) | Draw text (custom sprite font) within an image (destination) |
 | sub ImageDrawTriangle(dst, v1, v2, v3, color) | Draw triangle within an image |
-| sub ImageDrawTriangleEx(dst, v1, v2, v3, c1, c2, c3) | Draw triangle with interpolated colors within an image |
 | sub ImageDrawTriangleFan(dst, points, pointCount, color) | Draw a triangle fan defined by points within an image (first vertex is the center) |
+| sub ImageDrawTriangleGradient(dst, v1, v2, v3, c1, c2, c3) | Draw triangle with interpolated colors within an image |
 | sub ImageDrawTriangleLines(dst, v1, v2, v3, color) | Draw triangle outline within an image |
 | sub ImageDrawTriangleStrip(dst, points, pointCount, color) | Draw a triangle strip defined by points within an image |
 | sub ImageFlipHorizontal(image) | Flip image horizontally |
@@ -403,7 +405,7 @@ Implemented APIs (646)
 | func IsAudioDeviceReady() | Check if audio device has been initialized successfully |
 | func IsAudioStreamPlaying(stream) | Check if audio stream is playing |
 | func IsAudioStreamProcessed(stream) | Check if any audio stream buffers requires refill |
-| func IsAudioStreamValid(stream) | Checks if an audio stream is valid (buffers initialized) |
+| func IsAudioStreamValid(stream) | Check if an audio stream is valid (buffers initialized) |
 | func IsCursorHidden() | Check if cursor is not visible |
 | func IsCursorOnScreen() | Check if cursor is on the screen |
 | func IsFileDropped() | Check if a file has been dropped into window |
@@ -415,7 +417,7 @@ Implemented APIs (646)
 | func IsGamepadButtonPressed(gamepad, button) | Check if a gamepad button has been pressed once |
 | func IsGamepadButtonReleased(gamepad, button) | Check if a gamepad button has been released once |
 | func IsGamepadButtonUp(gamepad, button) | Check if a gamepad button is NOT being pressed |
-| func IsGestureDetected(gesture) | Check if a gesture have been detected |
+| func IsGestureDetected(gesture) | Check if a gesture has been detected |
 | func IsImageValid(image) | Check if an image is valid (data and parameters) |
 | func IsKeyDown(key) | Check if a key is being pressed |
 | func IsKeyPressed(key) | Check if a key has been pressed once |
@@ -429,14 +431,14 @@ Implemented APIs (646)
 | func IsMouseButtonReleased(button) | Check if a mouse button has been released once |
 | func IsMouseButtonUp(button) | Check if a mouse button is NOT being pressed |
 | func IsMusicStreamPlaying(music) | Check if music is playing |
-| func IsMusicValid(music) | Checks if a music stream is valid (context and buffers initialized) |
+| func IsMusicValid(music) | Check if a music stream is valid (context and buffers initialized) |
 | func IsPathFile(path) | Check if a given path is a file or a directory |
 | func IsRenderTextureValid(target) | Check if a render texture is valid (loaded in GPU) |
 | func IsShaderValid(shader) | Check if a shader is valid (loaded on GPU) |
 | func IsSoundPlaying(sound) | Check if a sound is currently playing |
-| func IsSoundValid(sound) | Checks if a sound is valid (data loaded and buffers initialized) |
+| func IsSoundValid(sound) | Check if a sound is valid (data loaded and buffers initialized) |
 | func IsTextureValid(texture) | Check if a texture is valid (loaded in GPU) |
-| func IsWaveValid(wave) | Checks if wave data is valid (data loaded and parameters) |
+| func IsWaveValid(wave) | Check if wave data is valid (data loaded and parameters) |
 | func IsWindowFocused() | Check if window is currently focused |
 | func IsWindowFullscreen() | Check if window is currently fullscreen |
 | func IsWindowHidden() | Check if window is currently hidden |
@@ -448,8 +450,8 @@ Implemented APIs (646)
 | func LoadAudioStream(sampleRate, sampleSize, channels) | Load audio stream (to stream raw audio pcm data) |
 | func LoadAutomationEventList(fileName) | Load automation events list from file, NULL for empty list, capacity = MAX_AUTOMATION_EVENTS |
 | func LoadCodepoints(text, count) | Load all codepoints from a UTF-8 text string, codepoints count returned by parameter |
-| func LoadDirectoryFiles(dirPath) | Load directory filepaths |
-| func LoadDirectoryFilesEx(basePath, filter, scanSubdirs) | Load directory filepaths with extension filtering and recursive directory scan. Use 'DIR' in the filter string to include directories in the result |
+| func LoadDirectoryFiles(dirPath) | Load directory filepaths, files and directories, no subdirs scan |
+| func LoadDirectoryFilesEx(basePath, filter, scanSubdirs) | Load directory filepaths with extension filtering and subdir scan; some filters available: `*.*`,`FILES*`,`DIRS*` |
 | func LoadDroppedFiles() | Load dropped filepaths |
 | func LoadFileData(fileName, dataSize) | Load file data as byte array (read) |
 | func LoadFileText(fileName) | Load text data from file (read), returns a '\\0' terminated string |
@@ -462,7 +464,7 @@ Implemented APIs (646)
 | func LoadImageAnimFromMemory(fileType, fileData, dataSize, frames) | Load image sequence from memory buffer |
 | func LoadImageColors(image) | Load color data from image as a Color array (RGBA - 32bit) |
 | func LoadImageFromMemory(fileType, fileData, dataSize) | Load image from memory buffer, fileType refers to extension: i.e. '.png' |
-| func LoadImageFromScreen() | Load image from screen buffer and (screenshot) |
+| func LoadImageFromScreen() | Load image from screen buffer (screenshot) |
 | func LoadImageFromTexture(texture) | Load image from GPU texture data |
 | func LoadImagePalette(image, maxPaletteSize, colorCount) | Load colors palette from image as a Color array (RGBA - 32bit) |
 | func LoadImageRaw(fileName, width, height, format, headerSize) | Load image from RAW file data |
@@ -476,7 +478,7 @@ Implemented APIs (646)
 | func LoadShader(vsFileName, fsFileName) | Load shader from files and bind default locations |
 | func LoadShaderFromMemory(vsCode, fsCode) | Load shader from code strings and bind default locations |
 | func LoadSound(fileName) | Load sound from file |
-| func LoadSoundAlias(source) | Create a new sound that shares the same sample data as the source sound, does not own the sound data |
+| func LoadSoundAlias(source) | Load sound alias, new sound that shares the same sample data as the source sound, does not own the sound data |
 | func LoadSoundFromWave(wave) | Load sound from wave data |
 | func LoadTexture(fileName) | Load texture from file into GPU memory (VRAM) |
 | func LoadTextureCubemap(image, layout) | Load cubemap from image, multiple image cubemap layouts supported |
@@ -488,6 +490,7 @@ Implemented APIs (646)
 | func MakeDirectory(dirPath) | Create directories (including full path requested), returns 0 on success |
 | sub MaximizeWindow() | Set window state: maximized, if resizable |
 | func MeasureText(text, fontSize) | Measure string width for default font |
+| func MeasureTextCodepoints(font, codepoints, length, fontSize, spacing) | Measure string size for an existing array of codepoints for Font |
 | func MeasureTextEx(font, text, fontSize, spacing) | Measure string size for Font |
 | func MemAlloc(size) | Internal memory allocator |
 | sub MemFree(ptr) | Internal memory free |
@@ -517,13 +520,13 @@ Implemented APIs (646)
 | func SaveFileText(fileName, text) | Save text data to file (write), string must be '\\0' terminated, returns true on success |
 | sub SeekMusicStream(music, position) | Seek music to a position (in seconds) |
 | sub SetAudioStreamBufferSizeDefault(size) | Default size for new audio streams |
-| sub SetAudioStreamPan(stream, pan) | Set pan for audio stream (0.5 is centered) |
+| sub SetAudioStreamPan(stream, pan) | Set pan for audio stream (-1.0 left, 0.0 center, 1.0 right) |
 | sub SetAudioStreamPitch(stream, pitch) | Set pitch for audio stream (1.0 is base level) |
 | sub SetAudioStreamVolume(stream, volume) | Set volume for audio stream (1.0 is max level) |
 | sub SetAutomationEventBaseFrame(frame) | Set automation event internal base frame to start recording |
 | sub SetAutomationEventList(list) | Set automation event list to record to |
 | sub SetClipboardText(text) | Set clipboard text content |
-| sub SetConfigFlags(flags) | Setup init configuration flags (view FLAGS) |
+| sub SetConfigFlags(flags) | Set up init configuration flags (view FLAGS) |
 | sub SetExitKey(key) | Set a custom key to exit program (default is ESC) |
 | func SetGamepadMappings(mappings) | Set internal gamepad mappings (SDL_GameControllerDB) |
 | sub SetGamepadVibration(gamepad, leftMotor, rightMotor, duration) | Set gamepad vibration for both motors (duration in seconds) |
@@ -536,8 +539,8 @@ Implemented APIs (646)
 | sub SetMouseOffset(offsetX, offsetY) | Set mouse offset |
 | sub SetMousePosition(x, y) | Set mouse position XY |
 | sub SetMouseScale(scaleX, scaleY) | Set mouse scaling |
-| sub SetMusicPan(music, pan) | Set pan for a music (-1.0 left, 0.0 center, 1.0 right) |
-| sub SetMusicPitch(music, pitch) | Set pitch for a music (1.0 is base level) |
+| sub SetMusicPan(music, pan) | Set pan for music (-1.0 left, 0.0 center, 1.0 right) |
+| sub SetMusicPitch(music, pitch) | Set pitch for music (1.0 is base level) |
 | sub SetMusicVolume(music, volume) | Set volume for music (1.0 is max level) |
 | func setPhysicsbodyangularvelocity() | n/a |
 | func setPhysicsbodydynamicfriction() | n/a |
@@ -585,7 +588,7 @@ Implemented APIs (646)
 | sub SetWindowSize(width, height) | Set window dimensions |
 | sub SetWindowState(flags) | Set window configuration state using flags |
 | sub SetWindowTitle(title) | Set title for window |
-| sub ShowCursor() | Shows cursor |
+| sub ShowCursor() | Show cursor |
 | sub StartAutomationEventRecording() | Start recording automation events (AutomationEventList must be set) |
 | sub StopAudioStream(stream) | Stop audio stream |
 | sub StopAutomationEventRecording() | Stop recording automation events |
@@ -597,12 +600,15 @@ Implemented APIs (646)
 | func TextCopy(dst, src) | Copy one string to another, returns bytes copied |
 | func TextFindIndex(text, search) | Find first text occurrence within a string, -1 if not found |
 | func TextFormat(text, args) | Text formatting with variables (sprintf() style) |
-| func TextInsert(text, insert, position) | Insert text in a position (WARNING: memory must be freed!) |
-| func TextIsEqual(text1, text2) | Check if two text string are equal |
+| func TextInsert(text, insert, position) | Insert text in a defined byte position |
+| func TextInsertAlloc(text, insert, position) | Insert text in a defined byte position, memory must be MemFree() |
+| func TextIsEqual(text1, text2) | Check if two text strings are equal |
 | func TextLength(text) | Get text length, checks for '\\0' ending |
 | func TextRemoveSpaces(text) | Remove text spaces, concat words |
-| func TextReplace(text, search, replacement) | Replace text string (WARNING: memory must be freed!) |
-| func TextReplaceBetween(text, begin, end, replacement) | Replace text between two specific strings (WARNING: memory must be freed!) |
+| func TextReplace(text, search, replacement) | Replace text string with new string |
+| func TextReplaceAlloc(text, search, replacement) | Replace text string with new string, memory must be MemFree() |
+| func TextReplaceBetween(text, begin, end, replacement) | Replace text between two specific strings |
+| func TextReplaceBetweenAlloc(text, begin, end, replacement) | Replace text between two specific strings, memory must be MemFree() |
 | func TextSubtext(text, position, length) | Get a piece of a text string |
 | func TextToCamel(text) | Get Camel case notation version of provided string |
 | func TextToFloat(text) | Get float value from text |
@@ -625,14 +631,13 @@ Implemented APIs (646)
 | sub UnloadImagePalette(colors) | Unload colors palette loaded with LoadImagePalette() |
 | sub UnloadMesh(mesh) | Unload mesh data from CPU and GPU |
 | sub UnloadModel(model) | Unload model (including meshes) from memory (RAM and/or VRAM) |
-| sub UnloadModelAnimation(anim) | Unload animation data |
 | sub UnloadModelAnimations(animations, animCount) | Unload animation array data |
 | sub UnloadMusicStream(music) | Unload music stream |
 | sub UnloadRandomSequence(sequence) | Unload random values sequence |
 | sub UnloadRenderTexture(target) | Unload render texture from GPU memory (VRAM) |
 | sub UnloadShader(shader) | Unload shader from GPU memory (VRAM) |
 | sub UnloadSound(sound) | Unload sound |
-| sub UnloadSoundAlias(alias) | Unload a sound alias (does not deallocate sample data) |
+| sub UnloadSoundAlias(alias) | Unload sound alias (does not deallocate sample data) |
 | sub UnloadTexture(texture) | Unload texture from GPU memory (VRAM) |
 | sub UnloadUTF8(text) | Unload UTF-8 text encoded from codepoints array |
 | sub UnloadWave(wave) | Unload wave data |
@@ -641,11 +646,11 @@ Implemented APIs (646)
 | func updateautomationeventlist() | n/a |
 | sub UpdateCamera(camera, mode) | Update camera position for selected mode |
 | sub UpdateMeshBuffer(mesh, index, data, dataSize, offset) | Update mesh vertex data in GPU for a specific buffer index |
-| sub UpdateModelAnimation(model, anim, frame) | Update model animation pose (CPU) |
-| sub UpdateModelAnimationBones(model, anim, frame) | Update model animation mesh bone matrices (GPU skinning) |
-| sub UpdateMusicStream(music) | Updates buffers for music streaming |
+| sub UpdateModelAnimation(model, anim, frame) | Update model animation pose (vertex buffers and bone matrices) |
+| sub UpdateModelAnimationEx(model, animA, frameA, animB, frameB, blend) | Update model animation pose, blending two animations |
+| sub UpdateMusicStream(music) | Update buffers for music streaming |
 | func updatePhysics() | n/a |
-| sub UpdateSound(sound, data, sampleCount) | Update sound buffer with new data (default data format: 32 bit float, stereo) |
+| sub UpdateSound(sound, data, frameCount) | Update sound buffer with new data (default data format: 32 bit float, stereo) |
 | sub UpdateTexture(texture, pixels) | Update GPU texture with new data (pixels should be able to fill texture) |
 | sub UpdateTextureRec(texture, rec, pixels) | Update GPU texture rectangle with new data (pixels and rec should fit in texture) |
 | sub UploadMesh(mesh, dynamic) | Upload mesh vertex data in GPU and provide VAO/VBO ids |
diff --git a/raylib/func-def.h b/raylib/func-def.h
index 42552b2..a939104 100644
--- a/raylib/func-def.h
+++ b/raylib/func-def.h
@@ -85,6 +85,8 @@
   {2, 2, "GETCOLLISIONREC", cmd_getcollisionrec},
   {1, 1, "GETCOLOR", cmd_getcolor},
   {0, 0, "GETCURRENTMONITOR", cmd_getcurrentmonitor},
+  {1, 1, "GETDIRECTORYFILECOUNT", cmd_getdirectoryfilecount},
+  {3, 3, "GETDIRECTORYFILECOUNTEX", cmd_getdirectoryfilecountex},
   {1, 1, "GETDIRECTORYPATH", cmd_getdirectorypath},
   {1, 1, "GETFILEEXTENSION", cmd_getfileextension},
   {1, 1, "GETFILELENGTH", cmd_getfilelength},
@@ -150,7 +152,7 @@
   {0, 0, "GETSHAPESTEXTURERECTANGLE", cmd_getshapestexturerectangle},
   {5, 5, "GETSPLINEPOINTBASIS", cmd_getsplinepointbasis},
   {5, 5, "GETSPLINEPOINTBEZIERCUBIC", cmd_getsplinepointbeziercubic},
-  {4, 4, "GETSPLINEPOINTBEZIERQUAD", cmd_getsplinepointbezierquad},
+  {4, 4, "GETSPLINEPOINTBEZIERQUADRATIC", cmd_getsplinepointbezierquadratic},
   {5, 5, "GETSPLINEPOINTCATMULLROM", cmd_getsplinepointcatmullrom},
   {3, 3, "GETSPLINEPOINTLINEAR", cmd_getsplinepointlinear},
   {3, 3, "GETTEXTBETWEEN", cmd_gettextbetween},
@@ -256,6 +258,7 @@
   {1, 1, "LOADWAVESAMPLES", cmd_loadwavesamples},
   {1, 1, "MAKEDIRECTORY", cmd_makedirectory},
   {2, 2, "MEASURETEXT", cmd_measuretext},
+  {5, 5, "MEASURETEXTCODEPOINTS", cmd_measuretextcodepoints},
   {4, 4, "MEASURETEXTEX", cmd_measuretextex},
   {1, 1, "MEMALLOC", cmd_memalloc},
   {2, 2, "MEMREALLOC", cmd_memrealloc},
@@ -265,11 +268,14 @@
   {2, 2, "TEXTCOPY", cmd_textcopy},
   {2, 2, "TEXTFINDINDEX", cmd_textfindindex},
   {3, 3, "TEXTINSERT", cmd_textinsert},
+  {3, 3, "TEXTINSERTALLOC", cmd_textinsertalloc},
   {2, 2, "TEXTISEQUAL", cmd_textisequal},
   {1, 1, "TEXTLENGTH", cmd_textlength},
   {1, 1, "TEXTREMOVESPACES", cmd_textremovespaces},
   {3, 3, "TEXTREPLACE", cmd_textreplace},
+  {3, 3, "TEXTREPLACEALLOC", cmd_textreplacealloc},
   {4, 4, "TEXTREPLACEBETWEEN", cmd_textreplacebetween},
+  {4, 4, "TEXTREPLACEBETWEENALLOC", cmd_textreplacebetweenalloc},
   {3, 3, "TEXTSUBTEXT", cmd_textsubtext},
   {1, 1, "TEXTTOCAMEL", cmd_texttocamel},
   {1, 1, "TEXTTOFLOAT", cmd_texttofloat},
diff --git a/raylib/func.h b/raylib/func.h
index 53ccee6..441fc4d 100644
--- a/raylib/func.h
+++ b/raylib/func.h
@@ -32,7 +32,7 @@ static int cmd_checkcollisionboxsphere(int argc, slib_par_t *params, var_t *retv
 }
 
 //
-// Check if circle collides with a line created betweeen two points [p1] and [p2]
+// Check if circle collides with a line created between two points [p1] and [p2]
 //
 static int cmd_checkcollisioncircleline(int argc, slib_par_t *params, var_t *retval) {
   auto center = get_param_vec2(argc, params, 0);
@@ -328,7 +328,7 @@ static int cmd_compressdata(int argc, slib_par_t *params, var_t *retval) {
 // Compute CRC32 hash code
 //
 static int cmd_computecrc32(int argc, slib_par_t *params, var_t *retval) {
-  auto data = (unsigned char *)get_param_str(argc, params, 0, 0);
+  auto data = (const unsigned char *)get_param_str(argc, params, 0, 0);
   auto dataSize = get_param_int(argc, params, 1, 0);
   auto fnResult = ComputeCRC32(data, dataSize);
   v_setint(retval, fnResult);
@@ -339,7 +339,7 @@ static int cmd_computecrc32(int argc, slib_par_t *params, var_t *retval) {
 // Compute MD5 hash code, returns static int[4] (16 bytes)
 //
 static int cmd_computemd5(int argc, slib_par_t *params, var_t *retval) {
-  auto data = (unsigned char *)get_param_str(argc, params, 0, 0);
+  auto data = (const unsigned char *)get_param_str(argc, params, 0, 0);
   auto dataSize = get_param_int(argc, params, 1, 0);
   auto fnResult = (var_int_t)ComputeMD5(data, dataSize);
   v_setint(retval, fnResult);
@@ -350,7 +350,7 @@ static int cmd_computemd5(int argc, slib_par_t *params, var_t *retval) {
 // Compute SHA1 hash code, returns static int[5] (20 bytes)
 //
 static int cmd_computesha1(int argc, slib_par_t *params, var_t *retval) {
-  auto data = (unsigned char *)get_param_str(argc, params, 0, 0);
+  auto data = (const unsigned char *)get_param_str(argc, params, 0, 0);
   auto dataSize = get_param_int(argc, params, 1, 0);
   auto fnResult = (var_int_t)ComputeSHA1(data, dataSize);
   v_setint(retval, fnResult);
@@ -361,7 +361,7 @@ static int cmd_computesha1(int argc, slib_par_t *params, var_t *retval) {
 // Compute SHA256 hash code, returns static int[8] (32 bytes)
 //
 static int cmd_computesha256(int argc, slib_par_t *params, var_t *retval) {
-  auto data = (unsigned char *)get_param_str(argc, params, 0, 0);
+  auto data = (const unsigned char *)get_param_str(argc, params, 0, 0);
   auto dataSize = get_param_int(argc, params, 1, 0);
   auto fnResult = (var_int_t)ComputeSHA256(data, dataSize);
   v_setint(retval, fnResult);
@@ -497,7 +497,7 @@ static int cmd_exportimageascode(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Export image to memory buffer
+// Export image to memory buffer, memory must be MemFree()
 //
 static int cmd_exportimagetomemory(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -1062,6 +1062,28 @@ static int cmd_getcurrentmonitor(int argc, slib_par_t *params, var_t *retval) {
   return 1;
 }
 
+//
+// Get the file count in a directory
+//
+static int cmd_getdirectoryfilecount(int argc, slib_par_t *params, var_t *retval) {
+  auto dirPath = get_param_str(argc, params, 0, 0);
+  auto fnResult = GetDirectoryFileCount(dirPath);
+  v_setint(retval, fnResult);
+  return 1;
+}
+
+//
+// Get the file count in a directory with extension filtering and recursive directory scan. Use 'DIR' in the filter string to include directories in the result
+//
+static int cmd_getdirectoryfilecountex(int argc, slib_par_t *params, var_t *retval) {
+  auto basePath = get_param_str(argc, params, 0, 0);
+  auto filter = get_param_str(argc, params, 1, 0);
+  auto scanSubdirs = get_param_int(argc, params, 2, 0);
+  auto fnResult = GetDirectoryFileCountEx(basePath, filter, scanSubdirs);
+  v_setint(retval, fnResult);
+  return 1;
+}
+
 //
 // Get full path for a given fileName with path (uses static string)
 //
@@ -1661,7 +1683,7 @@ static int cmd_getscreenheight(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Get the world space position for a 2d camera screen space position
+// Get world space position for a 2d camera screen space position
 //
 static int cmd_getscreentoworld2d(int argc, slib_par_t *params, var_t *retval) {
   auto position = get_param_vec2(argc, params, 0);
@@ -1775,12 +1797,12 @@ static int cmd_getsplinepointbeziercubic(int argc, slib_par_t *params, var_t *re
 //
 // Get (evaluate) spline point: Quadratic Bezier
 //
-static int cmd_getsplinepointbezierquad(int argc, slib_par_t *params, var_t *retval) {
+static int cmd_getsplinepointbezierquadratic(int argc, slib_par_t *params, var_t *retval) {
   auto p1 = get_param_vec2(argc, params, 0);
   auto c2 = get_param_vec2(argc, params, 1);
   auto p3 = get_param_vec2(argc, params, 2);
   auto t = get_param_num(argc, params, 3, 0);
-  auto fnResult = GetSplinePointBezierQuad(p1, c2, p3, t);
+  auto fnResult = GetSplinePointBezierQuadratic(p1, c2, p3, t);
   v_setvec2(retval, fnResult);
   return 1;
 }
@@ -1916,7 +1938,7 @@ static int cmd_getworkingdirectory(int argc, slib_par_t *params, var_t *retval)
 }
 
 //
-// Get the screen space position for a 3d world space position
+// Get screen space position for a 3d world space position
 //
 static int cmd_getworldtoscreen(int argc, slib_par_t *params, var_t *retval) {
   auto position = get_param_vec3(argc, params, 0);
@@ -1927,7 +1949,7 @@ static int cmd_getworldtoscreen(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Get the screen space position for a 2d camera world space position
+// Get screen space position for a 2d camera world space position
 //
 static int cmd_getworldtoscreen2d(int argc, slib_par_t *params, var_t *retval) {
   auto position = get_param_vec2(argc, params, 0);
@@ -1938,7 +1960,7 @@ static int cmd_getworldtoscreen2d(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Get size position for a 3d world space position
+// Get sized screen space position for a 3d world space position
 //
 static int cmd_getworldtoscreenex(int argc, slib_par_t *params, var_t *retval) {
   auto position = get_param_vec3(argc, params, 0);
@@ -2074,7 +2096,7 @@ static int cmd_isaudiostreamprocessed(int argc, slib_par_t *params, var_t *retva
 }
 
 //
-// Checks if an audio stream is valid (buffers initialized)
+// Check if an audio stream is valid (buffers initialized)
 //
 static int cmd_isaudiostreamvalid(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -2208,7 +2230,7 @@ static int cmd_isgamepadbuttonup(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Check if a gesture have been detected
+// Check if a gesture has been detected
 //
 static int cmd_isgesturedetected(int argc, slib_par_t *params, var_t *retval) {
   auto gesture = get_param_int(argc, params, 0, 0);
@@ -2373,7 +2395,7 @@ static int cmd_ismusicstreamplaying(int argc, slib_par_t *params, var_t *retval)
 }
 
 //
-// Checks if a music stream is valid (context and buffers initialized)
+// Check if a music stream is valid (context and buffers initialized)
 //
 static int cmd_ismusicvalid(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -2441,7 +2463,7 @@ static int cmd_issoundplaying(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Checks if a sound is valid (data loaded and buffers initialized)
+// Check if a sound is valid (data loaded and buffers initialized)
 //
 static int cmd_issoundvalid(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -2473,7 +2495,7 @@ static int cmd_istexturevalid(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Checks if wave data is valid (data loaded and parameters)
+// Check if wave data is valid (data loaded and parameters)
 //
 static int cmd_iswavevalid(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -2595,7 +2617,7 @@ static int cmd_loadcodepoints(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Load directory filepaths
+// Load directory filepaths, files and directories, no subdirs scan
 //
 static int cmd_loaddirectoryfiles(int argc, slib_par_t *params, var_t *retval) {
   auto dirPath = get_param_str(argc, params, 0, 0);
@@ -2605,7 +2627,7 @@ static int cmd_loaddirectoryfiles(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Load directory filepaths with extension filtering and recursive directory scan. Use 'DIR' in the filter string to include directories in the result
+// Load directory filepaths with extension filtering and subdir scan; some filters available: `*.*`,`FILES*`,`DIRS*`
 //
 static int cmd_loaddirectoryfilesex(int argc, slib_par_t *params, var_t *retval) {
   auto basePath = get_param_str(argc, params, 0, 0);
@@ -2767,7 +2789,7 @@ static int cmd_loadimagefrommemory(int argc, slib_par_t *params, var_t *retval)
 }
 
 //
-// Load image from screen buffer and (screenshot)
+// Load image from screen buffer (screenshot)
 //
 static int cmd_loadimagefromscreen(int argc, slib_par_t *params, var_t *retval) {
   auto fnResult = LoadImageFromScreen();
@@ -2905,7 +2927,7 @@ static int cmd_loadsound(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Create a new sound that shares the same sample data as the source sound, does not own the sound data
+// Load sound alias, new sound that shares the same sample data as the source sound, does not own the sound data
 //
 static int cmd_loadsoundalias(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -3049,6 +3071,26 @@ static int cmd_measuretext(int argc, slib_par_t *params, var_t *retval) {
   return 1;
 }
 
+//
+// Measure string size for an existing array of codepoints for Font
+//
+static int cmd_measuretextcodepoints(int argc, slib_par_t *params, var_t *retval) {
+  int result;
+  int font_id = get_font_id(argc, params, 0, retval);
+  if (font_id != -1) {
+    auto codepoints = (const int *)get_param_int_t(argc, params, 1, 0);
+    auto length = get_param_int(argc, params, 2, 0);
+    auto fontSize = get_param_num(argc, params, 3, 0);
+    auto spacing = get_param_num(argc, params, 4, 0);
+    auto fnResult = MeasureTextCodepoints(_fontMap.at(font_id), codepoints, length, fontSize, spacing);
+    v_setvec2(retval, fnResult);
+    result = 1;
+  } else {
+    result = 0;
+  }
+  return result;
+}
+
 //
 // Measure string size for Font
 //
@@ -3094,7 +3136,7 @@ static int cmd_memrealloc(int argc, slib_par_t *params, var_t *retval) {
 //
 static int cmd_savefiledata(int argc, slib_par_t *params, var_t *retval) {
   auto fileName = get_param_str(argc, params, 0, 0);
-  auto data = (void *)get_param_int_t(argc, params, 1, 0);
+  auto data = (const void *)get_param_int_t(argc, params, 1, 0);
   auto dataSize = get_param_int(argc, params, 2, 0);
   auto fnResult = SaveFileData(fileName, data, dataSize);
   v_setint(retval, fnResult);
@@ -3145,7 +3187,7 @@ static int cmd_textfindindex(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Insert text in a position (WARNING: memory must be freed!)
+// Insert text in a defined byte position
 //
 static int cmd_textinsert(int argc, slib_par_t *params, var_t *retval) {
   auto text = get_param_str(argc, params, 0, 0);
@@ -3157,7 +3199,19 @@ static int cmd_textinsert(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Check if two text string are equal
+// Insert text in a defined byte position; the buffer raylib allocates is released here with MemFree()
+//
+static int cmd_textinsertalloc(int argc, slib_par_t *params, var_t *retval) {
+  auto text = get_param_str(argc, params, 0, 0);
+  auto insert = get_param_str(argc, params, 1, 0);
+  auto fnResult = (const char *)TextInsertAlloc(text, insert, get_param_int(argc, params, 2, 0));
+  v_setstr(retval, fnResult);
+  MemFree((void *)fnResult); // NOTE(review): assumes v_setstr stores its own copy of the text - confirm
+  return 1;
+}
+
+//
+// Check if two text strings are equal
 //
 static int cmd_textisequal(int argc, slib_par_t *params, var_t *retval) {
   auto text1 = get_param_str(argc, params, 0, 0);
@@ -3188,7 +3242,7 @@ static int cmd_textremovespaces(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Replace text string (WARNING: memory must be freed!)
+// Replace text string with new string
 //
 static int cmd_textreplace(int argc, slib_par_t *params, var_t *retval) {
   auto text = get_param_str(argc, params, 0, 0);
@@ -3200,7 +3254,19 @@ static int cmd_textreplace(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Replace text between two specific strings (WARNING: memory must be freed!)
+// Replace text string with new string; the buffer raylib allocates is released here with MemFree()
+//
+static int cmd_textreplacealloc(int argc, slib_par_t *params, var_t *retval) {
+  auto text = get_param_str(argc, params, 0, 0);
+  auto search = get_param_str(argc, params, 1, 0);
+  auto fnResult = (const char *)TextReplaceAlloc(text, search, get_param_str(argc, params, 2, 0));
+  v_setstr(retval, fnResult);
+  MemFree((void *)fnResult); // NOTE(review): assumes v_setstr stores its own copy of the text - confirm
+  return 1;
+}
+
+//
+// Replace text between two specific strings
 //
 static int cmd_textreplacebetween(int argc, slib_par_t *params, var_t *retval) {
   auto text = get_param_str(argc, params, 0, 0);
@@ -3212,6 +3278,19 @@ static int cmd_textreplacebetween(int argc, slib_par_t *params, var_t *retval) {
   return 1;
 }
 
+//
+// Replace text between two specific strings; the buffer raylib allocates is released here with MemFree()
+//
+static int cmd_textreplacebetweenalloc(int argc, slib_par_t *params, var_t *retval) {
+  auto text = get_param_str(argc, params, 0, 0);
+  auto begin = get_param_str(argc, params, 1, 0);
+  auto end = get_param_str(argc, params, 2, 0);
+  auto fnResult = (const char *)TextReplaceBetweenAlloc(text, begin, end, get_param_str(argc, params, 3, 0));
+  v_setstr(retval, fnResult);
+  MemFree((void *)fnResult); // NOTE(review): assumes v_setstr stores its own copy of the text - confirm
+  return 1;
+}
+
 //
 // Get a piece of a text string
 //
diff --git a/raylib/main.cpp b/raylib/main.cpp
index 06598c9..ac63334 100644
--- a/raylib/main.cpp
+++ b/raylib/main.cpp
@@ -360,8 +360,7 @@ static FilePathList get_param_filepathlist(int argc, slib_par_t *params, int n)
   if (is_param_array(argc, params, n)) {
     var_p_t array = params[n].var_p;
     result.count = v_asize(array);
-    result.capacity = result.count;
-    result.paths = (char **)malloc(result.capacity * sizeof(char *));
+    result.paths = (char **)malloc(result.count * sizeof(char *));
     for (unsigned index = 0; index < result.count; index++) {
       result.paths[index] = (char *)malloc(MAX_FILEPATH_LENGTH * sizeof(char));
       var_p_t elem = v_elem(array, index);
@@ -750,7 +749,7 @@ static void v_setmodel(var_t *var, Model &model) {
   map_init_id(var, id, CLS_MODELMAP);
   v_setint(map_add_var(var, "meshCount", 0), model.meshCount);
   v_setint(map_add_var(var, "materialCount", 0), model.materialCount);
-  v_setint(map_add_var(var, "boneCount", 0), model.boneCount);
+  v_setint(map_add_var(var, "boneCount", 0), model.skeleton.boneCount);
 }
 
 static void v_setmusic(var_t *var, Music &music) {
@@ -770,19 +769,19 @@ static void v_setmodel_animation(var_t *var, ModelAnimation *anims, int animsCou
     _modelAnimationMap[id] = anims[i];
     map_init_id(v_anim, id, CLS_MODELANIMATIONMAP);
 
-    int frameCount = anims[i].frameCount;
+    int keyframeCount = anims[i].keyframeCount;
     int boneCount = anims[i].boneCount;
-    map_add_var(v_anim, "frameCount", frameCount);
+    map_add_var(v_anim, "keyframeCount", keyframeCount);
     map_add_var(v_anim, "boneCount", boneCount);
 
     var_t *v_framePoses = map_add_var(v_anim, "framePoses", 0);
-    v_tomatrix(v_framePoses, frameCount, boneCount);
-    for (int frame = 0; frame < frameCount; frame++) {
+    v_tomatrix(v_framePoses, keyframeCount, boneCount);
+    for (int frame = 0; frame < keyframeCount; frame++) {
       for (int bone = 0; bone < boneCount; bone++) {
         var_t *v_transform = v_elem(v_framePoses, ((boneCount * frame) + bone));
         map_init(v_transform);
-        v_setvec3(map_add_var(v_transform, "translation", 0), anims[0].framePoses[frame][bone].translation);
-        v_setvec3(map_add_var(v_transform, "scale", 0), anims[0].framePoses[frame][bone].scale);
+        v_setvec3(map_add_var(v_transform, "translation", 0), anims[i].keyframePoses[frame][bone].translation);
+        v_setvec3(map_add_var(v_transform, "scale", 0), anims[i].keyframePoses[frame][bone].scale);
       }
     }
   }
@@ -1979,7 +1978,7 @@ SBLIB_API int sblib_free(int cls_id, int id) {
       break;
     case CLS_MODELANIMATIONMAP:
       if (_modelAnimationMap.find(id) != _modelAnimationMap.end()) {
-        UnloadModelAnimation(_modelAnimationMap.at(id));
+        UnloadModelAnimations(&_modelAnimationMap.at(id), 1); // NOTE(review): passes the address of a map element; confirm UnloadModelAnimations does not free the array pointer itself
         _modelAnimationMap.erase(id);
       }
       break;
diff --git a/raylib/proc-def.h b/raylib/proc-def.h
index e4456bf..fe4a164 100644
--- a/raylib/proc-def.h
+++ b/raylib/proc-def.h
@@ -19,7 +19,7 @@
   {6, 6, "DRAWCAPSULEWIRES", cmd_drawcapsulewires},
   {4, 4, "DRAWCIRCLE", cmd_drawcircle},
   {5, 5, "DRAWCIRCLE3D", cmd_drawcircle3d},
-  {5, 5, "DRAWCIRCLEGRADIENT", cmd_drawcirclegradient},
+  {4, 4, "DRAWCIRCLEGRADIENT", cmd_drawcirclegradient},
   {4, 4, "DRAWCIRCLELINES", cmd_drawcirclelines},
   {3, 3, "DRAWCIRCLELINESV", cmd_drawcirclelinesv},
   {6, 6, "DRAWCIRCLESECTOR", cmd_drawcirclesector},
@@ -48,8 +48,6 @@
   {3, 3, "DRAWLINEV", cmd_drawlinev},
   {4, 4, "DRAWMODEL", cmd_drawmodel},
   {6, 6, "DRAWMODELEX", cmd_drawmodelex},
-  {4, 4, "DRAWMODELPOINTS", cmd_drawmodelpoints},
-  {6, 6, "DRAWMODELPOINTSEX", cmd_drawmodelpointsex},
   {4, 4, "DRAWMODELWIRES", cmd_drawmodelwires},
   {6, 6, "DRAWMODELWIRESEX", cmd_drawmodelwiresex},
   {3, 3, "DRAWPIXEL", cmd_drawpixel},
@@ -101,6 +99,7 @@
   {4, 4, "DRAWTRIANGLE", cmd_drawtriangle},
   {4, 4, "DRAWTRIANGLE3D", cmd_drawtriangle3d},
   {3, 3, "DRAWTRIANGLEFAN", cmd_drawtrianglefan},
+  {6, 6, "DRAWTRIANGLEGRADIENT", cmd_drawtrianglegradient},
   {4, 4, "DRAWTRIANGLELINES", cmd_drawtrianglelines},
   {3, 3, "DRAWTRIANGLESTRIP", cmd_drawtrianglestrip},
   {3, 3, "DRAWTRIANGLESTRIP3D", cmd_drawtrianglestrip3d},
@@ -142,14 +141,15 @@
   {4, 4, "IMAGEDRAWPIXEL", cmd_imagedrawpixel},
   {3, 3, "IMAGEDRAWPIXELV", cmd_imagedrawpixelv},
   {6, 6, "IMAGEDRAWRECTANGLE", cmd_imagedrawrectangle},
-  {4, 4, "IMAGEDRAWRECTANGLELINES", cmd_imagedrawrectanglelines},
+  {6, 6, "IMAGEDRAWRECTANGLELINES", cmd_imagedrawrectanglelines},
+  {4, 4, "IMAGEDRAWRECTANGLELINESEX", cmd_imagedrawrectanglelinesex},
   {3, 3, "IMAGEDRAWRECTANGLEREC", cmd_imagedrawrectanglerec},
   {4, 4, "IMAGEDRAWRECTANGLEV", cmd_imagedrawrectanglev},
   {6, 6, "IMAGEDRAWTEXT", cmd_imagedrawtext},
   {7, 7, "IMAGEDRAWTEXTEX", cmd_imagedrawtextex},
   {5, 5, "IMAGEDRAWTRIANGLE", cmd_imagedrawtriangle},
-  {7, 7, "IMAGEDRAWTRIANGLEEX", cmd_imagedrawtriangleex},
   {4, 4, "IMAGEDRAWTRIANGLEFAN", cmd_imagedrawtrianglefan},
+  {7, 7, "IMAGEDRAWTRIANGLEGRADIENT", cmd_imagedrawtrianglegradient},
   {5, 5, "IMAGEDRAWTRIANGLELINES", cmd_imagedrawtrianglelines},
   {4, 4, "IMAGEDRAWTRIANGLESTRIP", cmd_imagedrawtrianglestrip},
   {1, 1, "IMAGEFLIPHORIZONTAL", cmd_imagefliphorizontal},
@@ -250,7 +250,6 @@
   {1, 1, "UNLOADIMAGEPALETTE", cmd_unloadimagepalette},
   {1, 1, "UNLOADMESH", cmd_unloadmesh},
   {1, 1, "UNLOADMODEL", cmd_unloadmodel},
-  {1, 1, "UNLOADMODELANIMATION", cmd_unloadmodelanimation},
   {2, 2, "UNLOADMODELANIMATIONS", cmd_unloadmodelanimations},
   {1, 1, "UNLOADMUSICSTREAM", cmd_unloadmusicstream},
   {0, 0, "UNLOADRANDOMSEQUENCE", cmd_unloadrandomsequence},
@@ -265,7 +264,7 @@
   {3, 3, "UPDATEAUDIOSTREAM", cmd_updateaudiostream},
   {5, 5, "UPDATEMESHBUFFER", cmd_updatemeshbuffer},
   {3, 3, "UPDATEMODELANIMATION", cmd_updatemodelanimation},
-  {3, 3, "UPDATEMODELANIMATIONBONES", cmd_updatemodelanimationbones},
+  {6, 6, "UPDATEMODELANIMATIONEX", cmd_updatemodelanimationex},
   {1, 1, "UPDATEMUSICSTREAM", cmd_updatemusicstream},
   {3, 3, "UPDATESOUND", cmd_updatesound},
   {3, 3, "UPDATETEXTUREREC", cmd_updatetexturerec},
diff --git a/raylib/proc.h b/raylib/proc.h
index f549c73..68e4e82 100644
--- a/raylib/proc.h
+++ b/raylib/proc.h
@@ -8,7 +8,7 @@ static int cmd_beginblendmode(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Setup canvas (framebuffer) to start drawing
+// Begin canvas (framebuffer) drawing
 //
 static int cmd_begindrawing(int argc, slib_par_t *params, var_t *retval) {
   BeginDrawing();
@@ -70,7 +70,7 @@ static int cmd_begintexturemode(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Set background color (framebuffer clear color)
+// Clear background (framebuffer) to color
 //
 static int cmd_clearbackground(int argc, slib_par_t *params, var_t *retval) {
   auto color = get_param_color(argc, params, 0);
@@ -104,7 +104,7 @@ static int cmd_closewindow(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Disables cursor (lock cursor)
+// Disable cursor (lock cursor)
 //
 static int cmd_disablecursor(int argc, slib_par_t *params, var_t *retval) {
   DisableCursor();
@@ -198,10 +198,10 @@ static int cmd_drawcapsule(int argc, slib_par_t *params, var_t *retval) {
   auto startPos = get_param_vec3(argc, params, 0);
   auto endPos = get_param_vec3(argc, params, 1);
   auto radius = get_param_num(argc, params, 2, 0);
-  auto slices = get_param_int(argc, params, 3, 0);
-  auto rings = get_param_int(argc, params, 4, 0);
+  auto rings = get_param_int(argc, params, 3, 0);
+  auto slices = get_param_int(argc, params, 4, 0);
   auto color = get_param_color(argc, params, 5);
-  DrawCapsule(startPos, endPos, radius, slices, rings, color);
+  DrawCapsule(startPos, endPos, radius, rings, slices, color);
   return 1;
 }
 
@@ -212,10 +212,10 @@ static int cmd_drawcapsulewires(int argc, slib_par_t *params, var_t *retval) {
   auto startPos = get_param_vec3(argc, params, 0);
   auto endPos = get_param_vec3(argc, params, 1);
   auto radius = get_param_num(argc, params, 2, 0);
-  auto slices = get_param_int(argc, params, 3, 0);
-  auto rings = get_param_int(argc, params, 4, 0);
+  auto rings = get_param_int(argc, params, 3, 0);
+  auto slices = get_param_int(argc, params, 4, 0);
   auto color = get_param_color(argc, params, 5);
-  DrawCapsuleWires(startPos, endPos, radius, slices, rings, color);
+  DrawCapsuleWires(startPos, endPos, radius, rings, slices, color);
   return 1;
 }
 
@@ -248,12 +248,11 @@ static int cmd_drawcircle3d(int argc, slib_par_t *params, var_t *retval) {
 // Draw a gradient-filled circle
 //
 static int cmd_drawcirclegradient(int argc, slib_par_t *params, var_t *retval) {
-  auto centerX = get_param_int(argc, params, 0, 0);
-  auto centerY = get_param_int(argc, params, 1, 0);
-  auto radius = get_param_num(argc, params, 2, 0);
-  auto inner = get_param_color(argc, params, 3);
-  auto outer = get_param_color(argc, params, 4);
-  DrawCircleGradient(centerX, centerY, radius, inner, outer);
+  auto center = get_param_vec2(argc, params, 0);
+  auto radius = get_param_num(argc, params, 1, 0);
+  auto inner = get_param_color(argc, params, 2);
+  auto outer = get_param_color(argc, params, 3);
+  DrawCircleGradient(center, radius, inner, outer);
   return 1;
 }
 
@@ -417,9 +416,9 @@ static int cmd_drawcylinderwiresex(int argc, slib_par_t *params, var_t *retval)
   auto endPos = get_param_vec3(argc, params, 1);
   auto startRadius = get_param_num(argc, params, 2, 0);
   auto endRadius = get_param_num(argc, params, 3, 0);
-  auto sides = get_param_int(argc, params, 4, 0);
+  auto slices = get_param_int(argc, params, 4, 0);
   auto color = get_param_color(argc, params, 5);
-  DrawCylinderWiresEx(startPos, endPos, startRadius, endRadius, sides, color);
+  DrawCylinderWiresEx(startPos, endPos, startRadius, endRadius, slices, color);
   return 1;
 }
 
@@ -614,44 +613,6 @@ static int cmd_drawmodelex(int argc, slib_par_t *params, var_t *retval) {
   return result;
 }
 
-//
-// Draw a model as points
-//
-static int cmd_drawmodelpoints(int argc, slib_par_t *params, var_t *retval) {
-  int result;
-  int model_id = get_model_id(argc, params, 0, retval);
-  if (model_id != -1) {
-    auto position = get_param_vec3(argc, params, 1);
-    auto scale = get_param_num(argc, params, 2, 0);
-    auto tint = get_param_color(argc, params, 3);
-    DrawModelPoints(_modelMap.at(model_id), position, scale, tint);
-    result = 1;
-  } else {
-    result = 0;
-  }
-  return result;
-}
-
-//
-// Draw a model as points with extended parameters
-//
-static int cmd_drawmodelpointsex(int argc, slib_par_t *params, var_t *retval) {
-  int result;
-  int model_id = get_model_id(argc, params, 0, retval);
-  if (model_id != -1) {
-    auto position = get_param_vec3(argc, params, 1);
-    auto rotationAxis = get_param_vec3(argc, params, 2);
-    auto rotationAngle = get_param_num(argc, params, 3, 0);
-    auto scale = get_param_vec3(argc, params, 4);
-    auto tint = get_param_color(argc, params, 5);
-    DrawModelPointsEx(_modelMap.at(model_id), position, rotationAxis, rotationAngle, scale, tint);
-    result = 1;
-  } else {
-    result = 0;
-  }
-  return result;
-}
-
 //
 // Draw a model wires (with texture if set)
 //
@@ -733,7 +694,7 @@ static int cmd_drawpoint3d(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Draw a regular polygon (Vector version)
+// Draw a polygon of n sides
 //
 static int cmd_drawpoly(int argc, slib_par_t *params, var_t *retval) {
   auto center = get_param_vec2(argc, params, 0);
@@ -907,7 +868,7 @@ static int cmd_drawrectangleroundedlines(int argc, slib_par_t *params, var_t *re
 }
 
 //
-// Draw rectangle with rounded edges outline
+// Draw rectangle lines with rounded edges outline
 //
 static int cmd_drawrectangleroundedlinesex(int argc, slib_par_t *params, var_t *retval) {
   auto rec = get_param_rect(argc, params, 0);
@@ -1157,7 +1118,7 @@ static int cmd_drawtextcodepoint(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Draw multiple character (codepoint)
+// Draw multiple characters (codepoints)
 //
 static int cmd_drawtextcodepoints(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -1257,7 +1218,7 @@ static int cmd_drawtextureex(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Draws a texture (or part of it) that stretches or shrinks nicely
+// Draw a texture (or part of it) that stretches or shrinks nicely
 //
 static int cmd_drawtexturenpatch(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -1366,6 +1327,20 @@ static int cmd_drawtrianglefan(int argc, slib_par_t *params, var_t *retval) {
   return 1;
 }
 
+//
+// Draw triangle with interpolated colors (vertex in counter-clockwise order!)
+//
+static int cmd_drawtrianglegradient(int argc, slib_par_t *params, var_t *retval) {
+  auto v1 = get_param_vec2(argc, params, 0);
+  auto v2 = get_param_vec2(argc, params, 1);
+  auto v3 = get_param_vec2(argc, params, 2);
+  auto c1 = get_param_color(argc, params, 3);
+  auto c2 = get_param_color(argc, params, 4);
+  auto c3 = get_param_color(argc, params, 5);
+  DrawTriangleGradient(v1, v2, v3, c1, c2, c3);
+  return 1;
+}
+
 //
 // Draw triangle outline (vertex in counter-clockwise order!)
 //
@@ -1401,7 +1376,7 @@ static int cmd_drawtrianglestrip3d(int argc, slib_par_t *params, var_t *retval)
 }
 
 //
-// Enables cursor (unlock cursor)
+// Enable cursor (unlock cursor)
 //
 static int cmd_enablecursor(int argc, slib_par_t *params, var_t *retval) {
   EnableCursor();
@@ -1425,7 +1400,7 @@ static int cmd_endblendmode(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// End canvas drawing and swap buffers (double buffering)
+// End canvas (framebuffer) drawing and swap buffers (double buffering)
 //
 static int cmd_enddrawing(int argc, slib_par_t *params, var_t *retval) {
   EndDrawing();
@@ -1433,7 +1408,7 @@ static int cmd_enddrawing(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Ends 2D mode with custom camera
+// End 2D mode with custom camera
 //
 static int cmd_endmode2d(int argc, slib_par_t *params, var_t *retval) {
   EndMode2D();
@@ -1441,7 +1416,7 @@ static int cmd_endmode2d(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Ends 3D mode and returns to default 2D orthographic mode
+// End 3D mode and return to default 2D orthographic mode
 //
 static int cmd_endmode3d(int argc, slib_par_t *params, var_t *retval) {
   EndMode3D();
@@ -1465,7 +1440,7 @@ static int cmd_endshadermode(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Ends drawing to render texture
+// End drawing to render texture
 //
 static int cmd_endtexturemode(int argc, slib_par_t *params, var_t *retval) {
   EndTextureMode();
@@ -1499,7 +1474,7 @@ static int cmd_gentexturemipmaps(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Hides cursor
+// Hide cursor
 //
 static int cmd_hidecursor(int argc, slib_par_t *params, var_t *retval) {
   HideCursor();
@@ -1625,7 +1600,7 @@ static int cmd_imagecolorcontrast(int argc, slib_par_t *params, var_t *retval) {
   int result;
   int image_id = get_image_id(argc, params, 0, retval);
   if (image_id != -1) {
-    auto contrast = get_param_num(argc, params, 1, 0);
+    auto contrast = get_param_int(argc, params, 1, 0);
     ImageColorContrast(&_imageMap.at(image_id), contrast);
     result = 1;
   } else {
@@ -1941,13 +1916,33 @@ static int cmd_imagedrawrectangle(int argc, slib_par_t *params, var_t *retval) {
 // Draw rectangle lines within an image
 //
 static int cmd_imagedrawrectanglelines(int argc, slib_par_t *params, var_t *retval) {
+  int result;
+  int dst_id = get_image_id(argc, params, 0, retval);
+  if (dst_id != -1) {
+    auto posX = get_param_int(argc, params, 1, 0);
+    auto posY = get_param_int(argc, params, 2, 0);
+    auto width = get_param_int(argc, params, 3, 0);
+    auto height = get_param_int(argc, params, 4, 0);
+    auto color = get_param_color(argc, params, 5);
+    ImageDrawRectangleLines(&_imageMap.at(dst_id), posX, posY, width, height, color);
+    result = 1;
+  } else {
+    result = 0;
+  }
+  return result;
+}
+
+//
+// Draw rectangle lines within an image with extended parameters
+//
+static int cmd_imagedrawrectanglelinesex(int argc, slib_par_t *params, var_t *retval) {
   int result;
   int dst_id = get_image_id(argc, params, 0, retval);
   if (dst_id != -1) {
     auto rec = get_param_rect(argc, params, 1);
     auto thick = get_param_int(argc, params, 2, 0);
     auto color = get_param_color(argc, params, 3);
-    ImageDrawRectangleLines(&_imageMap.at(dst_id), rec, thick, color);
+    ImageDrawRectangleLinesEx(&_imageMap.at(dst_id), rec, thick, color);
     result = 1;
   } else {
     result = 0;
@@ -2051,19 +2046,16 @@ static int cmd_imagedrawtriangle(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Draw triangle with interpolated colors within an image
+// Draw a triangle fan defined by points within an image (first vertex is the center)
 //
-static int cmd_imagedrawtriangleex(int argc, slib_par_t *params, var_t *retval) {
+static int cmd_imagedrawtrianglefan(int argc, slib_par_t *params, var_t *retval) {
   int result;
   int dst_id = get_image_id(argc, params, 0, retval);
   if (dst_id != -1) {
-    auto v1 = get_param_vec2(argc, params, 1);
-    auto v2 = get_param_vec2(argc, params, 2);
-    auto v3 = get_param_vec2(argc, params, 3);
-    auto c1 = get_param_color(argc, params, 4);
-    auto c2 = get_param_color(argc, params, 5);
-    auto c3 = get_param_color(argc, params, 6);
-    ImageDrawTriangleEx(&_imageMap.at(dst_id), v1, v2, v3, c1, c2, c3);
+    auto points = (Vector2 *)get_param_vec2_array(argc, params, 1);
+    auto pointCount = get_param_int(argc, params, 2, 0);
+    auto color = get_param_color(argc, params, 3);
+    ImageDrawTriangleFan(&_imageMap.at(dst_id), points, pointCount, color);
     result = 1;
   } else {
     result = 0;
@@ -2072,16 +2064,19 @@ static int cmd_imagedrawtriangleex(int argc, slib_par_t *params, var_t *retval)
 }
 
 //
-// Draw a triangle fan defined by points within an image (first vertex is the center)
+// Draw triangle with interpolated colors within an image
 //
-static int cmd_imagedrawtrianglefan(int argc, slib_par_t *params, var_t *retval) {
+static int cmd_imagedrawtrianglegradient(int argc, slib_par_t *params, var_t *retval) {
   int result;
   int dst_id = get_image_id(argc, params, 0, retval);
   if (dst_id != -1) {
-    auto points = (Vector2 *)get_param_vec2_array(argc, params, 1);
-    auto pointCount = get_param_int(argc, params, 2, 0);
-    auto color = get_param_color(argc, params, 3);
-    ImageDrawTriangleFan(&_imageMap.at(dst_id), points, pointCount, color);
+    auto v1 = get_param_vec2(argc, params, 1);
+    auto v2 = get_param_vec2(argc, params, 2);
+    auto v3 = get_param_vec2(argc, params, 3);
+    auto c1 = get_param_color(argc, params, 4);
+    auto c2 = get_param_color(argc, params, 5);
+    auto c3 = get_param_color(argc, params, 6);
+    ImageDrawTriangleGradient(&_imageMap.at(dst_id), v1, v2, v3, c1, c2, c3);
     result = 1;
   } else {
     result = 0;
@@ -2542,7 +2537,7 @@ static int cmd_setaudiostreambuffersizedefault(int argc, slib_par_t *params, var
 }
 
 //
-// Set pan for audio stream (0.5 is centered)
+// Set pan for audio stream (-1.0 left, 0.0 center, 1.0 right)
 //
 static int cmd_setaudiostreampan(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -2623,7 +2618,7 @@ static int cmd_setclipboardtext(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Setup init configuration flags (view FLAGS)
+// Set up init configuration flags (view FLAGS)
 //
 static int cmd_setconfigflags(int argc, slib_par_t *params, var_t *retval) {
   auto flags = get_param_int(argc, params, 0, 0);
@@ -2721,7 +2716,7 @@ static int cmd_setmousescale(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Set pan for a music (-1.0 left, 0.0 center, 1.0 right)
+// Set pan for music (-1.0 left, 0.0 center, 1.0 right)
 //
 static int cmd_setmusicpan(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -2737,7 +2732,7 @@ static int cmd_setmusicpan(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Set pitch for a music (1.0 is base level)
+// Set pitch for music (1.0 is base level)
 //
 static int cmd_setmusicpitch(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -3074,7 +3069,7 @@ static int cmd_setwindowtitle(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Shows cursor
+// Show cursor
 //
 static int cmd_showcursor(int argc, slib_par_t *params, var_t *retval) {
   ShowCursor();
@@ -3336,22 +3331,6 @@ static int cmd_unloadmodel(int argc, slib_par_t *params, var_t *retval) {
   return result;
 }
 
-//
-// Unload animation data
-//
-static int cmd_unloadmodelanimation(int argc, slib_par_t *params, var_t *retval) {
-  int result;
-  int anim_id = get_model_animation_id(argc, params, 0, retval);
-  if (anim_id != -1) {
-    UnloadModelAnimation(_modelAnimationMap.at(anim_id));
-    _modelAnimationMap.erase(anim_id);
-    result = 1;
-  } else {
-    result = 0;
-  }
-  return result;
-}
-
 //
 // Unload animation array data
 //
@@ -3429,7 +3408,7 @@ static int cmd_unloadsound(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Unload a sound alias (does not deallocate sample data)
+// Unload sound alias (does not deallocate sample data)
 //
 static int cmd_unloadsoundalias(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -3531,14 +3510,14 @@ static int cmd_updatemeshbuffer(int argc, slib_par_t *params, var_t *retval) {
 }
 
 //
-// Update model animation pose (CPU)
+// Update model animation pose (vertex buffers and bone matrices)
 //
 static int cmd_updatemodelanimation(int argc, slib_par_t *params, var_t *retval) {
   int result;
   int model_id = get_model_id(argc, params, 0, retval);
   int anim_id = get_model_animation_id(argc, params, 1, retval);
   if (model_id != -1 && anim_id != -1) {
-    auto frame = get_param_int(argc, params, 2, 0);
+    auto frame = get_param_num(argc, params, 2, 0);
     UpdateModelAnimation(_modelMap.at(model_id), _modelAnimationMap.at(anim_id), frame);
     result = 1;
   } else {
@@ -3548,15 +3527,18 @@ static int cmd_updatemodelanimation(int argc, slib_par_t *params, var_t *retval)
 }
 
 //
-// Update model animation mesh bone matrices (GPU skinning)
+// Update model animation pose, blending two animations
 //
-static int cmd_updatemodelanimationbones(int argc, slib_par_t *params, var_t *retval) {
+static int cmd_updatemodelanimationex(int argc, slib_par_t *params, var_t *retval) {
   int result;
   int model_id = get_model_id(argc, params, 0, retval);
-  int anim_id = get_model_animation_id(argc, params, 1, retval);
-  if (model_id != -1 && anim_id != -1) {
-    auto frame = get_param_int(argc, params, 2, 0);
-    UpdateModelAnimationBones(_modelMap.at(model_id), _modelAnimationMap.at(anim_id), frame);
+  int anima_id = get_model_animation_id(argc, params, 1, retval);
+  int animb_id = get_model_animation_id(argc, params, 3, retval);
+  if (model_id != -1 && anima_id != -1 && animb_id != -1) {
+    auto frameA = get_param_num(argc, params, 2, 0);
+    auto frameB = get_param_num(argc, params, 4, 0);
+    auto blend = get_param_num(argc, params, 5, 0);
+    UpdateModelAnimationEx(_modelMap.at(model_id), _modelAnimationMap.at(anima_id), frameA, _modelAnimationMap.at(animb_id), frameB, blend);
     result = 1;
   } else {
     result = 0;
@@ -3565,7 +3547,7 @@ static int cmd_updatemodelanimationbones(int argc, slib_par_t *params, var_t *re
 }
 
 //
-// Updates buffers for music streaming
+// Update buffers for music streaming
 //
 static int cmd_updatemusicstream(int argc, slib_par_t *params, var_t *retval) {
   int result;
@@ -3587,8 +3569,8 @@ static int cmd_updatesound(int argc, slib_par_t *params, var_t *retval) {
   int sound_id = get_sound_id(argc, params, 0, retval);
   if (sound_id != -1) {
     auto data = (const void *)get_param_int_t(argc, params, 1, 0);
-    auto sampleCount = get_param_int(argc, params, 2, 0);
-    UpdateSound(_soundMap.at(sound_id), data, sampleCount);
+    auto frameCount = get_param_int(argc, params, 2, 0);
+    UpdateSound(_soundMap.at(sound_id), data, frameCount);
     result = 1;
   } else {
     result = 0;
diff --git a/raylib/raygui b/raylib/raygui
index 9a95871..6d2b28f 160000
--- a/raylib/raygui
+++ b/raylib/raygui
@@ -1 +1 @@
-Subproject commit 9a95871701a5fc63bea35eab73fef6414e048b73
+Subproject commit 6d2b28ff748158be0d63d07988d5d0672905dedf
diff --git a/raylib/raylib b/raylib/raylib
index c610d22..95bfa19 160000
--- a/raylib/raylib
+++ b/raylib/raylib
@@ -1 +1 @@
-Subproject commit c610d228a244f930ad53492604640f39584c66da
+Subproject commit 95bfa196fdfb737356b8a09ab2944e765a71280a
diff --git a/websocket/main.cpp b/websocket/main.cpp
index a794e57..2f27efb 100644
--- a/websocket/main.cpp
+++ b/websocket/main.cpp
@@ -193,9 +193,9 @@ static void client_handler(mg_connection *conn, int event, void *eventData) {
 static void set_session(var_p_t var, Session *session, mg_connection *conn) {
   session->setConnection(conn);
   map_init_id(var, conn->id);
-  v_setstr(map_add_var(var, "local_ip", 0), (const char *)conn->loc.ip);
+  v_setstr(map_add_var(var, "local_ip", 0), (const char *)conn->loc.addr.ip);
   v_setint(map_add_var(var, "local_port", 0), conn->loc.port);
-  v_setstr(map_add_var(var, "remote_ip", 0), (const char *)conn->rem.ip);
+  v_setstr(map_add_var(var, "remote_ip", 0), (const char *)conn->rem.addr.ip);
   v_setint(map_add_var(var, "remote_port", 0), conn->rem.port);
 }
 
diff --git a/websocket/mongoose b/websocket/mongoose
index 55bc610..eee8c70 160000
--- a/websocket/mongoose
+++ b/websocket/mongoose
@@ -1 +1 @@
-Subproject commit 55bc6105e148633ddc65bddbdb307f1477c0fc01
+Subproject commit eee8c7077c031f22469ea3adfcc69fc6d86c479a

From 66d874c9044ac3750c9d6206eb9cf46fe9f60a60 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Wed, 13 May 2026 17:21:45 +0930
Subject: [PATCH 27/54] LLAMA: implemented mem_info command

---
 llama/llama-sb.cpp     | 65 ++++++++++++++++++++++++++++++++++++++++--
 llama/llama-sb.h       | 24 ++++++++++++++++
 llama/main.cpp         | 30 +++++++++++++++++++
 llama/samples/nitro.md |  2 ++
 4 files changed, 118 insertions(+), 3 deletions(-)

diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index 8bb0291..4d4cf7c 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -8,12 +8,26 @@
 #include <format>
 #include <span>
 #include <utility>
+#include "ggml-cuda.h"
 
 #include "llama.h"
 #include "llama-sb.h"
 
 constexpr int MAX_REPEAT = 5;
 
+static bool read_vram(size_t &used, size_t &total) {
+  size_t free = 0;
+  total = 0;
+#ifdef GGML_USE_CUDA
+  ggml_backend_cuda_get_device_memory(0, &free, &total);
+  if (total > 0) {
+    used = total - free;
+    return true;
+  }
+#endif
+  return false;
+}
+
 LlamaIter::LlamaIter() :
   _llama(nullptr),
   _repetition_count(0),
@@ -45,6 +59,7 @@ Llama::Llama() :
   _top_k(0),
   _max_tokens(0),
   _log_level(GGML_LOG_LEVEL_CONT),
+  _n_gpu_layers(0),
   _n_past(0),
   _is_gemma4(false),
   _seed(LLAMA_DEFAULT_SEED) {
@@ -78,6 +93,7 @@ Llama::Llama(Llama &&other) noexcept
   , _top_k(other._top_k)
   , _max_tokens(other._max_tokens)
   , _log_level(other._log_level)
+  , _n_gpu_layers(other._n_gpu_layers)
   , _n_past(other._n_past)
   , _is_gemma4(other._is_gemma4)
   , _seed(other._seed) {
@@ -128,6 +144,7 @@ bool Llama::construct(string model_path, int n_ctx, int n_batch, int n_gpu_layer
   }
 
   _log_level = log_level;
+  _n_gpu_layers = n_gpu_layers;
   _model = llama_model_load_from_file(model_path.c_str(), mparams);
   if (!_model) {
     _last_error = "Failed to load model";
@@ -141,8 +158,8 @@ bool Llama::construct(string model_path, int n_ctx, int n_batch, int n_gpu_layer
     cparams.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_ENABLED;
 
     // or Q4_0 for more aggressive saving
-    cparams.type_k = GGML_TYPE_Q8_0;
-    cparams.type_v = GGML_TYPE_Q8_0;
+    cparams.type_k = GGML_TYPE_Q4_0;
+    cparams.type_v = GGML_TYPE_Q4_0;
 
     // keep KV cache on GPU
     cparams.offload_kqv = true;
@@ -331,7 +348,8 @@ bool Llama::add_message(LlamaIter &iter, const string &role, const string &conte
     llama_batch batch = llama_batch_get_one(prompt_tokens.data() + i, batch_size);
     int result = llama_decode(_ctx, batch);
     if (result != 0) {
-      _last_error = std::format("Failed to decode batch. position:{} error:{}", i, result);
+      _last_error = std::format("Failed to decode batch. position:{} error:{} [size:{}, past:{}]",
+                                i, result, prompt_tokens.size(), _n_past);
       return false;
     }
   }
@@ -506,3 +524,44 @@ string Llama::all(LlamaIter &iter) {
 
   return out;
 }
+
+LlamaMemoryInfo Llama::memory_info() {
+  LlamaMemoryInfo info = {};
+
+  // KV cache usage
+  llama_memory_t mem = llama_get_memory(_ctx);
+  llama_pos pos_max  = llama_memory_seq_pos_max(mem, 0);
+  int n_ctx          = llama_n_ctx(_ctx);
+  info.kv_total      = n_ctx;
+  info.kv_used       = (pos_max < 0) ? 0 : (int)pos_max + 1;
+  info.kv_percent    = 100.0f * info.kv_used / info.kv_total;
+
+  // Model layers
+  info.n_layers_total = llama_model_n_layer(_model);
+  info.n_layers_gpu   = _n_gpu_layers;
+  info.n_layers_cpu   = info.n_layers_total - info.n_layers_gpu;
+
+  // ram
+  if (read_vram(info.vram_used, info.vram_total)) {
+    info.vram_percent = 100.0f * info.vram_used / info.vram_total;
+  }
+
+  // Advice
+  ostringstream advice;
+  if (info.n_layers_cpu > 0) {
+    advice << "CPU offload active (" << info.n_layers_cpu
+           << " layers on CPU) - increase n_gpu_layers if VRAM allows. ";
+  }
+  if (info.vram_percent > 90.0f) {
+    advice << "VRAM >90% - reduce n_ctx or use Q4_0 KV cache. ";
+  } else if (info.vram_percent < 60.0f && info.n_layers_cpu > 0) {
+    advice << "VRAM headroom available - try adding more GPU layers. ";
+  }
+  if (info.kv_percent > 80.0f) {
+    advice << "Context >80% full - consider calling clear_history(). ";
+  }
+  info.advice = advice.str();
+
+  return info;
+}
+
diff --git a/llama/llama-sb.h b/llama/llama-sb.h
index 30714a1..79dc2e3 100644
--- a/llama/llama-sb.h
+++ b/llama/llama-sb.h
@@ -14,6 +14,26 @@
 
 using namespace std;
 
+struct LlamaMemoryInfo {
+  // KV cache
+  int     kv_used;        // slots currently used
+  int     kv_total;       // total slots (== n_ctx)
+  float   kv_percent;     // kv_used / kv_total
+
+  // GPU VRAM (via ggml backend)
+  size_t  vram_used;      // bytes
+  size_t  vram_total;     // bytes
+  float   vram_percent;
+
+  // Model layers
+  int     n_layers_total; // total model layers
+  int     n_layers_gpu;   // layers offloaded to GPU
+  int     n_layers_cpu;   // layers on CPU
+
+  // Advice
+  string  advice;
+};
+
 struct Llama;
 
 struct LlamaIter {
@@ -75,6 +95,9 @@ struct Llama {
   void set_log_level(int level) { _log_level = level; }
   void reset();
 
+  // memory info
+  LlamaMemoryInfo memory_info();
+
   private:
   bool ends_with_sentence_boundary(const string &out);
   bool configure_sampler();
@@ -102,6 +125,7 @@ struct Llama {
   int _top_k;
   int _max_tokens;
   int _log_level;
+  int _n_gpu_layers;
   int _n_past;
   bool _is_gemma4;
   unsigned int _seed;
diff --git a/llama/main.cpp b/llama/main.cpp
index 9eede4a..aa235d9 100644
--- a/llama/main.cpp
+++ b/llama/main.cpp
@@ -431,6 +431,35 @@ static int cmd_llama_add_message(var_s *self, int argc, slib_par_t *arg, var_s *
   return result;
 }
 
+//
+// llama.mem_info(): returns a map describing KV cache, VRAM and layer usage
+//
+static int cmd_llama_mem_info(var_s *self, int argc, slib_par_t *arg, var_s *retval) {
+  int result = 0;
+  if (argc != 0) {
+    error(retval, "llama.mem_info", 0, 0);
+  } else {
+    int id = get_llama_class_id(self, retval);
+    if (id != -1) {
+      Llama &llama = g_llama.at(id);
+      auto mem_info = llama.memory_info();
+      map_init(retval);
+      v_setint(map_add_var(retval, "kv_used", 0), mem_info.kv_used);
+      v_setint(map_add_var(retval, "kv_total", 0), mem_info.kv_total);
+      v_setreal(map_add_var(retval, "kv_percent", 0), mem_info.kv_percent);
+      v_setint(map_add_var(retval, "vram_used", 0), mem_info.vram_used);
+      v_setint(map_add_var(retval, "vram_total", 0), mem_info.vram_total);
+      v_setreal(map_add_var(retval, "vram_percent", 0), mem_info.vram_percent);
+      v_setint(map_add_var(retval, "n_layers_cpu", 0), mem_info.n_layers_cpu);
+      v_setint(map_add_var(retval, "n_layers_gpu", 0), mem_info.n_layers_gpu);
+      v_setint(map_add_var(retval, "n_layers_total", 0), mem_info.n_layers_total);
+      v_setstr(map_add_var(retval, "advice", 0), mem_info.advice.c_str());
+      result = 1;
+    }
+  }
+  return result;
+}
+
 static int cmd_create_llama(int argc, slib_par_t *params, var_t *retval) {
   int result;
   auto model = expand_path(get_param_str(argc, params, 0, ""));
@@ -456,6 +485,7 @@ static int cmd_create_llama(int argc, slib_par_t *params, var_t *retval) {
     v_create_callback(retval, "set_top_p", cmd_llama_set_top_p);
     v_create_callback(retval, "set_grammar", cmd_llama_set_grammar);
     v_create_callback(retval, "set_seed", cmd_llama_set_seed);
+    v_create_callback(retval, "mem_info", cmd_llama_mem_info);
     result = 1;
   } else {
     error(retval, llama.last_error());
diff --git a/llama/samples/nitro.md b/llama/samples/nitro.md
index 8fa570a..cae4896 100644
--- a/llama/samples/nitro.md
+++ b/llama/samples/nitro.md
@@ -53,6 +53,8 @@ Available commands:
 - TOOL:LIST  `[directory_path. items enclosed in square brackets (`[...]`) represent directories within the file listing output]`
 - TOOL:READ  `[file_path]`
 - TOOL:WRITE `[file_path]`
+- TOOL:EXISTS `[file_path]`
+- TOOL:PERMISSION `[Request user permission before overwriting a file]`
 - TOOL:DATE  `[Returns the current date as string with format “DD/MM/YYYY”]`
 - TOOL:TIME  `[Returns the time in “HH:MM:SS” format]`
 - TOOL:RND   [Returns a random number betweem 0 and 1]`

From 5bff373bddd14a1a90e931414af8d11a66fc3933 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Wed, 13 May 2026 20:01:15 +0930
Subject: [PATCH 28/54] LLAMA: implemented mem_info command

---
 llama/llama-sb.cpp          | 10 +++++++++-
 llama/llama.cpp             |  2 +-
 llama/samples/nitro_cli.bas | 36 +++++++++++++++++++++++++-----------
 3 files changed, 35 insertions(+), 13 deletions(-)

diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index 4d4cf7c..4fde33d 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -537,8 +537,9 @@ LlamaMemoryInfo Llama::memory_info() {
   info.kv_percent    = 100.0f * info.kv_used / info.kv_total;
 
   // Model layers
+  auto n_gpu_layers = std::max(0, _n_gpu_layers);
   info.n_layers_total = llama_model_n_layer(_model);
-  info.n_layers_gpu   = _n_gpu_layers;
+  info.n_layers_gpu   = std::min(info.n_layers_total, n_gpu_layers);
   info.n_layers_cpu   = info.n_layers_total - info.n_layers_gpu;
 
   // ram
@@ -548,6 +549,13 @@ LlamaMemoryInfo Llama::memory_info() {
 
   // Advice
   ostringstream advice;
+
+  if (n_gpu_layers < info.n_layers_total) {
+    advice << "Only " << n_gpu_layers << "/" << info.n_layers_total
+           << " layers on GPU - increase n_gpu_layers if VRAM allows. ";
+  } else {
+    advice << "All " << info.n_layers_total << " layers on GPU. ";
+  }
   if (info.n_layers_cpu > 0) {
     advice << "CPU offload active (" << info.n_layers_cpu
            << " layers on CPU) - increase n_gpu_layers if VRAM allows. ";
diff --git a/llama/llama.cpp b/llama/llama.cpp
index 58e68df..5d44db6 160000
--- a/llama/llama.cpp
+++ b/llama/llama.cpp
@@ -1 +1 @@
-Subproject commit 58e68df0f91dd16ff56423ee5ef44062ed73bdfc
+Subproject commit 5d44db60089b0381cdbf7c45ce9ded43fc0c7f4c
diff --git a/llama/samples/nitro_cli.bas b/llama/samples/nitro_cli.bas
index 7a94294..690ebf5 100644
--- a/llama/samples/nitro_cli.bas
+++ b/llama/samples/nitro_cli.bas
@@ -20,7 +20,7 @@ const WHITE = chr(27) + "[37m"
 const BOLD_CYAN = chr(27) + "[1;36m"
 
 ' llama configuration (quen settings)
-const n_ctx = 32768
+const n_ctx = 65536
 const n_batch = 512
 const n_max_tokens = 4096
 const n_temperature = 0.6
@@ -29,8 +29,12 @@ const n_top_p = 0.95
 const n_min_p = 0
 const n_penalty_repeat = 1.0
 const n_penalty_last_n = 256
+const n_gpu_layers = 32
 
-sandbox_home = cwd
+sandbox_home = iff(len(command) > 0, trim(command), cwd)
+if (left(sandbox_home) == "~") then
+  sandbox_home = home + mid(sandbox_home, 1)
+endif
 
 '
 ' Displays the welcome message
@@ -50,8 +54,8 @@ end sub
 ' handles the TOOL:LIST command
 '
 func tool_list_files(arg)
-  if (arg == "./") then
-    arg = sandbox_home + arg
+  if (left(arg, 2) == "./") then
+    arg = sandbox_home + mid(arg, 2)
   else if (len(arg) == 0 or arg == ".") then
     arg = sandbox_home
   endif
@@ -151,11 +155,11 @@ func process_tool(cmd)
     endif
   endif
 
-  ' print RED
-  ' print "["+op+"]"
-  ' print "["+arg1+"]"
-  ' print "["+arg2+"]"
-  ' print RESET
+   ' print RED
+   ' print "["+op+"]"
+   ' print "["+arg1+"]"
+   ' print "["+arg2+"]"
+   ' print RESET
 
   select case op
   case "TOOL:DATE"
@@ -219,7 +223,7 @@ end
 ' creates the llama instance
 '
 func create_llama()
-  local llama = llm.llama(model, n_ctx, n_batch, 50)
+  local llama = llm.llama(model, n_ctx, n_batch, n_gpu_layers)
   llama.add_stop("<|turn|>")
   llama.set_max_tokens(n_max_tokens)
   llama.set_temperature(n_temperature)
@@ -279,7 +283,16 @@ sub main()
       print
       print WHITE;
       print "--- Tokens/sec: " + round(iter.tokens_sec(), 2) + " ---\n"
-      iter = llama.add_message("user", process_input())
+      local next_iter = false
+      repeat
+        local user_input = process_input()
+        if (user_input == "/meminfo") then
+          print llama.mem_info()
+        else
+          iter = llama.add_message("user", user_input)
+          next_iter = true
+        endif
+      until next_iter
       print BLUE;
     endif
   wend
@@ -287,3 +300,4 @@ end
 
 welcome_message()
 main()
+

From 46cafef2f301f4a781a87dc0e8e5b801b2f190b9 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Wed, 13 May 2026 21:59:41 +0930
Subject: [PATCH 29/54] LLAMA: updated nitro agent - wip

---
 llama/samples/nitro_cli.bas | 51 ++++++++++++++++++++++++++-----------
 1 file changed, 36 insertions(+), 15 deletions(-)

diff --git a/llama/samples/nitro_cli.bas b/llama/samples/nitro_cli.bas
index 690ebf5..3450b42 100644
--- a/llama/samples/nitro_cli.bas
+++ b/llama/samples/nitro_cli.bas
@@ -31,10 +31,18 @@ const n_penalty_repeat = 1.0
 const n_penalty_last_n = 256
 const n_gpu_layers = 32
 
-sandbox_home = iff(len(command) > 0, trim(command), cwd)
-if (left(sandbox_home) == "~") then
-  sandbox_home = home + mid(sandbox_home, 1)
-endif
+'
+' joins the left and right sides with forward slash
+'
+func join_path(s1, s2)
+  if (right(s1, 1) == "/") then
+    s1 = left(s1, len(s1) - 1)
+  endif
+  if (left(s2, 1) == "/") then
+    s2 = mid(s2, 2)
+  endif
+  return s1 + "/" + s2
+end
 
 '
 ' Displays the welcome message
@@ -82,9 +90,9 @@ end
 '
 func tool_read_file(arg)
   try
-    tload sandbox_home + arg, result, 1
+    tload join_path(sandbox_home, arg), result, 1
   catch
-    result = "ERROR: File not found or unreadable."
+    result = "ERROR: File not found or unreadable: " + arg
   end try
   return result
 end
@@ -120,7 +128,7 @@ end
 '
 func tool_write_file(arg, s)
   try
-    tsave sandbox_home + arg, s
+    tsave join_path(sandbox_home, arg), s
     result = "OK: Data written successfully to " + arg
   catch e
     result = "ERROR: " + e
@@ -133,8 +141,13 @@ end
 '
 func tool_permission()
   local k
-  input "Agree?"; k
-  return iff(trim(k) == "YES", "YES", "NO")
+  input "Agree:? ", k
+  select case lower(k)
+  case "y", "yes", "sure", "okay", "k"
+    return "YES"
+  case else
+    return "NO"
+  end select
 end
 
 '
@@ -155,11 +168,11 @@ func process_tool(cmd)
     endif
   endif
 
-   ' print RED
-   ' print "["+op+"]"
-   ' print "["+arg1+"]"
-   ' print "["+arg2+"]"
-   ' print RESET
+  print RED
+  print "["+op+"]"
+  print "["+arg1+"]"
+  print "["+arg2+"]"
+  print RESET
 
   select case op
   case "TOOL:DATE"
@@ -182,7 +195,7 @@ func process_tool(cmd)
     result = "ERROR: unknown command " + op
   end select
 
-  'print RED + "TOOL RESULT:" + result + RESET
+  print RED + "TOOL RESULT:" + result + RESET
   return result
 end
 
@@ -298,6 +311,14 @@ sub main()
   wend
 end
 
+sandbox_home = iff(len(command) > 0, trim(command), cwd)
+if (left(sandbox_home) == "~") then
+  sandbox_home = join_path(home, mid(sandbox_home, 2))
+else if (left(sandbox_home, 2) == "./") then
+  sandbox_home = join_path(cwd, mid(sandbox_home, 2))
+endif
+
 welcome_message()
 main()
 
+

From 6c278bdac4f9840911eaf749d186c93b185e4c89 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Sun, 17 May 2026 17:07:27 +0930
Subject: [PATCH 30/54] LLAMA: RAG experiment to increase domain knowledge of a
 particular lib

---
 llama/CMakeLists.txt        |  22 ++
 llama/RAG.md                | 325 ++++++++++++++++++++++++
 llama/chunk_headers.cpp     | 311 +++++++++++++++++++++++
 llama/llama-sb.cpp          | 479 ++++++++++++++++++++++--------------
 llama/llama-sb.h            |  12 +-
 llama/llama.cpp             |   2 +-
 llama/main.cpp              |   2 +-
 llama/rag.hpp               | 328 ++++++++++++++++++++++++
 llama/rag_index.cpp         | 200 +++++++++++++++
 llama/samples/adventure.bas |   2 +-
 llama/samples/nitro.md      |   1 +
 llama/samples/nitro_cli.bas |  19 +-
 llama/test_main.cpp         |   2 +-
 13 files changed, 1508 insertions(+), 197 deletions(-)
 create mode 100644 llama/RAG.md
 create mode 100644 llama/chunk_headers.cpp
 create mode 100644 llama/rag.hpp
 create mode 100644 llama/rag_index.cpp

diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt
index 8eee40e..629a269 100644
--- a/llama/CMakeLists.txt
+++ b/llama/CMakeLists.txt
@@ -171,6 +171,28 @@ set_target_properties(llm_test PROPERTIES
   RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
 )
 
+# -----------------------------
+# RAG indexer
+# -----------------------------
+add_executable(rag_index
+  rag_index.cpp
+)
+
+target_include_directories(rag_index PRIVATE
+  ${LLAMA_DIR}/include
+  ${LLAMA_DIR}/ggml/include
+)
+
+target_link_libraries(rag_index PRIVATE
+  llm
+  llama
+  ggml
+)
+
+set_target_properties(rag_index PROPERTIES
+  RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
+)
+
 # ------------------------------------------------------------------
 # Android native library
 # ------------------------------------------------------------------
diff --git a/llama/RAG.md b/llama/RAG.md
new file mode 100644
index 0000000..ee5e8e7
--- /dev/null
+++ b/llama/RAG.md
@@ -0,0 +1,325 @@
+# notcurses RAG — C++ Library Expert via llama.cpp
+
+A self-contained RAG (Retrieval-Augmented Generation) pipeline in C++17
+that turns a GGUF inference model into a focused expert on any C/C++ library.
+Demonstrated here with [notcurses](https://github.com/dankamongmen/notcurses)
+but works with any header-based library.
+
+No fixed limits on chunk count, chunk length, or embedding dimension.
+No Python, no vector database daemon, no external dependencies beyond llama.cpp.
+
+---
+
+## How it works
+
+```
+INDEXING (one-time offline)
+────────────────────────────────────────────────────────────────
+notcurses headers
+      │
+      ▼
+chunk_headers        ← semantic chunker, outputs chunks.jsonl
+      │
+      ▼
+rag_index            ← embeds each chunk via nomic-embed-text GGUF
+      │
+      ▼
+notcurses.db         ← binary vector store (embeddings + text)
+
+
+RUNTIME (each query)
+────────────────────────────────────────────────────────────────
+user query
+      │
+      ▼
+rag_retrieve()       ← embeds query, cosine similarity against db
+      │              ← skips chunks already seen this session
+      ▼
+new top-k chunks     ← most relevant unseen API fragments
+      │
+      ▼
+prompt assembly      ← system + prior history + new context + query
+      │
+      ▼
+Qwen3 inference      ← <|think|> reasoning + final answer
+      │
+      ▼
+history              ← appended for next turn (KV cache intact)
+```
+
+---
+
+## Files
+
+| File | Purpose |
+|---|---|
+| `chunk_headers.cpp` | Parses C/C++ headers into semantic chunks, outputs `.jsonl` |
+| `rag_index.cpp` | Reads `.jsonl`, embeds each chunk, saves binary `.db` |
+| `rag.hpp` | Single-header C++17 runtime — load db, session, retrieve |
+| `example.cpp` | Full pipeline wired together, multi-turn query loop |
+
+---
+
+## Dependencies
+
+- [llama.cpp](https://github.com/ggerganov/llama.cpp) — `libllama` + `llama.h`
+- A GGUF **inference model** — tested with `Qwen3.5-9B-Q4_K_M.gguf`
+- A GGUF **embedding model** —
+  `nomic-embed-text-v1.5.Q4_K_M.gguf`
+  ([download](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF))
+- C++17 compiler with `std::filesystem` (gcc 9+, clang 9+, MSVC 2019+; gcc 8 also needs `-lstdc++fs`)
+
+---
+
+## Build
+
+```bash
+c++ -std=c++17 -o chunk_headers chunk_headers.cpp
+c++ -std=c++17 -o rag_index     rag_index.cpp     -lllama -lm
+c++ -std=c++17 -o example       example.cpp       -lllama -lm
+```
+
+If llama.cpp is not on your system library path:
+
+```bash
+c++ -std=c++17 -o rag_index rag_index.cpp \
+    -I/path/to/llama.cpp/include \
+    -L/path/to/llama.cpp/build -lllama -lm
+```
+
+---
+
+## Usage
+
+### Step 1 — Chunk the headers (one-time)
+
+```bash
+./chunk_headers notcurses/include/notcurses/ > chunks.jsonl
+```
+
+Accepts a single file or a directory (walked recursively).
+Multiple paths can be given:
+
+```bash
+./chunk_headers include/foo.h include/bar.h src/examples/ > chunks.jsonl
+```
+
+Handles `.h`, `.hpp`, `.c`, `.cpp`. Inspect before indexing:
+
+```bash
+head -5 chunks.jsonl | python3 -m json.tool
+```
+
+### Step 2 — Embed and index (one-time)
+
+```bash
+./rag_index \
+  --model nomic-embed-text-v1.5.Q4_K_M.gguf \
+  --input chunks.jsonl \
+  --output notcurses.db
+```
+
+Takes a few minutes for a large corpus. The `.db` is reusable
+until the library changes.
+
+### Step 3 — Run
+
+```bash
+./example \
+  --model Qwen3.5-9B-Q4_K_M.gguf \
+  --embed nomic-embed-text-v1.5.Q4_K_M.gguf \
+  --db    notcurses.db
+```
+
+```
+notcurses expert ready. ctrl+d to quit.
+
+you: how do I create a plane and render text into it?
+assistant: ...
+
+you: what options does it take?       ← follow-up; no repeated context
+assistant: ...
+```
+
+---
+
+## Using rag.hpp in your own project
+
+Single-header, stb-style. In **one** `.cpp` file:
+
+```cpp
+#define RAG_IMPLEMENTATION
+#include "rag.hpp"
+```
+
+All other files that need the types:
+
+```cpp
+#include "rag.hpp"
+```
+
+### Minimal integration
+
+```cpp
+// startup
+RagDB db;
+rag_load(db, "notcurses.db");
+
+RagSession session;
+session.init(db.size(), 8192);   // n_chunks, your n_ctx
+session.score_threshold = 0.60f;
+
+// each turn
+std::string context = rag_retrieve(db, embed_ctx, embed_model,
+                                   user_query, 5, session);
+// context is empty string if nothing new/relevant was found
+// build prompt with context and hand to your inference context
+```
+
+### Stateless retrieval (no deduplication)
+
+```cpp
+std::string context = rag_retrieve(db, embed_ctx, embed_model,
+                                   user_query, 5);
+```
+
+### API
+
+```cpp
+// Load .db file (version 2). Returns true on success.
+bool rag_load(RagDB &db, const std::string &path);
+
+// Retrieve with session deduplication + token budget.
+// Returns context string ready to inject into prompt.
+// Empty string if nothing new or relevant was found.
+std::string rag_retrieve(const RagDB       &db,
+                         llama_context     *embed_ctx,
+                         llama_model       *embed_model,
+                         const std::string &query,
+                         int                top_k,
+                         RagSession        &session);
+
+// Stateless overload — no deduplication.
+std::string rag_retrieve(const RagDB       &db,
+                         llama_context     *embed_ctx,
+                         llama_model       *embed_model,
+                         const std::string &query,
+                         int                top_k);
+```
+
+### RagSession fields
+
+```cpp
+struct RagSession {
+  std::vector<bool> seen;         // one bit per chunk, sized to db
+  int  tokens_used  = 0;          // running token estimate
+  int  tokens_max   = 0;          // your n_ctx ceiling
+  float score_threshold = 0.60f;  // skip weak matches
+
+  void init(int n_chunks, int ctx_size);
+  void reset();                   // start a fresh conversation
+};
+```
+
+---
+
+## Chunking strategy
+
+`chunk_headers` uses a state machine that keeps each **semantic unit**
+together as one chunk:
+
+- Block comment (`/* ... */`) + following declaration
+- `//` line comments + following declaration
+- `typedef struct` / `typedef enum` entire body
+- Consecutive `#define` macro groups
+- Multi-line function signatures
+
+Example — this stays as one chunk:
+
+```c
+// ncplane_create() - create a new plane as a child of 'n'.
+// 'nopts' may be NULL for defaults. Returns NULL on error.
+struct ncplane* ncplane_create(struct ncplane *n,
+                               const struct ncplane_options *nopts);
+```
+
+---
+
+## Session deduplication
+
+The KV cache is not cleared between turns, so the model already has
+earlier chunks in memory. `RagSession` tracks which chunks have been
+injected and skips them on subsequent turns:
+
+```
+Turn 1: retrieved chunks [42, 17, 83]  → all new → inject all
+Turn 2: retrieved chunks [42, 55, 17]  → 42,17 seen → inject only [55]
+Turn 3: retrieved chunks [7, 14, 55]   → 55 seen → inject [7, 14]
+```
+
+Context window grows efficiently — no repeated API reference, and the
+model remembers everything already seen via the intact KV cache.
+
+---
+
+## Adapting to other libraries
+
+Change only the input to `chunk_headers`:
+
+| Library | Input |
+|---|---|
+| stb (stb_image, stb_truetype ...) | single `.h` file |
+| SDL2 / OpenGL / Vulkan | `include/` directory |
+| Your own engine | any `.h` / `.hpp` mix |
+| Spring / Java | extend chunker for Javadoc + `.java` |
+
+Re-run steps 1 and 2 to produce a new `.db`. Runtime code unchanged.
+Multiple `.db` files can be loaded and queried independently.
+
+---
+
+## .db file format (version 2)
+
+Variable-length fields — no wasted padding.
+
+```
+Header (16 bytes):
+  uint32  magic      = 0x52414744  ("RAGD")
+  uint32  version    = 2
+  uint32  n_chunks
+  uint32  embed_dim
+
+Per chunk:
+  uint32  text_len
+  char[]  text              (text_len bytes, no null)
+  uint16  source_len
+  char[]  source            (source_len bytes, no null)
+  uint8   type_len
+  char[]  type              (type_len bytes, no null)
+  float[] embedding         (embed_dim × 4 bytes)
+```
+
+---
+
+## GPU memory
+
+On an 8 GB GPU with `Qwen3.5-9B-Q4_K_M`:
+
+| Component | VRAM |
+|---|---|
+| Inference model (Q4_K_M 9B) | ~5.5 GB |
+| Embedding model (nomic Q4) | ~0.3 GB |
+| KV cache (8k ctx, Q4_0 K/V) | ~0.5 GB |
+| **Total** | **~6.3 GB** |
+
+---
+
+## Qwen3 thinking mode
+
+The model emits `<|think|>...<|/think|>` before its answer.
+`example.cpp` strips this with `strip_think()` before printing.
+The think block improves RAG quality — the model explicitly reasons
+over injected context chunks before answering.
+
+To expose reasoning (useful for debugging retrieval quality), remove
+the `strip_think()` call and print `raw` directly.
diff --git a/llama/chunk_headers.cpp b/llama/chunk_headers.cpp
new file mode 100644
index 0000000..51ecd66
--- /dev/null
+++ b/llama/chunk_headers.cpp
@@ -0,0 +1,311 @@
+/*
+ * chunk_headers.cpp
+ *
+ * Smart chunker for C/C++ headers — keeps semantic units together:
+ *   - block comment + following declaration/function
+ *   - struct/enum/typedef blocks
+ *   - grouped #define macros
+ *   - standalone inline-commented declarations
+ *
+ * Output: one chunk per line in a .jsonl file:
+ *   {"source":"notcurses.h","type":"function","text":"..."}
+ *
+ * Build: c++ -std=c++17 -o chunk_headers chunk_headers.cpp
+ * Usage: ./chunk_headers notcurses/include/notcurses/notcurses.h > chunks.jsonl
+ *        ./chunk_headers dir/                                    > chunks.jsonl
+ */
+#include <algorithm>
+#include <filesystem>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <string>
+#include <vector>
+
+namespace fs = std::filesystem;
+
+/* ── tunables ──────────────────────────────────────────────── */
+static constexpr size_t MIN_CHUNK = 40;   /* ignore tiny fragments  */
+/* ─────────────────────────────────────────────────────────── */
+
+enum class ChunkType {  /* category reported in each record's "type" field */
+  Function, Struct, Enum, Typedef, Defines, Other
+};
+
+/* Returns the string emitted in the "type" field for a chunk category. */
+static std::string type_name(ChunkType t) {
+  if (t == ChunkType::Function) return "function";
+  if (t == ChunkType::Struct)   return "struct";
+  if (t == ChunkType::Enum)     return "enum";
+  if (t == ChunkType::Typedef)  return "typedef";
+  if (t == ChunkType::Defines)  return "defines";
+  /* ChunkType::Other and anything unexpected */
+  return "other";
+}
+
+/* ── helpers ───────────────────────────────────────────────── */
+
+/* True when s begins with prefix; an empty prefix always matches. */
+static bool starts_with(const std::string &s, const std::string &prefix) {
+  return s.rfind(prefix, 0) == 0;  /* start position 0 limits the match to the front */
+}
+
+/* True when s is empty or holds only space, \t, \n, \v, \f or \r (needs no <cctype>). */
+static bool is_blank(const std::string &s) {
+  return s.find_first_not_of(" \t\n\v\f\r") == std::string::npos;
+}
+
+static std::string json_escape(const std::string &in) {
+  std::string out;
+  out.reserve(in.size() + 32);
+  for (unsigned char c : in) {  /* unsigned so bytes >= 0x80 pass the 0x20 test */
+    switch (c) {
+      case '"':  out += "\\\""; break;
+      case '\\': out += "\\\\"; break;
+      case '\n': out += "\\n";  break;
+      case '\t': out += "\\t";  break;
+      default:   if (c >= 0x20) out += c;  /* CR and other control bytes are dropped */
+    }
+  }
+  return out;
+}
+
+/* Writes one JSONL record to stdout.  Trailing CR/LF are cut from the text
+ * first; if fewer than MIN_CHUNK bytes remain, nothing is written. */
+static void emit_chunk(const std::string &source, ChunkType type,
+                       const std::string &text) {
+  /* position of the last byte that is not a line terminator */
+  const size_t last = text.find_last_not_of("\r\n");
+  const size_t kept = (last == std::string::npos) ? 0 : last + 1;
+  if (kept < MIN_CHUNK) return;
+
+  std::cout << "{\"source\":\"" << json_escape(source)
+            << "\",\"type\":\""  << type_name(type)
+            << "\",\"text\":\""  << json_escape(text.substr(0, kept))
+            << "\"}\n";
+}
+
+/* ── state machine ─────────────────────────────────────────── */
+
+enum class State {  /* what process_file() is currently collecting into its chunk */
+  Idle, BlockComment, LineComment, Declaration, Struct, Defines
+};
+
+/*
+ * Splits one file into chunks with a line-based state machine and hands each
+ * finished chunk to emit_chunk(); a comment directly above a declaration,
+ * struct/enum or #define stays in that chunk.  Unreadable files are skipped.
+ */
+static void process_file(const fs::path &path) {
+  std::ifstream f(path);
+  if (!f) { std::cerr << "cannot open: " << path << "\n"; return; }
+
+  const std::string source = path.filename().string();
+
+  State     state       = State::Idle;
+  std::string chunk;
+  ChunkType chunk_type  = ChunkType::Other;
+  int       brace_depth = 0;
+  int       paren_depth = 0;
+
+  auto flush = [&](ChunkType t) {
+    emit_chunk(source, t, chunk);
+    chunk.clear();
+    state       = State::Idle;
+    brace_depth = 0;
+    paren_depth = 0;
+  };
+
+  std::string line;
+  while (std::getline(f, line)) {
+    /* trim trailing CR */
+    if (!line.empty() && line.back() == '\r') line.pop_back();
+
+    /* find first non-whitespace for prefix checks */
+    size_t trim_pos = 0;
+    while (trim_pos < line.size() &&
+           (line[trim_pos] == ' ' || line[trim_pos] == '\t')) ++trim_pos;
+    const std::string trimmed = line.substr(trim_pos);
+
+    /* ── #define handling ─────────────────────────────────── */
+    if (starts_with(trimmed, "#define ")) {
+      if (state == State::BlockComment || state == State::LineComment) {
+        chunk += line + "\n";
+        state = State::Defines;
+      } else if (state == State::Defines) {
+        chunk += line + "\n";
+      } else {
+        if (chunk.size() >= MIN_CHUNK) emit_chunk(source, chunk_type, chunk);
+        chunk.clear();
+        chunk += line + "\n";
+        state = State::Defines;
+      }
+      continue;
+    }
+
+    /* non-define while in define group */
+    if (state == State::Defines) {
+      flush(ChunkType::Defines);
+      /* fall through to process this line normally */
+    }
+
+    /* ── block comment start ──────────────────────────────── */
+    if (starts_with(trimmed, "/*") &&   /* doc-comment openers match too */
+        state == State::Idle) {
+      if (chunk.size() >= MIN_CHUNK) emit_chunk(source, chunk_type, chunk);
+      chunk.clear();
+      chunk_type = ChunkType::Other;
+      chunk += line + "\n";
+      state = (trimmed.find("*/", 2) != std::string::npos)
+              ? State::LineComment
+              : State::BlockComment;
+      continue;
+    }
+
+    /* ── inside block comment ─────────────────────────────── */
+    if (state == State::BlockComment) {
+      chunk += line + "\n";
+      if (trimmed.find("*/") != std::string::npos)
+        state = State::LineComment;
+      continue;
+    }
+
+    /* ── // line comment ──────────────────────────────────── */
+    if (starts_with(trimmed, "//")) {
+      if (state == State::Idle) {
+        if (chunk.size() >= MIN_CHUNK) emit_chunk(source, chunk_type, chunk);
+        chunk.clear();
+        chunk += line + "\n";
+        state = State::LineComment;
+      } else if (state == State::LineComment) {
+        chunk += line + "\n";
+      }
+      continue;
+    }
+
+    /* ── blank line ───────────────────────────────────────── */
+    if (is_blank(trimmed)) {
+      if (state == State::LineComment)
+        flush(ChunkType::Other);
+      else if (state == State::Idle && chunk.size() >= MIN_CHUNK)
+        flush(chunk_type);
+      continue;
+    }
+
+    /* ── skip preprocessor noise (#if*, #el*, #endif, ...) ── */
+    if (starts_with(trimmed, "#if")      || starts_with(trimmed, "#el")     ||
+        starts_with(trimmed, "#endif")   || starts_with(trimmed, "#pragma") ||
+        starts_with(trimmed, "#include") || starts_with(trimmed, "#undef")) {
+      if (state == State::LineComment || state == State::BlockComment) {
+        chunk.clear();
+        state = State::Idle;
+      }
+      continue;
+    }
+
+    /* ── typedef struct / enum start ─────────────────────── */
+    if ((starts_with(trimmed, "typedef struct") ||
+         starts_with(trimmed, "typedef enum")   ||
+         starts_with(trimmed, "struct ")         ||
+         starts_with(trimmed, "enum "))          &&
+        (state == State::Idle || state == State::LineComment)) {
+
+      if (state == State::Idle && chunk.size() >= MIN_CHUNK)
+        emit_chunk(source, chunk_type, chunk);
+
+      /* preserve any comment already in chunk */
+      if (state == State::Idle) chunk.clear();
+
+      chunk += line + "\n";
+      chunk_type = starts_with(trimmed, "typedef") ? ChunkType::Typedef
+                 : starts_with(trimmed, "enum ")   ? ChunkType::Enum
+                                                    : ChunkType::Struct;
+      state = State::Struct;
+      for (char c : line) {
+        if (c == '{') ++brace_depth;
+        if (c == '}') --brace_depth;
+      }
+      if (brace_depth <= 0 && line.find(';') != std::string::npos)
+        flush(chunk_type);
+      continue;
+    }
+
+    /* ── inside struct/enum body ──────────────────────────── */
+    if (state == State::Struct) {
+      chunk += line + "\n";
+      for (char c : line) {
+        if (c == '{') ++brace_depth;
+        if (c == '}') --brace_depth;
+      }
+      if (brace_depth <= 0 && line.find(';') != std::string::npos)
+        flush(chunk_type);
+      continue;
+    }
+
+    /* ── function / other declaration ────────────────────── */
+    if (state == State::LineComment || state == State::Idle) {
+      if (state == State::Idle && chunk.size() >= MIN_CHUNK) {
+        emit_chunk(source, chunk_type, chunk);
+        chunk.clear();
+      }
+      chunk += line + "\n";
+      chunk_type = ChunkType::Function;
+      state = State::Declaration;
+      for (char c : line) {
+        if (c == '(') ++paren_depth;
+        if (c == ')') --paren_depth;
+      }
+      if (paren_depth <= 0 && line.find(';') != std::string::npos)
+        flush(ChunkType::Function);
+      continue;
+    }
+
+    /* ── multi-line declaration ───────────────────────────── */
+    if (state == State::Declaration) {
+      chunk += line + "\n";
+      for (char c : line) {
+        if (c == '(') ++paren_depth;
+        if (c == ')') --paren_depth;
+      }
+      if (paren_depth <= 0 && line.find(';') != std::string::npos)
+        flush(ChunkType::Function);
+      continue;
+    }
+  }
+
+  /* flush remainder */
+  if (chunk.size() >= MIN_CHUNK) emit_chunk(source, chunk_type, chunk);
+}
+
+/* ── directory walker ──────────────────────────────────────── */
+
+/* Processes one file, or every .h/.hpp/.c/.cpp file found below a directory. */
+static void process_path(const fs::path &path) {
+  if (fs::is_regular_file(path)) { process_file(path); return; }
+  if (!fs::is_directory(path)) {  /* e.g. a mistyped path: report it, do not ignore it */
+    std::cerr << "not a file or directory: " << path << "\n";
+    return;
+  }
+  std::vector<fs::path> entries;
+  for (auto &e : fs::recursive_directory_iterator(path))
+    if (fs::is_regular_file(e.path())) entries.push_back(e.path());
+  std::sort(entries.begin(), entries.end());  /* sorted for deterministic output */
+  for (const auto &e : entries) {
+    const auto ext = e.extension().string();
+    if (ext == ".h" || ext == ".hpp" || ext == ".c" || ext == ".cpp")
+      process_file(e);
+  }
+}
+
+/* ── main ──────────────────────────────────────────────────── */
+
+/* Entry point: each command-line argument is handed to process_path(). */
+int main(int argc, char **argv) {
+  if (argc < 2) {
+    std::cerr << "usage: " << argv[0]
+              << " <header.h|dir> [header2.h ...]\n";
+    return 1;
+  }
+  for (char **arg = argv + 1; arg != argv + argc; ++arg) process_path(fs::path(*arg));
+  return 0;
+}
diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index 4fde33d..05463fb 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -7,6 +7,7 @@
 
 #include <format>
 #include <span>
+#include <cmath>
 #include <utility>
 #include "ggml-cuda.h"
 
@@ -63,8 +64,12 @@ Llama::Llama() :
   _n_past(0),
   _is_gemma4(false),
   _seed(LLAMA_DEFAULT_SEED) {
-  llama_log_set([](enum ggml_log_level level, const char * text, void *user_data) {
+  llama_log_set([](enum ggml_log_level level, const char *text, void *user_data) {
     Llama *llama = (Llama *)user_data;
+    if (level == GGML_LOG_LEVEL_ERROR && llama->_last_error.empty()) {
+      // remember the first error message
+      llama->_last_error = text;
+    }
     if (level > llama->_log_level) {
       fprintf(stderr, "LLAMA: %s", text);
     }
@@ -135,7 +140,7 @@ void Llama::reset() {
   }
 }
 
-bool Llama::construct(string model_path, int n_ctx, int n_batch, int n_gpu_layers, int log_level) {
+bool Llama::load_model(string model_path, int n_ctx, int n_batch, int n_gpu_layers, int log_level) {
   ggml_backend_load_all();
 
   llama_model_params mparams = llama_model_default_params();
@@ -143,11 +148,12 @@ bool Llama::construct(string model_path, int n_ctx, int n_batch, int n_gpu_layer
     mparams.n_gpu_layers = n_gpu_layers;
   }
 
+  _last_error.clear();
   _log_level = log_level;
   _n_gpu_layers = n_gpu_layers;
   _model = llama_model_load_from_file(model_path.c_str(), mparams);
   if (!_model) {
-    _last_error = "Failed to load model";
+    set_last_error("Load model");
   } else {
     llama_context_params cparams = llama_context_default_params();
     cparams.n_ctx   = n_ctx;
@@ -166,7 +172,7 @@ bool Llama::construct(string model_path, int n_ctx, int n_batch, int n_gpu_layer
 
     _ctx = llama_init_from_model(_model, cparams);
     if (!_ctx) {
-      _last_error = "Failed to create context";
+      set_last_error("Create context");
     } else {
       _vocab = llama_model_get_vocab(_model);
     }
@@ -177,125 +183,91 @@ bool Llama::construct(string model_path, int n_ctx, int n_batch, int n_gpu_layer
   return _last_error.empty();
 }
 
-void Llama::set_grammar(const string &src, const string &root) {
-  _grammar_src = src;
-  _grammar_root = root;
-}
+bool Llama::load_embedding_model(string model_path) {  // false on failure, see _last_error
+  ggml_backend_load_all();
 
-bool Llama::configure_sampler() {
-  auto sparams = llama_sampler_chain_default_params();
-  sparams.no_perf = false;
-  llama_sampler *chain = llama_sampler_chain_init(sparams);
+  llama_model_params mparams = llama_model_default_params();
+  mparams.n_gpu_layers = 99;  // NOTE(review): fixed here, load_model() takes it as a parameter
 
-  if (!_grammar_src.empty()) {
-    llama_sampler *grammar = llama_sampler_init_grammar(_vocab, _grammar_src.c_str(), _grammar_root.c_str());
-    if (!grammar) {
-      _last_error = "failed to initialize grammar sampler";
-      return false;
-    }
-    llama_sampler_chain_add(chain, grammar);
-  }
-  if (_penalty_last_n != 0 && _penalty_repeat != 1.0f) {
-    auto penalties = llama_sampler_init_penalties(_penalty_last_n, _penalty_repeat, _penalty_freq, _penalty_present);
-    llama_sampler_chain_add(chain, penalties);
-  }
-  if (_temperature <= 0.0f) {
-    llama_sampler_chain_add(chain, llama_sampler_init_greedy());
+  _last_error.clear();
+  _model = llama_model_load_from_file(model_path.c_str(), mparams);
+  if (!_model) {
+    set_last_error("Load model");
   } else {
-    if (_top_k > 0) {
-      llama_sampler_chain_add(chain, llama_sampler_init_top_k(_top_k));
-    }
-    if (_top_p < 1.0f || _min_p > 0.0f) {
-      llama_sampler_chain_add(chain, llama_sampler_init_top_p(_top_p, 1));
-    }
-    if (_min_p > 0.0f) {
-      llama_sampler_chain_add(chain, llama_sampler_init_min_p(_min_p, 1));
+    _vocab = llama_model_get_vocab(_model);  // embed_text() tokenizes through _vocab
+    llama_context_params cparams = llama_context_default_params();
+    cparams.n_ctx        = 512;
+    cparams.n_batch      = 512;
+    cparams.embeddings   = true;
+    cparams.pooling_type = LLAMA_POOLING_TYPE_MEAN;
+    _ctx = llama_init_from_model(_model, cparams);
+    if (!_ctx) {
+      set_last_error("Create context");
     }
-    llama_sampler_chain_add(chain, llama_sampler_init_temp(_temperature));
-    llama_sampler_chain_add(chain, llama_sampler_init_dist(_seed));
-  }
-  if (_sampler) {
-    llama_sampler_free(_sampler);
   }
-  _sampler = chain;
-  return true;
-}
 
-vector<llama_token> Llama::tokenize(const string &prompt) {
-  vector<llama_token> result;
+  return _last_error.empty();
+}
 
-  int n_prompt = -llama_tokenize(_vocab, prompt.c_str(), prompt.size(), nullptr, 0, true, true);
-  if (n_prompt <= 0) {
-    _last_error = "Failed to tokenize prompt";
-  } else {
-    result.reserve(n_prompt);
-    result.resize(n_prompt);
-    if (llama_tokenize(_vocab, prompt.c_str(), prompt.size(),
-                       result.data(), n_prompt, true, true) < 0) {
-      _last_error = "Failed to tokenize prompt";
-    }
-  }
-  return result;
+int Llama::get_embed_dim() {  // the model's n_embd, or 0 while _model is null
+  return _model != nullptr ? llama_model_n_embd(_model) : 0;
 }
 
-// Makes space in the context for n_tokens by removing old tokens if necessary
-// Returns true if successful, false if impossible to make space
-//
-// Strategies:
-// - If enough space exists, does nothing
-// - If n_tokens > n_ctx, fails (impossible to fit)
-// - Otherwise, removes oldest tokens to make room
-//
-// Parameters:
-//   n_tokens  - Number of tokens we need space for
-//   keep_min  - Minimum tokens to keep (e.g., system prompt), default 0
-//
-bool Llama::make_space_for_tokens(int n_tokens, int keep_min) {
-  int n_ctx = llama_n_ctx(_ctx);
-  if (n_tokens > n_ctx) {
-    _last_error = "Too many tokens, increase context size (n_ctx)";
+bool Llama::embed_text(const std::string &text, std::vector<float> &out, int embed_dim) {
+  std::string prefixed = "search_document: " + text;
+  // NOTE(review): the same task prefix is applied to every input - confirm for query embeddings
+  vector<llama_token> tokens = tokenize(prefixed);
+  if (tokens.size() == 0) {
     return false;
   }
 
-  llama_memory_t mem = llama_get_memory(_ctx);
-
-  // Get current position range
-  llama_pos pos_min = llama_memory_seq_pos_min(mem, 0);
-  llama_pos pos_max = llama_memory_seq_pos_max(mem, 0);
-
-  // Empty memory - nothing to do
-  if (pos_max < 0) {
-    return true;
+  // truncate to context window
+  int n_ctx = llama_n_ctx(_ctx);
+  int n = tokens.size();
+  if (n > n_ctx) {
+    _last_error = std::format("warning: chunk truncated {} -> {} tokens ", n, n_ctx);
+    n = n_ctx;
+    tokens.resize(n);
   }
 
-  int current_used = pos_max - pos_min + 1;
-  int space_needed = n_tokens;
-  int space_available = n_ctx - current_used;
+  llama_memory_clear(llama_get_memory(_ctx), true);  // each text is embedded from an empty context
 
-  // Already have enough space
-  if (space_available >= space_needed) {
-    return true;
+  if (!batch_decode_tokens(tokens)) {
+    return false;
   }
 
-  // Calculate how many tokens to remove
-  int tokens_to_remove = space_needed - space_available;
+  float *emb = llama_get_embeddings_seq(_ctx, 0);
+  if (!emb) {
+    emb = llama_get_embeddings_ith(_ctx, n - 1);
+  }
 
-  // Can't remove more than we have (minus keep_min)
-  int removable = current_used - keep_min;
-  if (tokens_to_remove > removable) {
-    _last_error = "Can't make enough space while keeping keep_min tokens";
+  if (!emb || embed_dim <= 0 || embed_dim > get_embed_dim()) {
+    _last_error = emb ? "embedding dimension mismatch\n" : "no embedding returned\n";
     return false;
   }
 
-  // Remove oldest tokens (from pos_min to pos_min + tokens_to_remove)
-  llama_memory_seq_rm(mem, 0, pos_min, pos_min + tokens_to_remove);
+  out.assign(emb, emb + embed_dim);  // embed_dim was range-checked against the model above
 
-  // Shift remaining tokens down
-  llama_memory_seq_add(mem, 0, pos_min + tokens_to_remove, -1, -tokens_to_remove);
+  /* L2 normalize */
+  float norm = 0.0f;
+  for (float v : out) {
+    norm += v * v;
+  }
+  norm = std::sqrt(norm);
+  if (norm > 1e-9f) {
+    for (float &v : out) {
+      v /= norm;
+    }
+  }
 
   return true;
 }
 
+void Llama::set_grammar(const string &src, const string &root) {  // read later by configure_sampler()
+  _grammar_src = src;
+  _grammar_root = root;
+}
+
 bool Llama::add_message(LlamaIter &iter, const string &role, const string &content) {
   llama_chat_message message = {role.c_str(), content.c_str()};
   int buf_size = 2 * (int)(role.size() + content.size() + 64);
@@ -342,16 +314,8 @@ bool Llama::add_message(LlamaIter &iter, const string &role, const string &conte
   }
 
   // batch decode tokens
-  uint32_t n_batch = llama_n_batch(_ctx);
-  for (size_t i = 0; i < prompt_tokens.size(); i += n_batch) {
-    size_t batch_size = std::min((size_t)n_batch, prompt_tokens.size() - i);
-    llama_batch batch = llama_batch_get_one(prompt_tokens.data() + i, batch_size);
-    int result = llama_decode(_ctx, batch);
-    if (result != 0) {
-      _last_error = std::format("Failed to decode batch. position:{} error:{} [size:{}, past:{}]",
-                                i, result, prompt_tokens.size(), _n_past);
-      return false;
-    }
+  if (!batch_decode_tokens(prompt_tokens)) {
+    return false;
   }
 
   // handle encoder models
@@ -377,87 +341,6 @@ bool Llama::add_message(LlamaIter &iter, const string &role, const string &conte
   return true;
 }
 
-bool Llama::ends_with_sentence_boundary(const string &text) {
-  if (text.empty()) {
-    return false;
-  }
-
-  // Get last few characters (in case of whitespace after punctuation)
-  size_t check_len = std::min(text.length(), (size_t)5);
-  std::string ending = text.substr(text.length() - check_len);
-
-  // Check for various sentence endings
-  // Period followed by space or end
-  if (ending.find(". ") != std::string::npos ||
-      ending.back() == '.') {
-    return true;
-  }
-
-  // Exclamation mark
-  if (ending.find("! ") != std::string::npos ||
-      ending.back() == '!') {
-    return true;
-  }
-
-  // Question mark
-  if (ending.find("? ") != std::string::npos ||
-      ending.back() == '?') {
-    return true;
-  }
-
-  // Newline (paragraph break)
-  if (ending.find('\n') != std::string::npos) {
-    return true;
-  }
-
-  // Quote followed by period: "something."
-  if (ending.find(".\"") != std::string::npos ||
-      ending.find("!\"") != std::string::npos ||
-      ending.find("?\"") != std::string::npos) {
-    return true;
-  }
-
-  return false;
-}
-
-string Llama::token_to_string(LlamaIter &iter, llama_token tok) {
-  string result;
-  char buf[512];
-  int n = llama_token_to_piece(_vocab, tok, buf, sizeof(buf), 0, false);
-  if (n > 0) {
-    // detect repetition
-    if (iter._last_word == buf) {
-      if (++iter._repetition_count == MAX_REPEAT) {
-        iter._has_next = false;
-      }
-    } else {
-      iter._repetition_count = 0;
-      iter._last_word = buf;
-    }
-
-    result.append(buf, n);
-
-    // detect end of max-tokens
-    if (++iter._tokens_generated > _max_tokens && ends_with_sentence_boundary(result)) {
-      iter._has_next = false;
-    }
-
-    // detect stop words
-    if (iter._has_next) {
-      for (const auto &stop : _stop_sequences) {
-        size_t pos = result.find(stop);
-        if (pos != std::string::npos) {
-          // found stop sequence - truncate and signal end
-          result = result.substr(0, pos);
-          iter._has_next = false;
-          break;
-        }
-      }
-    }
-  }
-  return result;
-}
-
 string Llama::next(LlamaIter &iter) {
   if (!iter._has_next) {
     _last_error = "Iteration beyond end of stream";
@@ -573,3 +456,223 @@ LlamaMemoryInfo Llama::memory_info() {
   return info;
 }
 
+bool Llama::batch_decode_tokens(vector<llama_token> &tokens) {
+  // hand the tokens to llama_decode() in slices of at most n_batch
+  const size_t stride = llama_n_batch(_ctx);
+  for (size_t offset = 0; offset < tokens.size(); offset += stride) {
+    const size_t count = std::min(stride, tokens.size() - offset);
+    const int status = llama_decode(_ctx, llama_batch_get_one(tokens.data() + offset, count));
+    if (status != 0) {
+      _last_error = std::format("Failed to decode batch. position:{} error:{} [size:{}, past:{}]",
+                                offset, status, tokens.size(), _n_past);
+      return false;
+    }
+  }
+  return true;
+}
+
+bool Llama::configure_sampler() {  // rebuilds _sampler; on failure the old one is kept
+  auto sparams = llama_sampler_chain_default_params();
+  sparams.no_perf = false;
+  llama_sampler *chain = llama_sampler_chain_init(sparams);
+  if (!_grammar_src.empty()) {
+    llama_sampler *grammar = llama_sampler_init_grammar(_vocab, _grammar_src.c_str(), _grammar_root.c_str());
+    if (!grammar) {
+      _last_error = "failed to initialize grammar sampler";
+      llama_sampler_free(chain);  // nothing else owns the half-built chain
+      return false;
+    }
+    llama_sampler_chain_add(chain, grammar);
+  }
+  if (_penalty_last_n != 0 && _penalty_repeat != 1.0f) {
+    auto penalties = llama_sampler_init_penalties(_penalty_last_n, _penalty_repeat, _penalty_freq, _penalty_present);
+    llama_sampler_chain_add(chain, penalties);
+  }
+  if (_temperature <= 0.0f) {
+    llama_sampler_chain_add(chain, llama_sampler_init_greedy());
+  } else {
+    if (_top_k > 0) {
+      llama_sampler_chain_add(chain, llama_sampler_init_top_k(_top_k));
+    }
+    if (_top_p < 1.0f || _min_p > 0.0f) {
+      llama_sampler_chain_add(chain, llama_sampler_init_top_p(_top_p, 1));
+    }
+    if (_min_p > 0.0f) {
+      llama_sampler_chain_add(chain, llama_sampler_init_min_p(_min_p, 1));
+    }
+    llama_sampler_chain_add(chain, llama_sampler_init_temp(_temperature));
+    llama_sampler_chain_add(chain, llama_sampler_init_dist(_seed));
+  }
+  if (_sampler) {
+    llama_sampler_free(_sampler);
+  }
+  _sampler = chain;
+  return true;
+}
+
+// Heuristic used to stop generation at a natural break: reports whether
+// `text` looks like it ends at a sentence or paragraph boundary.
+// (token_to_string() consults it once the token budget is exceeded)
+bool Llama::ends_with_sentence_boundary(const string &text) {
+  // nothing to inspect
+  if (text.empty()) {
+    return false;
+  }
+
+  // Only the last five characters (fewer for short text) are inspected, so
+  // a mark followed by a little trailing text still counts as a boundary.
+  const size_t tail_len = std::min(text.length(), (size_t)5);
+  const std::string tail = text.substr(text.length() - tail_len);
+  const char last = tail.back();
+
+  // true when the tail contains the given character sequence
+  const auto tail_has = [&tail](const char *needle) {
+    return tail.find(needle) != std::string::npos;
+  };
+
+  // Newline (paragraph break)
+  if (tail_has("\n")) {
+    return true;
+  }
+
+  // Period: last character, or followed by a space or a double quote
+  if (last == '.' || tail_has(". ") || tail_has(".\"")) {
+    return true;
+  }
+
+  // Exclamation mark: same three forms
+  if (last == '!' || tail_has("! ") || tail_has("!\"")) {
+    return true;
+  }
+
+  // Question mark: same three forms
+  if (last == '?' || tail_has("? ") || tail_has("?\"")) {
+    return true;
+  }
+
+  return false;
+}
+
+// Makes space in the context for n_tokens by removing old tokens if necessary
+// Returns true if successful, false (with _last_error set) if impossible
+//
+// Strategies:
+// - If enough space exists, does nothing
+// - If n_tokens > n_ctx, fails (impossible to fit)
+// - Otherwise, removes oldest tokens to make room
+//
+// Parameters:
+//   n_tokens  - Number of tokens we need space for
+//   keep_min  - Minimum number of tokens that must remain after removal
+// NOTE(review): removal starts at pos_min, so keep_min limits the count only
+bool Llama::make_space_for_tokens(int n_tokens, int keep_min) {
+  int n_ctx = llama_n_ctx(_ctx);
+  if (n_tokens > n_ctx) {
+    _last_error = "Too many tokens, increase context size (n_ctx)";
+    return false;
+  }
+
+  llama_memory_t mem = llama_get_memory(_ctx);
+
+  // Get current position range of sequence 0
+  llama_pos pos_min = llama_memory_seq_pos_min(mem, 0);
+  llama_pos pos_max = llama_memory_seq_pos_max(mem, 0);
+
+  // Empty memory - nothing to do
+  if (pos_max < 0) {
+    return true;
+  }
+
+  int current_used = pos_max - pos_min + 1;
+  int space_needed = n_tokens;
+  int space_available = n_ctx - current_used;
+
+  // Already have enough space
+  if (space_available >= space_needed) {
+    return true;
+  }
+
+  // Calculate how many tokens to remove
+  int tokens_to_remove = space_needed - space_available;
+
+  // Can't remove more than we have (minus keep_min)
+  int removable = current_used - keep_min;
+  if (tokens_to_remove > removable) {
+    _last_error = "Can't make enough space while keeping keep_min tokens";
+    return false;
+  }
+
+  // Remove oldest tokens (from pos_min to pos_min + tokens_to_remove)
+  llama_memory_seq_rm(mem, 0, pos_min, pos_min + tokens_to_remove);
+
+  // Shift remaining tokens down by the number of positions just removed
+  llama_memory_seq_add(mem, 0, pos_min + tokens_to_remove, -1, -tokens_to_remove);
+
+  return true;
+}
+
+vector<llama_token> Llama::tokenize(const string &prompt) {
+  // returns an empty vector, with _last_error set, when tokenization fails
+  vector<llama_token> result;
+  int n_prompt = -llama_tokenize(_vocab, prompt.c_str(), prompt.size(), nullptr, 0, true, true);
+  if (n_prompt <= 0) {
+    _last_error = "Failed to tokenize prompt";
+  } else {
+    result.resize(n_prompt);
+    if (llama_tokenize(_vocab, prompt.c_str(), prompt.size(),
+                       result.data(), n_prompt, true, true) < 0) {
+      _last_error = "Failed to tokenize prompt";
+      result.clear();  // never hand back unfilled token slots
+    }
+  }
+  return result;
+}
+
+string Llama::token_to_string(LlamaIter &iter, llama_token tok) {  // "" when no piece is produced
+  string result;
+  char buf[512];
+  int n = llama_token_to_piece(_vocab, tok, buf, sizeof(buf), 0, false);
+  if (n > 0) {
+    // buf is not null-terminated: only its first n bytes are valid
+    result.assign(buf, n);
+    // detect repetition
+    if (iter._last_word == result) {
+      if (++iter._repetition_count == MAX_REPEAT) {
+        iter._has_next = false;
+      }
+    } else {
+      iter._repetition_count = 0;
+      iter._last_word = result;
+    }
+
+    // detect end of max-tokens
+    if (++iter._tokens_generated > _max_tokens && ends_with_sentence_boundary(result)) {
+      iter._has_next = false;
+    }
+
+    // detect stop words
+    if (iter._has_next) {
+      for (const auto &stop : _stop_sequences) {
+        size_t pos = result.find(stop);
+        if (pos != std::string::npos) {
+          // found stop sequence - truncate and signal end
+          result = result.substr(0, pos);
+          iter._has_next = false;
+          break;
+        }
+      }
+    }
+  }
+  return result;
+}
+
+void Llama::set_last_error(const char *message) {
+  // no detail recorded so far: fall back to a generic "<message> failed"
+  if (_last_error.empty()) {
+    _last_error = std::format("{} failed", message);
+    return;
+  }
+  // otherwise prefix the recorded detail, minus one trailing newline
+  if (_last_error.back() == '\n') _last_error.pop_back();
+  _last_error = std::format("{}: {}", message, _last_error);
+}
diff --git a/llama/llama-sb.h b/llama/llama-sb.h
index 79dc2e3..99a3864 100644
--- a/llama/llama-sb.h
+++ b/llama/llama-sb.h
@@ -68,7 +68,8 @@ struct Llama {
   ~Llama();
 
   // init
-  bool construct(string model_path, int n_ctx, int n_batch, int n_gpu_layers, int log_level);
+  bool load_model(string model_path, int n_ctx, int n_batch, int n_gpu_layers, int log_level);
+  bool load_embedding_model(string model_path);
 
   // generation
   bool add_message(LlamaIter &iter, const string &role, const string &content);
@@ -98,13 +99,18 @@ struct Llama {
   // memory info
   LlamaMemoryInfo memory_info();
 
+  // rag support
+  bool embed_text(const std::string &text, std::vector<float> &out, int embed);
+  int get_embed_dim();
+
   private:
-  bool ends_with_sentence_boundary(const string &out);
+  bool batch_decode_tokens(vector<llama_token> &tokens);
   bool configure_sampler();
+  bool ends_with_sentence_boundary(const string &out);
   bool make_space_for_tokens(int n_tokens, int keep_min);
   vector<llama_token> tokenize(const string &prompt);
   string token_to_string(LlamaIter &iter, llama_token tok);
-  bool encode(const string &role, const string &content, bool add_assistant_prompt) ;
+  void set_last_error(const char *message);
 
   llama_model *_model;
   llama_context *_ctx;
diff --git a/llama/llama.cpp b/llama/llama.cpp
index 5d44db6..7f3f843 160000
--- a/llama/llama.cpp
+++ b/llama/llama.cpp
@@ -1 +1 @@
-Subproject commit 5d44db60089b0381cdbf7c45ce9ded43fc0c7f4c
+Subproject commit 7f3f843c31cd32dc4adc10b393342dfee071c332
diff --git a/llama/main.cpp b/llama/main.cpp
index aa235d9..b78d015 100644
--- a/llama/main.cpp
+++ b/llama/main.cpp
@@ -469,7 +469,7 @@ static int cmd_create_llama(int argc, slib_par_t *params, var_t *retval) {
   auto n_log_level = get_param_int(argc, params, 4, GGML_LOG_LEVEL_CONT);
   int id = ++g_nextId;
   Llama &llama = g_llama[id];
-  if (llama.construct(model, n_ctx, n_batch, n_gpu_layers, n_log_level)) {
+  if (llama.load_model(model, n_ctx, n_batch, n_gpu_layers, n_log_level)) {
     map_init_id(retval, id, CLASS_ID_LLAMA);
     v_create_callback(retval, "add_stop", cmd_llama_add_stop);
     v_create_callback(retval, "add_message", cmd_llama_add_message);
diff --git a/llama/rag.hpp b/llama/rag.hpp
new file mode 100644
index 0000000..3e133da
--- /dev/null
+++ b/llama/rag.hpp
@@ -0,0 +1,328 @@
+/*
+ * rag.hpp
+ *
+ * Single-header C++ RAG runtime library.
+ * No fixed limits on chunk count, chunk length, or embedding dimension.
+ * Includes RagSession for deduplication across turns.
+ *
+ * Usage (in ONE .cpp file):
+ *   #define RAG_IMPLEMENTATION
+ *   #include "rag.hpp"
+ *
+ * All other files:
+ *   #include "rag.hpp"
+ *
+ * Db format version: 2  (written by rag_index.cpp)
+ */
+
+#pragma once
+
+#include "llama.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <fstream>
+#include <iostream>
+#include <numeric>
+#include <sstream>
+#include <string>
+#include <vector>
+
+/* ═══════════════════════════════════════════════════════════
+ * Data structures
+ * ═══════════════════════════════════════════════════════════ */
+
+struct RagChunk {
+  std::string        text;
+  std::string        source;
+  std::string        type;
+  std::vector<float> embedding;
+};
+
+struct RagDB {
+  std::vector<RagChunk> chunks;
+  int                   embed_dim = 0;
+
+  int  size()  const { return (int)chunks.size(); }
+  bool empty() const { return chunks.empty(); }
+};
+
+/* ── per-session deduplication + token budget ──────────────── */
+struct RagSession {
+  std::vector<bool> seen;        /* sized to db.size() on init  */
+  int  tokens_used  = 0;
+  int  tokens_max   = 0;         /* set to your n_ctx           */
+  float score_threshold = 0.60f; /* skip weak matches           */
+
+  void init(int n_chunks, int ctx_size) {
+    seen.assign(n_chunks, false);
+    tokens_used = 0;
+    tokens_max  = ctx_size;
+  }
+
+  void reset() {
+    std::fill(seen.begin(), seen.end(), false);
+    tokens_used = 0;
+  }
+
+  bool is_seen(int idx)  const { return idx < (int)seen.size() && seen[idx]; }
+  void mark(int idx)           { if (idx < (int)seen.size()) seen[idx] = true; }
+
+  /* rough token estimate: 1 token ≈ 4 chars */
+  bool budget_ok(const std::string &text) const {
+    return tokens_max == 0 ||
+           (tokens_used + (int)text.size() / 4) < (int)(tokens_max * 0.85f);
+  }
+
+  void charge(const std::string &text) {
+    tokens_used += (int)text.size() / 4;
+  }
+};
+
+/* ═══════════════════════════════════════════════════════════
+ * API declarations
+ * ═══════════════════════════════════════════════════════════ */
+
+/* Load .db file produced by rag_index.cpp (version 2).
+ * Returns true on success. */
+bool rag_load(RagDB &db, const std::string &path);
+
+/*
+ * Embed query, score all chunks, inject top_k unseen results into out.
+ * Skips chunks already in session.seen and below session.score_threshold.
+ * Updates session.seen and session.tokens_used.
+ * Returns the context string (empty if nothing retrieved).
+ */
+std::string rag_retrieve(const RagDB      &db,
+                         llama_context    *embed_ctx,
+                         llama_model      *embed_model,
+                         const std::string &query,
+                         int               top_k,
+                         RagSession       &session);
+
+/*
+ * Stateless overload — no deduplication, no budget tracking.
+ * Useful for one-shot queries or testing retrieval quality.
+ */
+std::string rag_retrieve(const RagDB      &db,
+                         llama_context    *embed_ctx,
+                         llama_model      *embed_model,
+                         const std::string &query,
+                         int               top_k);
+
+/* ═══════════════════════════════════════════════════════════
+ * Implementation
+ * ═══════════════════════════════════════════════════════════ */
+#ifdef RAG_IMPLEMENTATION
+
+/* ── db load ───────────────────────────────────────────────── */
+bool rag_load(RagDB &db, const std::string &path) {
+  std::ifstream f(path, std::ios::binary);
+  if (!f) { std::cerr << "rag_load: cannot open " << path << "\n"; return false; }
+
+  auto read32 = [&]() -> uint32_t {
+    uint32_t v = 0; f.read((char*)&v, 4); return v;
+  };
+  auto read16 = [&]() -> uint16_t {
+    uint16_t v = 0; f.read((char*)&v, 2); return v;
+  };
+  auto read8 = [&]() -> uint8_t {
+    uint8_t v = 0; f.read((char*)&v, 1); return v;
+  };
+  auto readstr = [&](size_t len) -> std::string {
+    std::string s(len, '\0');
+    f.read(&s[0], (std::streamsize)len);
+    return s;
+  };
+
+  uint32_t magic   = read32();
+  uint32_t version = read32();
+  uint32_t n       = read32();
+  uint32_t edim    = read32();
+
+  if (magic != 0x52414744) {
+    std::cerr << "rag_load: bad magic\n"; return false;
+  }
+  if (version != 2) {
+    std::cerr << "rag_load: unsupported version " << version
+              << " (expected 2)\n"; return false;
+  }
+
+  db.embed_dim = (int)edim;
+  db.chunks.resize(n);
+
+  for (uint32_t i = 0; i < n; i++) {
+    RagChunk &c = db.chunks[i];
+
+    uint32_t text_len = read32();
+    c.text = readstr(text_len);
+
+    uint16_t src_len = read16();
+    c.source = readstr(src_len);
+
+    uint8_t type_len = read8();
+    c.type = readstr(type_len);
+
+    c.embedding.resize(edim);
+    f.read((char*)c.embedding.data(),
+           (std::streamsize)(edim * sizeof(float)));
+  }
+
+  if (!f) { std::cerr << "rag_load: read error\n"; return false; }
+
+  std::cerr << "rag: loaded " << db.chunks.size()
+            << " chunks (dim=" << db.embed_dim
+            << ") from " << path << "\n";
+  return true;
+}
+
+/* ── embed query ───────────────────────────────────────────── */
+static bool rag_embed_query(llama_context     *ctx,
+                            llama_model       *model,
+                            const std::string &query,
+                            std::vector<float> &out,
+                            int                embed_dim) {
+  std::string prefixed = "search_query: " + query;
+
+  int n = -llama_tokenize(model,
+                          prefixed.c_str(), (int)prefixed.size(),
+                          nullptr, 0, true, true);
+  if (n <= 0) return false;
+
+  std::vector<llama_token> tokens(n);
+  llama_tokenize(model,
+                 prefixed.c_str(), (int)prefixed.size(),
+                 tokens.data(), n, true, true);
+
+  int n_ctx = llama_n_ctx(ctx);
+  if (n > n_ctx) n = n_ctx;
+
+  llama_kv_cache_clear(ctx);
+
+  llama_batch batch = llama_batch_init(n, 0, 1);
+  for (int i = 0; i < n; i++) {
+    llama_seq_id seq = 0;
+    llama_batch_add(batch, tokens[i], i, &seq, 1, i == n - 1);
+  }
+
+  if (llama_decode(ctx, batch) != 0) {
+    llama_batch_free(batch);
+    return false;
+  }
+
+  float *emb = llama_get_embeddings_seq(ctx, 0);
+  if (!emb) emb = llama_get_embeddings_ith(ctx, n - 1);
+  if (!emb) { llama_batch_free(batch); return false; }
+
+  out.assign(emb, emb + embed_dim);
+
+  float norm = 0.0f;
+  for (float v : out) norm += v * v;
+  norm = std::sqrt(norm);
+  if (norm > 1e-9f)
+    for (float &v : out) v /= norm;
+
+  llama_batch_free(batch);
+  return true;
+}
+
+/* ── cosine similarity (vectors already L2-normalized) ─────── */
+static float rag_cosine(const std::vector<float> &a,
+                        const std::vector<float> &b) {
+  float dot = 0.0f;
+  size_t n = std::min(a.size(), b.size());
+  for (size_t i = 0; i < n; i++) dot += a[i] * b[i];
+  return dot;
+}
+
+/* ── build context string from ranked results ──────────────── */
+static std::string rag_build_context(
+    const RagDB              &db,
+    const std::vector<int>   &indices,
+    const std::vector<float> &scores) {
+  std::ostringstream out;
+  for (size_t i = 0; i < indices.size(); i++) {
+    const RagChunk &c = db.chunks[indices[i]];
+    out << "// source: " << c.source
+        << " [" << c.type << "]"
+        << " (score: " << scores[i] << ")\n"
+        << c.text << "\n---\n";
+  }
+  return out.str();
+}
+
+/* ── retrieve with session ─────────────────────────────────── */
+std::string rag_retrieve(const RagDB      &db,
+                         llama_context    *embed_ctx,
+                         llama_model      *embed_model,
+                         const std::string &query,
+                         int               top_k,
+                         RagSession       &session) {
+  if (db.empty()) return {};
+
+  std::vector<float> qvec;
+  if (!rag_embed_query(embed_ctx, embed_model, query, qvec, db.embed_dim))
+    return {};
+
+  /* score all chunks */
+  std::vector<int>   order(db.size());
+  std::iota(order.begin(), order.end(), 0);
+  std::vector<float> scores(db.size());
+  for (int i = 0; i < db.size(); i++)
+    scores[i] = rag_cosine(qvec, db.chunks[i].embedding);
+
+  std::sort(order.begin(), order.end(),
+            [&](int a, int b){ return scores[a] > scores[b]; });
+
+  /* collect top_k unseen, within budget, above threshold */
+  std::vector<int>   result_idx;
+  std::vector<float> result_scores;
+
+  for (int idx : order) {
+    if ((int)result_idx.size() >= top_k) break;
+    if (session.is_seen(idx))            continue;
+    if (scores[idx] < session.score_threshold) break; /* sorted, so stop */
+    if (!session.budget_ok(db.chunks[idx].text)) break;
+
+    result_idx.push_back(idx);
+    result_scores.push_back(scores[idx]);
+    session.mark(idx);
+    session.charge(db.chunks[idx].text);
+  }
+
+  return rag_build_context(db, result_idx, result_scores);
+}
+
+/* ── stateless retrieve ────────────────────────────────────── */
+std::string rag_retrieve(const RagDB      &db,
+                         llama_context    *embed_ctx,
+                         llama_model      *embed_model,
+                         const std::string &query,
+                         int               top_k) {
+  if (db.empty()) return {};
+
+  std::vector<float> qvec;
+  if (!rag_embed_query(embed_ctx, embed_model, query, qvec, db.embed_dim))
+    return {};
+
+  std::vector<int>   order(db.size());
+  std::iota(order.begin(), order.end(), 0);
+  std::vector<float> scores(db.size());
+  for (int i = 0; i < db.size(); i++)
+    scores[i] = rag_cosine(qvec, db.chunks[i].embedding);
+
+  std::sort(order.begin(), order.end(),
+            [&](int a, int b){ return scores[a] > scores[b]; });
+
+  std::vector<int>   result_idx;
+  std::vector<float> result_scores;
+  for (int i = 0; i < std::min(top_k, db.size()); i++) {
+    result_idx.push_back(order[i]);
+    result_scores.push_back(scores[order[i]]);
+  }
+
+  return rag_build_context(db, result_idx, result_scores);
+}
+
+#endif /* RAG_IMPLEMENTATION */
diff --git a/llama/rag_index.cpp b/llama/rag_index.cpp
new file mode 100644
index 0000000..8365cb2
--- /dev/null
+++ b/llama/rag_index.cpp
@@ -0,0 +1,200 @@
+/*
+ * rag_index.cpp
+ *
+ * Reads chunks.jsonl produced by chunk_headers, embeds each chunk
+ * using a GGUF embedding model via llama.h, saves a binary .db file.
+ *
+ * No fixed limits on chunk count or chunk length.
+ *
+ * Build:
+ *   c++ -std=c++17 -o rag_index rag_index.cpp -lllama -lm
+ *
+ * Usage:
+ *   ./rag_index \
+ *     --model  nomic-embed-text-v1.5.Q4_K_M.gguf \
+ *     --input  chunks.jsonl \
+ *     --output notcurses.db
+ */
+
+#include "llama-sb.h"
+
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <stdexcept>
+#include <string>
+#include <vector>
+
+/* ── tunables ──────────────────────────────────────────────── */
+static constexpr int BATCH_SIZE = 512;
+/* ─────────────────────────────────────────────────────────── */
+
+/* ── on-disk chunk (variable-length text) ──────────────────── */
+/*
+ * db header  (16 bytes):
+ *   uint32  magic      = 0x52414744  "RAGD"
+ *   uint32  version    = 2
+ *   uint32  n_chunks
+ *   uint32  embed_dim
+ *
+ * per chunk:
+ *   uint32  text_len
+ *   char[]  text          (text_len bytes, no null)
+ *   uint16  source_len
+ *   char[]  source        (source_len bytes, no null)
+ *   uint8   type_len
+ *   char[]  type          (type_len bytes, no null)
+ *   float[] embedding     (embed_dim floats)
+ */
+
+struct Chunk {
+  std::string         text;
+  std::string         source;
+  std::string         type;
+  std::vector<float>  embedding;
+};
+
+/* ── tiny JSON string extractor ────────────────────────────── */
+/* Copies the string value of "key" from one JSON object line into out and
+ * decodes its backslash escapes. Returns false when the key is missing or its
+ * value is not a string. Unicode (backslash-u) escapes are not decoded. */
+static bool json_get_string(const std::string &json,
+                            const std::string &key,
+                            std::string       &out) {
+  std::string search = "\"" + key + "\":";
+  size_t pos = json.find(search);
+  if (pos == std::string::npos) return false;
+  pos += search.size();
+  while (pos < json.size() && json[pos] == ' ') ++pos;
+  if (pos >= json.size() || json[pos] != '"') return false;
+  ++pos; /* skip opening quote */
+  out.clear();
+  while (pos < json.size() && json[pos] != '"') {
+    char c = json[pos++];
+    /* plain character, or a lone backslash at the very end of the line */
+    if (c != '\\' || pos >= json.size()) { out += c; continue; }
+    char e = json[pos++];
+    switch (e) {
+    case 'n':  out += '\n'; break;
+    case 't':  out += '\t'; break;
+    case 'r':  out += '\r'; break;
+    case 'b':  out += '\b'; break;
+    case 'f':  out += '\f'; break;
+    default:   out += e;    break; /* quote, backslash and slash map to themselves */
+    }
+  }
+  return true;
+}
+
+/* ── db save ───────────────────────────────────────────────── */
+static bool save_db(const std::string        &path,
+                    const std::vector<Chunk> &chunks,
+                    int                       embed_dim) {
+  std::ofstream f(path, std::ios::binary);
+  if (!f) { std::cerr << "cannot open for write: " << path << "\n"; return false; }
+
+  auto write32 = [&](uint32_t v) { f.write((char*)&v, 4); };
+  auto write16 = [&](uint16_t v) { f.write((char*)&v, 2); };
+  auto write8  = [&](uint8_t  v) { f.write((char*)&v, 1); };
+  auto writestr = [&](const std::string &s, size_t max_len) {
+    size_t len = std::min(s.size(), max_len);
+    f.write(s.c_str(), (std::streamsize)len);
+  };
+
+  write32(0x52414744);              /* magic "RAGD" */
+  write32(2);                       /* version      */
+  write32((uint32_t)chunks.size()); /* n_chunks     */
+  write32((uint32_t)embed_dim);     /* embed_dim    */
+
+  for (const Chunk &c : chunks) {
+    write32((uint32_t)c.text.size());
+    f.write(c.text.c_str(), (std::streamsize)c.text.size());
+
+    uint16_t src_len = (uint16_t)std::min(c.source.size(), (size_t)65535);
+    write16(src_len);
+    writestr(c.source, src_len);
+
+    uint8_t type_len = (uint8_t)std::min(c.type.size(), (size_t)255);
+    write8(type_len);
+    writestr(c.type, type_len);
+
+    f.write((char*)c.embedding.data(),
+            (std::streamsize)(embed_dim * sizeof(float)));
+  }
+
+  return f.good();
+}
+
+/* ── main ──────────────────────────────────────────────────── */
+int main(int argc, char **argv) {
+  std::string model_path;
+  std::string input_path;
+  std::string output_path = "corpus.db";
+
+  for (int i = 1; i < argc; i++) {
+    if (!strcmp(argv[i], "--model")  && i+1 < argc) model_path  = argv[++i];
+    if (!strcmp(argv[i], "--input")  && i+1 < argc) input_path  = argv[++i];
+    if (!strcmp(argv[i], "--output") && i+1 < argc) output_path = argv[++i];
+  }
+
+  if (model_path.empty() || input_path.empty()) {
+    std::cerr << "usage: rag_index --model <embed.gguf> "
+      "--input <chunks.jsonl> [--output <db>]\n";
+    return 1;
+  }
+
+  /* ── load embedding model ─────────────────────────────── */
+
+  Llama llama;
+  if (!llama.load_embedding_model(model_path)) {
+    return 1;
+  }
+
+  int embed_dim = llama.get_embed_dim();
+  std::cerr << "embedding dim: " << embed_dim << "\n";
+
+  /* ── read and embed chunks ────────────────────────────── */
+  std::vector<Chunk> chunks;
+  std::ifstream fin(input_path);
+  if (!fin) { std::cerr << "cannot open: " << input_path << "\n"; return 1; }
+
+  std::string line;
+  int skipped = 0;
+
+  while (std::getline(fin, line)) {
+    if (line.empty() || line[0] != '{') continue;
+
+    Chunk c;
+    if (!json_get_string(line, "text",   c.text)   ||
+        !json_get_string(line, "source", c.source)) {
+      ++skipped;
+      continue;
+    }
+    json_get_string(line, "type", c.type);
+
+    std::cerr << "\r[" << chunks.size() << "] embedding: "
+              << c.text.substr(0, 40) << "...";
+
+    if (!llama.embed_text(c.text, c.embedding, embed_dim)) {
+      ++skipped;
+      continue;
+    }
+
+    chunks.push_back(std::move(c));
+  }
+  std::cerr << "\n";
+  std::cerr << "embedded " << chunks.size()
+            << " chunks (" << skipped << " skipped)\n";
+
+  /* ── save ─────────────────────────────────────────────── */
+  if (!save_db(output_path, chunks, embed_dim)) {
+    std::cerr << "failed to save db\n";
+    return 1;
+  }
+  std::cerr << "saved → " << output_path << "\n";
+
+  return 0;
+}
diff --git a/llama/samples/adventure.bas b/llama/samples/adventure.bas
index eb88fea..23252f1 100644
--- a/llama/samples/adventure.bas
+++ b/llama/samples/adventure.bas
@@ -3,7 +3,7 @@ import llm
 ' Configuration
 const n_ctx = 5000
 const n_batch = 512
-const model_path = "models/Qwen_Qwen2.5-1.5B-Instruct-GGUF-Q4/qwen2.5-1.5b-instruct-q4_k_m.gguf"
+const model_path = "models/google_gemma-4-E4B-it-Q4_K_L.gguf"
 const max_turns = 10
 
 ' Initialize two separate LLM instances
diff --git a/llama/samples/nitro.md b/llama/samples/nitro.md
index cae4896..85effcb 100644
--- a/llama/samples/nitro.md
+++ b/llama/samples/nitro.md
@@ -58,6 +58,7 @@ Available commands:
 - TOOL:DATE  `[Returns the current date as string with format “DD/MM/YYYY”]`
 - TOOL:TIME  `[Returns the time in “HH:MM:SS” format]`
 - TOOL:RND   [Returns a random number betweem 0 and 1]`
+- TOOL:RUN   `[Invokes an external command or script in the active project]`
 ---
 
 ## Tool Decision Rules
diff --git a/llama/samples/nitro_cli.bas b/llama/samples/nitro_cli.bas
index 3450b42..28ab4c3 100644
--- a/llama/samples/nitro_cli.bas
+++ b/llama/samples/nitro_cli.bas
@@ -8,7 +8,7 @@ import llm
 ' --- Configuration ---
 const model = "models/Qwen3.5-9B-Q4_K_M.gguf"
 const knowledge_files = ["nitro.md"]
-const code_files = [".py", ".cpp", ".h", ".bas", ".java", ".html", ".js", "jsp", ".tag"]
+const code_files = [".py", ".c", ".cpp", ".h", ".bas", ".java", ".html", ".js", "jsp", ".tag"]
 
 ' ANSI Color Codes
 const RESET = chr(27) + "[0m"
@@ -150,6 +150,17 @@ func tool_permission()
   end select
 end
 
+' Runs the command arg1, resolved against sandbox_home, with the arguments arg2.
+' Returns whatever run() returns, or the caught error when the call fails.
+' NOTE(review): arg1/arg2 reach run() unescaped - confirm callers sanitise them.
+func tool_run(arg1, arg2)
+  try
+    return run(join_path(sandbox_home, arg1) + " " + arg2)
+  catch e
+    return e
+  end try
+end
+
 '
 ' Handles file system commands received from the LLM.
 '
@@ -191,6 +202,8 @@ func process_tool(cmd)
     result = iff(exist(arg1), "YES", "NO")
   case "TOOL:PERMISSION"
     result = tool_permission()
+  case "TOOL:RUN"
+    result = tool_run(arg1, arg2)
   case else
     result = "ERROR: unknown command " + op
   end select
@@ -255,6 +268,9 @@ sub main()
   ' note: this construct requires recent sbasic fixes
   local llama = create_llama()
   local iter = llama.add_message("system", initialize_agent())
+  local mem = llama.mem_info()
+
+  print GREEN + "  ✅ " + mem.advice + RESET
 
   sub handle_think(s)
     if s == "<|think|>" then
@@ -321,4 +337,3 @@ endif
 welcome_message()
 main()
 
-
diff --git a/llama/test_main.cpp b/llama/test_main.cpp
index fa87c43..3ab572d 100644
--- a/llama/test_main.cpp
+++ b/llama/test_main.cpp
@@ -56,7 +56,7 @@ int main(int argc, char ** argv) {
   }
 
   Llama llama;
-  if (llama.construct(model_path, 1024, 1024, -1, GGML_LOG_LEVEL_CONT)) {
+  if (llama.load_model(model_path, 1024, 1024, -1, GGML_LOG_LEVEL_CONT)) {
     LlamaIter iter;
     llama.set_max_tokens(n_predict);
     llama.add_message(iter, "user", prompt);

From 891a6f46bcf29ffa6040e5a315cbf7a556c1140f Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Mon, 18 May 2026 21:33:57 +0930
Subject: [PATCH 31/54] LLAMA: RAG experiment to increase domain knowledge of a
 particular lib

---
 llama/CMakeLists.txt    |  12 ++
 llama/RAG.md            |  10 +-
 llama/chunk_headers.cpp |   1 +
 llama/llama-sb-rag.cpp  | 257 +++++++++++++++++++++++++++++++
 llama/llama-sb.cpp      |  56 +------
 llama/llama-sb.h        |  10 +-
 llama/llama.cpp         |   2 +-
 llama/rag.hpp           | 328 ----------------------------------------
 llama/rag_index.cpp     |  12 +-
 9 files changed, 293 insertions(+), 395 deletions(-)
 create mode 100644 llama/llama-sb-rag.cpp
 delete mode 100644 llama/rag.hpp

diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt
index 629a269..749b55e 100644
--- a/llama/CMakeLists.txt
+++ b/llama/CMakeLists.txt
@@ -113,6 +113,7 @@ add_subdirectory(${LLAMA_DIR})
 set(PLUGIN_SOURCES
   main.cpp
   llama-sb.cpp
+  llama-sb-rag.cpp
   ../include/param.cpp
   ../include/hashmap.cpp
   ../include/apiexec.cpp
@@ -193,6 +194,17 @@ set_target_properties(rag_index PROPERTIES
   RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
 )
 
+# -----------------------------
+# Header preparation for RAG indexer
+# -----------------------------
+add_executable(chunk_headers
+  chunk_headers.cpp
+)
+
+set_target_properties(chunk_headers PROPERTIES
+  RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
+)
+
 # ------------------------------------------------------------------
 # Android native library
 # ------------------------------------------------------------------
diff --git a/llama/RAG.md b/llama/RAG.md
index ee5e8e7..6c69c06 100644
--- a/llama/RAG.md
+++ b/llama/RAG.md
@@ -21,7 +21,7 @@ notcurses headers
 chunk_headers        ← semantic chunker, outputs chunks.jsonl
       │
       ▼
-rag_index            ← embeds each chunk via nomic-embed-text GGUF
+rag_index            ← embeds each chunk via qwen3-embedding-0.6b-q4_k_m.gguf
       │
       ▼
 notcurses.db         ← binary vector store (embeddings + text)
@@ -64,9 +64,7 @@ history              ← appended for next turn (KV cache intact)
 
 - [llama.cpp](https://github.com/ggerganov/llama.cpp) — `libllama` + `llama.h`
 - A GGUF **inference model** — tested with `Qwen3.5-9B-Q4_K_M.gguf`
-- A GGUF **embedding model** —
-  `nomic-embed-text-v1.5.Q4_K_M.gguf`
-  ([download](https://huggingface.co/nomic-ai/nomic-embed-text-v1.5-GGUF))
+- A GGUF **embedding model** — `qwen3-embedding-0.6b-q4_k_m.gguf`
 - C++17 compiler (gcc 8+, clang 7+, MSVC 2019+)
 
 ---
@@ -114,7 +112,7 @@ head -5 chunks.jsonl | python3 -m json.tool
 
 ```bash
 ./rag_index \
-  --model nomic-embed-text-v1.5.Q4_K_M.gguf \
+  --model qwen3-embedding-0.6b-q4_k_m.gguf \
   --input chunks.jsonl \
   --output notcurses.db
 ```
@@ -127,7 +125,7 @@ until the library changes.
 ```bash
 ./example \
   --model Qwen3.5-9B-Q4_K_M.gguf \
-  --embed nomic-embed-text-v1.5.Q4_K_M.gguf \
+  --embed qwen3-embedding-0.6b-q4_k_m.gguf \
   --db    notcurses.db
 ```
 
diff --git a/llama/chunk_headers.cpp b/llama/chunk_headers.cpp
index 51ecd66..0d8eff0 100644
--- a/llama/chunk_headers.cpp
+++ b/llama/chunk_headers.cpp
@@ -20,6 +20,7 @@
 #include <sstream>
 #include <string>
 #include <vector>
+#include <algorithm>
 #include <filesystem>
 
 namespace fs = std::filesystem;
diff --git a/llama/llama-sb-rag.cpp b/llama/llama-sb-rag.cpp
new file mode 100644
index 0000000..77c08b3
--- /dev/null
+++ b/llama/llama-sb-rag.cpp
@@ -0,0 +1,257 @@
+// This file is part of SmallBASIC
+//
+// This program is distributed under the terms of the GPL v2.0 or later
+// Download the GNU Public License (GPL) from www.gnu.org
+//
+// Copyright(C) 2026 Chris Warren-Smith
+
+#include "llama-sb.h"
+
+#include <algorithm>
+#include <cmath>
+#include <cstdint>
+#include <fstream>
+#include <iostream>
+#include <numeric>
+#include <sstream>
+#include <string>
+#include <vector>
+
+struct RagChunk {
+  std::string        text;
+  std::string        source;
+  std::string        type;
+  std::vector<float> embedding;
+};
+
+struct RagDB {
+  std::vector<RagChunk> chunks;
+  int embed_dim = 0;
+
+  int  size()  const { return (int)chunks.size(); }
+  bool empty() const { return chunks.empty(); }
+};
+
+// Per-conversation retrieval state: which chunks were already handed out and
+// how many estimated tokens they cost. Llama::rag_retrieve uses it to skip
+// repeats and to stop once the token budget is used up.
+struct RagSession {
+  std::vector<bool> seen;        /* sized to db.size() on init  */
+  int  tokens_used  = 0;         /* estimated tokens charged so far */
+  int  tokens_max   = 0;         /* set to your n_ctx; 0 = no budget limit */
+  float score_threshold = 0.60f; /* skip weak matches           */
+
+  void init(int n_chunks, int ctx_size) {
+    seen.assign(n_chunks, false);
+    tokens_used = 0;
+    tokens_max  = ctx_size;
+  }
+
+  void reset() {
+    std::fill(seen.begin(), seen.end(), false);
+    tokens_used = 0;
+  }
+  /* indexes at or past seen.size() read as unseen and are ignored by mark() */
+  bool is_seen(int idx)  const { return idx < (int)seen.size() && seen[idx]; }
+  void mark(int idx)           { if (idx < (int)seen.size()) seen[idx] = true; }
+
+  /* rough token estimate: 1 token ≈ 4 chars; the budget is 85% of tokens_max */
+  bool budget_ok(const std::string &text) const {
+    return tokens_max == 0 ||
+           (tokens_used + (int)text.size() / 4) < (int)(tokens_max * 0.85f);
+  }
+  /* adds the estimated token cost of text to the running total */
+  void charge(const std::string &text) {
+    tokens_used += (int)text.size() / 4;
+  }
+};
+
+// Embeds text with the loaded embedding model; out receives embed_dim floats,
+// L2-normalised. Returns false when no usable embedding could be produced.
+bool Llama::embed_text(const std::string &text, std::vector<float> &out, int embed_dim) {
+  // embed_dim may come from a db file: never copy more than the model provides
+  if (_ctx == nullptr || embed_dim <= 0 || embed_dim > get_embed_dim()) {
+    _last_error = std::format("embed_text: invalid embedding size {}", embed_dim);
+    return false;
+  }
+
+  vector<llama_token> tokens = tokenize(text);
+  if (tokens.empty()) {
+    return false;
+  }
+
+  // truncate to context window
+  int n_ctx = llama_n_ctx(_ctx);
+  int n = tokens.size();
+  if (n > n_ctx) {
+    _last_error = std::format("warning: chunk truncated {} -> {} tokens ", n, n_ctx);
+    n = n_ctx;
+    tokens.resize(n);
+  }
+
+  llama_memory_clear(llama_get_memory(_ctx), true);
+  if (!batch_decode_tokens(tokens)) {
+    return false;
+  }
+
+  // prefer the pooled sequence embedding, fall back to the last token's
+  float *emb = llama_get_embeddings_seq(_ctx, 0);
+  if (!emb) emb = llama_get_embeddings_ith(_ctx, n - 1);
+  if (!emb) {
+    _last_error = "no embedding returned\n";
+    return false;
+  }
+  out.assign(emb, emb + embed_dim);
+
+  /* L2 normalize, so that a dot product of two results is their cosine */
+  float norm = 0.0f;
+  for (float v : out) norm += v * v;
+  norm = std::sqrt(norm);
+  if (norm > 1e-9f) {
+    for (float &v : out) v /= norm;
+  }
+
+  return true;
+}
+
+// Loads a version 2 db written by rag_index from path into db. Returns false
+// with _last_error set on failure, in which case db is left untouched.
+bool Llama::rag_load(RagDB &db, const std::string &path) {
+  std::ifstream f(path, std::ios::binary | std::ios::ate);
+  if (!f) {
+    _last_error = std::format("rag_load: cannot open {}", path);
+    return false;
+  }
+  // the file size bounds every length field so corrupt data cannot force huge allocations
+  const uint64_t file_size = (uint64_t)f.tellg();
+  f.seekg(0);
+
+  auto read32 = [&]() -> uint32_t { uint32_t v = 0; f.read((char*)&v, 4); return v; };
+  auto read16 = [&]() -> uint16_t { uint16_t v = 0; f.read((char*)&v, 2); return v; };
+  auto read8  = [&]() -> uint8_t  { uint8_t  v = 0; f.read((char*)&v, 1); return v; };
+  auto readstr = [&](size_t len, std::string &s) -> bool {
+    if (!f || len > file_size) return false;
+    s.resize(len);
+    f.read(&s[0], (std::streamsize)len);
+    return !f.fail();
+  };
+
+  uint32_t magic   = read32();
+  uint32_t version = read32();
+  uint32_t n       = read32();
+  uint32_t edim    = read32();
+
+  if (magic != 0x52414744) {
+    _last_error = "rag_load: bad magic";
+    return false;
+  }
+  if (version != 2) {
+    _last_error = std::format("rag_load: unsupported version {} (expected 2)", version);
+    return false;
+  }
+  // every chunk occupies at least 7 length bytes plus its embedding
+  const uint64_t min_chunk = 7 + (uint64_t)edim * sizeof(float);
+  if (!f || n > file_size / min_chunk) {
+    _last_error = "rag_load: read error";
+    return false;
+  }
+
+  std::vector<RagChunk> chunks(n);
+  for (uint32_t i = 0; i < n; i++) {
+    RagChunk &c = chunks[i];
+    bool ok = readstr(read32(), c.text);
+    ok = ok && readstr(read16(), c.source);
+    ok = ok && readstr(read8(), c.type);
+    if (ok) {
+      c.embedding.resize(edim);
+      f.read((char*)c.embedding.data(), (std::streamsize)(edim * sizeof(float)));
+    }
+    if (!ok || !f) {
+      _last_error = "rag_load: read error";
+      return false;
+    }
+  }
+
+  db.embed_dim = (int)edim;
+  db.chunks.swap(chunks);
+  std::cerr << "rag: loaded " << db.chunks.size()
+            << " chunks (dim=" << db.embed_dim
+            << ") from " << path << "\n";
+  return true;
+}
+
+//
+// cosine similarity (vectors already L2-normalized)
+//
+static float rag_cosine(const std::vector<float> &a,
+                        const std::vector<float> &b) {
+  float dot = 0.0f;
+  size_t n = std::min(a.size(), b.size());
+  for (size_t i = 0; i < n; i++) {
+    dot += a[i] * b[i];
+  }
+  return dot;
+}
+
+//
+// build context string from ranked results
+//
+static std::string rag_build_context(const RagDB &db,
+                                     const std::vector<int> &indices,
+                                     const std::vector<float> &scores) {
+  std::ostringstream out;
+  for (size_t i = 0; i < indices.size(); i++) {
+    const RagChunk &c = db.chunks[indices[i]];
+    out << "// source: " << c.source
+        << " [" << c.type << "]"
+        << " (score: " << scores[i] << ")\n"
+        << c.text << "\n---\n";
+  }
+  return out.str();
+}
+
+// Returns up to top_k unseen chunks, best score first, formatted as context
+// text; empty when db is empty or the query cannot be embedded. Stops at the
+// first score below session.score_threshold or the first chunk over budget.
+std::string Llama::rag_retrieve(const RagDB &db,
+                                const std::string &query,
+                                int top_k,
+                                RagSession &session) {
+  if (db.empty()) {
+    return {};
+  }
+
+  std::vector<float> qvec;
+  std::string text = "Instruct: Given a programming question, retrieve relevant API documentation\nQuery: " + query;
+  if (!embed_text(text, qvec, db.embed_dim)) {
+    return {};
+  }
+
+  // score all chunks
+  std::vector<int>   order(db.size());
+  std::iota(order.begin(), order.end(), 0);
+  std::vector<float> scores(db.size());
+  for (int i = 0; i < db.size(); i++)
+    scores[i] = rag_cosine(qvec, db.chunks[i].embedding);
+
+  std::sort(order.begin(), order.end(),
+            [&](int a, int b){ return scores[a] > scores[b]; });
+
+  // collect top_k unseen, within budget, above threshold; each pick is marked seen and charged
+  std::vector<int>   result_idx;
+  std::vector<float> result_scores;
+
+  for (int idx : order) {
+    if ((int)result_idx.size() >= top_k) break;
+    if (session.is_seen(idx))            continue;
+    if (scores[idx] < session.score_threshold) break; /* sorted, so stop */
+    if (!session.budget_ok(db.chunks[idx].text)) break;
+
+    result_idx.push_back(idx);
+    result_scores.push_back(scores[idx]);
+    session.mark(idx);
+    session.charge(db.chunks[idx].text);
+  }
+
+  return rag_build_context(db, result_idx, result_scores);
+}
diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index 05463fb..9e3b54a 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -203,66 +203,14 @@ bool Llama::load_embedding_model(string model_path) {
     _ctx = llama_init_from_model(_model, cparams);
     if (!_ctx) {
       set_last_error("Create context");
+    } else {
+      _vocab = llama_model_get_vocab(_model);
     }
   }
 
   return _last_error.empty();
 }
 
-int Llama::get_embed_dim() {
-  return _model != nullptr ? llama_model_n_embd(_model) : 0;
-}
-
-bool Llama::embed_text(const std::string &text, std::vector<float> &out, int embed_dim) {
-  std::string prefixed = "search_document: " + text;
-
-  vector<llama_token> tokens = tokenize(prefixed);
-  if (tokens.size() == 0) {
-    return false;
-  }
-
-  // truncate to context window
-  int n_ctx = llama_n_ctx(_ctx);
-  int n = tokens.size();
-  if (n > n_ctx) {
-    _last_error = std::format("warning: chunk truncated {} -> {} tokens ", n, n_ctx);
-    n = n_ctx;
-    tokens.resize(n);
-  }
-
-  llama_memory_clear(llama_get_memory(_ctx), true);
-
-  if (!batch_decode_tokens(tokens)) {
-    return false;
-  }
-
-  float *emb = llama_get_embeddings_seq(_ctx, 0);
-  if (!emb) {
-    emb = llama_get_embeddings_ith(_ctx, n - 1);
-  }
-
-  if (!emb) {
-    _last_error = "no embedding returned\n";
-    return false;
-  }
-
-  out.assign(emb, emb + embed_dim);
-
-  /* L2 normalize */
-  float norm = 0.0f;
-  for (float v : out) {
-    norm += v * v;
-  }
-  norm = std::sqrt(norm);
-  if (norm > 1e-9f) {
-    for (float &v : out) {
-      v /= norm;
-    }
-  }
-
-  return true;
-}
-
 void Llama::set_grammar(const string &src, const string &root) {
   _grammar_src = src;
   _grammar_root = root;
diff --git a/llama/llama-sb.h b/llama/llama-sb.h
index 99a3864..0ca3998 100644
--- a/llama/llama-sb.h
+++ b/llama/llama-sb.h
@@ -14,6 +14,10 @@
 
 using namespace std;
 
+struct Llama;
+struct RagDB;
+struct RagSession;
+
 struct LlamaMemoryInfo {
   // KV cache
   int     kv_used;        // slots currently used
@@ -34,8 +38,6 @@ struct LlamaMemoryInfo {
   string  advice;
 };
 
-struct Llama;
-
 struct LlamaIter {
   explicit LlamaIter();
   ~LlamaIter() {}
@@ -101,7 +103,9 @@ struct Llama {
 
   // rag support
   bool embed_text(const std::string &text, std::vector<float> &out, int embed);
-  int get_embed_dim();
+  int get_embed_dim() const { return _model != nullptr ? llama_model_n_embd(_model) : 0; }
+  bool rag_load(RagDB &db, const std::string &path);
+  std::string rag_retrieve(const RagDB &db, const std::string &query, int top_k, RagSession &session);
 
   private:
   bool batch_decode_tokens(vector<llama_token> &tokens);
diff --git a/llama/llama.cpp b/llama/llama.cpp
index 7f3f843..3fbadb0 160000
--- a/llama/llama.cpp
+++ b/llama/llama.cpp
@@ -1 +1 @@
-Subproject commit 7f3f843c31cd32dc4adc10b393342dfee071c332
+Subproject commit 3fbadb06dc867d3236937705477e090724ebbc6e
diff --git a/llama/rag.hpp b/llama/rag.hpp
deleted file mode 100644
index 3e133da..0000000
--- a/llama/rag.hpp
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * rag.hpp
- *
- * Single-header C++ RAG runtime library.
- * No fixed limits on chunk count, chunk length, or embedding dimension.
- * Includes RagSession for deduplication across turns.
- *
- * Usage (in ONE .cpp file):
- *   #define RAG_IMPLEMENTATION
- *   #include "rag.hpp"
- *
- * All other files:
- *   #include "rag.hpp"
- *
- * Db format version: 2  (written by rag_index.cpp)
- */
-
-#pragma once
-
-#include "llama.h"
-
-#include <algorithm>
-#include <cmath>
-#include <cstdint>
-#include <fstream>
-#include <iostream>
-#include <numeric>
-#include <sstream>
-#include <string>
-#include <vector>
-
-/* ═══════════════════════════════════════════════════════════
- * Data structures
- * ═══════════════════════════════════════════════════════════ */
-
-struct RagChunk {
-  std::string        text;
-  std::string        source;
-  std::string        type;
-  std::vector<float> embedding;
-};
-
-struct RagDB {
-  std::vector<RagChunk> chunks;
-  int                   embed_dim = 0;
-
-  int  size()  const { return (int)chunks.size(); }
-  bool empty() const { return chunks.empty(); }
-};
-
-/* ── per-session deduplication + token budget ──────────────── */
-struct RagSession {
-  std::vector<bool> seen;        /* sized to db.size() on init  */
-  int  tokens_used  = 0;
-  int  tokens_max   = 0;         /* set to your n_ctx           */
-  float score_threshold = 0.60f; /* skip weak matches           */
-
-  void init(int n_chunks, int ctx_size) {
-    seen.assign(n_chunks, false);
-    tokens_used = 0;
-    tokens_max  = ctx_size;
-  }
-
-  void reset() {
-    std::fill(seen.begin(), seen.end(), false);
-    tokens_used = 0;
-  }
-
-  bool is_seen(int idx)  const { return idx < (int)seen.size() && seen[idx]; }
-  void mark(int idx)           { if (idx < (int)seen.size()) seen[idx] = true; }
-
-  /* rough token estimate: 1 token ≈ 4 chars */
-  bool budget_ok(const std::string &text) const {
-    return tokens_max == 0 ||
-           (tokens_used + (int)text.size() / 4) < (int)(tokens_max * 0.85f);
-  }
-
-  void charge(const std::string &text) {
-    tokens_used += (int)text.size() / 4;
-  }
-};
-
-/* ═══════════════════════════════════════════════════════════
- * API declarations
- * ═══════════════════════════════════════════════════════════ */
-
-/* Load .db file produced by rag_index.cpp (version 2).
- * Returns true on success. */
-bool rag_load(RagDB &db, const std::string &path);
-
-/*
- * Embed query, score all chunks, inject top_k unseen results into out.
- * Skips chunks already in session.seen and below session.score_threshold.
- * Updates session.seen and session.tokens_used.
- * Returns the context string (empty if nothing retrieved).
- */
-std::string rag_retrieve(const RagDB      &db,
-                         llama_context    *embed_ctx,
-                         llama_model      *embed_model,
-                         const std::string &query,
-                         int               top_k,
-                         RagSession       &session);
-
-/*
- * Stateless overload — no deduplication, no budget tracking.
- * Useful for one-shot queries or testing retrieval quality.
- */
-std::string rag_retrieve(const RagDB      &db,
-                         llama_context    *embed_ctx,
-                         llama_model      *embed_model,
-                         const std::string &query,
-                         int               top_k);
-
-/* ═══════════════════════════════════════════════════════════
- * Implementation
- * ═══════════════════════════════════════════════════════════ */
-#ifdef RAG_IMPLEMENTATION
-
-/* ── db load ───────────────────────────────────────────────── */
-bool rag_load(RagDB &db, const std::string &path) {
-  std::ifstream f(path, std::ios::binary);
-  if (!f) { std::cerr << "rag_load: cannot open " << path << "\n"; return false; }
-
-  auto read32 = [&]() -> uint32_t {
-    uint32_t v = 0; f.read((char*)&v, 4); return v;
-  };
-  auto read16 = [&]() -> uint16_t {
-    uint16_t v = 0; f.read((char*)&v, 2); return v;
-  };
-  auto read8 = [&]() -> uint8_t {
-    uint8_t v = 0; f.read((char*)&v, 1); return v;
-  };
-  auto readstr = [&](size_t len) -> std::string {
-    std::string s(len, '\0');
-    f.read(&s[0], (std::streamsize)len);
-    return s;
-  };
-
-  uint32_t magic   = read32();
-  uint32_t version = read32();
-  uint32_t n       = read32();
-  uint32_t edim    = read32();
-
-  if (magic != 0x52414744) {
-    std::cerr << "rag_load: bad magic\n"; return false;
-  }
-  if (version != 2) {
-    std::cerr << "rag_load: unsupported version " << version
-              << " (expected 2)\n"; return false;
-  }
-
-  db.embed_dim = (int)edim;
-  db.chunks.resize(n);
-
-  for (uint32_t i = 0; i < n; i++) {
-    RagChunk &c = db.chunks[i];
-
-    uint32_t text_len = read32();
-    c.text = readstr(text_len);
-
-    uint16_t src_len = read16();
-    c.source = readstr(src_len);
-
-    uint8_t type_len = read8();
-    c.type = readstr(type_len);
-
-    c.embedding.resize(edim);
-    f.read((char*)c.embedding.data(),
-           (std::streamsize)(edim * sizeof(float)));
-  }
-
-  if (!f) { std::cerr << "rag_load: read error\n"; return false; }
-
-  std::cerr << "rag: loaded " << db.chunks.size()
-            << " chunks (dim=" << db.embed_dim
-            << ") from " << path << "\n";
-  return true;
-}
-
-/* ── embed query ───────────────────────────────────────────── */
-static bool rag_embed_query(llama_context     *ctx,
-                            llama_model       *model,
-                            const std::string &query,
-                            std::vector<float> &out,
-                            int                embed_dim) {
-  std::string prefixed = "search_query: " + query;
-
-  int n = -llama_tokenize(model,
-                          prefixed.c_str(), (int)prefixed.size(),
-                          nullptr, 0, true, true);
-  if (n <= 0) return false;
-
-  std::vector<llama_token> tokens(n);
-  llama_tokenize(model,
-                 prefixed.c_str(), (int)prefixed.size(),
-                 tokens.data(), n, true, true);
-
-  int n_ctx = llama_n_ctx(ctx);
-  if (n > n_ctx) n = n_ctx;
-
-  llama_kv_cache_clear(ctx);
-
-  llama_batch batch = llama_batch_init(n, 0, 1);
-  for (int i = 0; i < n; i++) {
-    llama_seq_id seq = 0;
-    llama_batch_add(batch, tokens[i], i, &seq, 1, i == n - 1);
-  }
-
-  if (llama_decode(ctx, batch) != 0) {
-    llama_batch_free(batch);
-    return false;
-  }
-
-  float *emb = llama_get_embeddings_seq(ctx, 0);
-  if (!emb) emb = llama_get_embeddings_ith(ctx, n - 1);
-  if (!emb) { llama_batch_free(batch); return false; }
-
-  out.assign(emb, emb + embed_dim);
-
-  float norm = 0.0f;
-  for (float v : out) norm += v * v;
-  norm = std::sqrt(norm);
-  if (norm > 1e-9f)
-    for (float &v : out) v /= norm;
-
-  llama_batch_free(batch);
-  return true;
-}
-
-/* ── cosine similarity (vectors already L2-normalized) ─────── */
-static float rag_cosine(const std::vector<float> &a,
-                        const std::vector<float> &b) {
-  float dot = 0.0f;
-  size_t n = std::min(a.size(), b.size());
-  for (size_t i = 0; i < n; i++) dot += a[i] * b[i];
-  return dot;
-}
-
-/* ── build context string from ranked results ──────────────── */
-static std::string rag_build_context(
-    const RagDB              &db,
-    const std::vector<int>   &indices,
-    const std::vector<float> &scores) {
-  std::ostringstream out;
-  for (size_t i = 0; i < indices.size(); i++) {
-    const RagChunk &c = db.chunks[indices[i]];
-    out << "// source: " << c.source
-        << " [" << c.type << "]"
-        << " (score: " << scores[i] << ")\n"
-        << c.text << "\n---\n";
-  }
-  return out.str();
-}
-
-/* ── retrieve with session ─────────────────────────────────── */
-std::string rag_retrieve(const RagDB      &db,
-                         llama_context    *embed_ctx,
-                         llama_model      *embed_model,
-                         const std::string &query,
-                         int               top_k,
-                         RagSession       &session) {
-  if (db.empty()) return {};
-
-  std::vector<float> qvec;
-  if (!rag_embed_query(embed_ctx, embed_model, query, qvec, db.embed_dim))
-    return {};
-
-  /* score all chunks */
-  std::vector<int>   order(db.size());
-  std::iota(order.begin(), order.end(), 0);
-  std::vector<float> scores(db.size());
-  for (int i = 0; i < db.size(); i++)
-    scores[i] = rag_cosine(qvec, db.chunks[i].embedding);
-
-  std::sort(order.begin(), order.end(),
-            [&](int a, int b){ return scores[a] > scores[b]; });
-
-  /* collect top_k unseen, within budget, above threshold */
-  std::vector<int>   result_idx;
-  std::vector<float> result_scores;
-
-  for (int idx : order) {
-    if ((int)result_idx.size() >= top_k) break;
-    if (session.is_seen(idx))            continue;
-    if (scores[idx] < session.score_threshold) break; /* sorted, so stop */
-    if (!session.budget_ok(db.chunks[idx].text)) break;
-
-    result_idx.push_back(idx);
-    result_scores.push_back(scores[idx]);
-    session.mark(idx);
-    session.charge(db.chunks[idx].text);
-  }
-
-  return rag_build_context(db, result_idx, result_scores);
-}
-
-/* ── stateless retrieve ────────────────────────────────────── */
-std::string rag_retrieve(const RagDB      &db,
-                         llama_context    *embed_ctx,
-                         llama_model      *embed_model,
-                         const std::string &query,
-                         int               top_k) {
-  if (db.empty()) return {};
-
-  std::vector<float> qvec;
-  if (!rag_embed_query(embed_ctx, embed_model, query, qvec, db.embed_dim))
-    return {};
-
-  std::vector<int>   order(db.size());
-  std::iota(order.begin(), order.end(), 0);
-  std::vector<float> scores(db.size());
-  for (int i = 0; i < db.size(); i++)
-    scores[i] = rag_cosine(qvec, db.chunks[i].embedding);
-
-  std::sort(order.begin(), order.end(),
-            [&](int a, int b){ return scores[a] > scores[b]; });
-
-  std::vector<int>   result_idx;
-  std::vector<float> result_scores;
-  for (int i = 0; i < std::min(top_k, db.size()); i++) {
-    result_idx.push_back(order[i]);
-    result_scores.push_back(scores[order[i]]);
-  }
-
-  return rag_build_context(db, result_idx, result_scores);
-}
-
-#endif /* RAG_IMPLEMENTATION */
diff --git a/llama/rag_index.cpp b/llama/rag_index.cpp
index 8365cb2..9f8a4fe 100644
--- a/llama/rag_index.cpp
+++ b/llama/rag_index.cpp
@@ -159,13 +159,18 @@ int main(int argc, char **argv) {
   /* ── read and embed chunks ────────────────────────────── */
   std::vector<Chunk> chunks;
   std::ifstream fin(input_path);
-  if (!fin) { std::cerr << "cannot open: " << input_path << "\n"; return 1; }
+  if (!fin) {
+    std::cerr << "cannot open: " << input_path << "\n";
+    return 1;
+  }
 
   std::string line;
   int skipped = 0;
 
   while (std::getline(fin, line)) {
-    if (line.empty() || line[0] != '{') continue;
+    if (line.empty() || line[0] != '{') {
+      continue;
+    }
 
     Chunk c;
     if (!json_get_string(line, "text",   c.text)   ||
@@ -178,7 +183,8 @@ int main(int argc, char **argv) {
     std::cerr << "\r[" << chunks.size() << "] embedding: "
               << c.text.substr(0, 40) << "...";
 
-    if (!llama.embed_text(c.text, c.embedding, embed_dim)) {
+    std::string text = "Instruct: Represent this API documentation for code retrieval\nQuery: " + c.text;
+    if (!llama.embed_text(text, c.embedding, embed_dim)) {
       ++skipped;
       continue;
     }

From 702026e20a4de90d26288c99ec1c2ea2d482ccfe Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Wed, 20 May 2026 19:55:59 +0930
Subject: [PATCH 32/54] simple agent using llama-sb.h

---
 llama/nitro.cpp | 1212 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 1212 insertions(+)
 create mode 100644 llama/nitro.cpp

diff --git a/llama/nitro.cpp b/llama/nitro.cpp
new file mode 100644
index 0000000..6057ff2
--- /dev/null
+++ b/llama/nitro.cpp
@@ -0,0 +1,1212 @@
+// nitro.cpp — Nitro Agent
+// A standalone agentic LLM shell with notcurses TUI.
+// Uses llama-sb.h as the sole llama.cpp integration layer.
+//
+// Build (example):
+//   g++ -std=c++20 -O2 nitro.cpp llama-sb.cpp \
+//       -I/path/to/llama.cpp/include \
+//       -L/path/to/llama.cpp/build/src \
+//       -lllama -lggml -lnotcurses-core -lnotcurses \
+//       -o nitro
+//
+// Usage:
+//   ./nitro [options] [project_dir]
+//
+// Options:
+//   -m, --model  <path>       GGUF model to load on startup
+//   -e, --embed  <path>       embedding model for RAG
+//   -g, --gpu-layers <n>      layers to offload to GPU (default: 32)
+//
+// Slash commands:
+//   /model  <path>            — load / hot-reload a GGUF model
+//   /embed  <path>            — load an embedding model for RAG
+//   /rag    <path>            — index a file or directory into RAG
+//   /memory                   — show KV / VRAM / layer stats
+//   /clear                    — reset conversation (keeps system prompt)
+//   /help                     — list commands
+//
+// Tool protocol (LLM emits, Nitro executes):
+//   TOOL:LIST   [dir]
+//   TOOL:READ   <file>
+//   TOOL:WRITE  <file> <content>
+//   TOOL:EXISTS <file>
+//   TOOL:RUN    <program> [args]
+//   TOOL:DATE
+//   TOOL:TIME
+//   TOOL:RND
+//   TOOL:PERMISSION
+//
+// Copyright (C) 2026 Chris Warren-Smith  —  GPLv2 or later
+
+// ─── Standard library ────────────────────────────────────────────────────────
+#include <algorithm>
+#include <chrono>
+#include <ctime>
+#include <filesystem>
+#include <fstream>
+#include <memory>
+#include <mutex>
+#include <optional>
+#include <sstream>
+#include <string>
+#include <vector>
+
+// ─── Integration layer (sole llama.cpp dependency for nitro) ─────────────────
+#include "llama-sb.h"
+#include "llama-sb-rag.h"
+
+// ─── TUI ─────────────────────────────────────────────────────────────────────
+#include <notcurses/notcurses.h>
+
+namespace fs = std::filesystem;
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Forward declarations
+// ═══════════════════════════════════════════════════════════════════════════
+struct NitroConfig;
+struct TuiState;
+struct AgentState;
+
+static std::string  join_path(const std::string &a, const std::string &b);
+static std::string  read_file(const std::string &path);
+static bool         write_file(const std::string &path, const std::string &data);
+static std::string  list_dir(const std::string &path);
+static bool         path_in_sandbox(const std::string &sandbox, const std::string &path);
+static std::string  strip_code_fences(const std::string &filename, const std::string &src);
+static std::string  process_tool(const std::string &line, const std::string &sandbox,
+                                  TuiState &tui);
+static std::string  build_system_prompt(const std::vector<std::string> &knowledge_files,
+                                         const std::string &sandbox);
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Config  (mirrors the SB agent constants)
+// ═══════════════════════════════════════════════════════════════════════════
+
+struct NitroConfig {
+  std::string model_path;   // empty = no model yet; set via -m/--model or /model
+  std::string embed_path;
+  std::string sandbox;
+  int   n_ctx          = 65536;
+  int   n_batch        = 512;
+  int   n_gpu_layers   = 32;
+  int   n_max_tokens   = 4096;
+  int   log_level      = GGML_LOG_LEVEL_CONT;
+  float temperature    = 0.6f;
+  float top_p          = 0.95f;
+  float min_p          = 0.0f;
+  int   top_k          = 20;
+  float penalty_repeat = 1.0f;
+  int   penalty_last_n = 256;
+  std::vector<std::string> knowledge_files;
+  int   rag_top_k      = 5;
+};
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Notcurses TUI
+// ═══════════════════════════════════════════════════════════════════════════
+//
+//  ┌──────────────────── header (1 row) ─────────────────────────────────┐
+//  │ ✦ NITRO  model: …  tok/s: …  KV: …%  VRAM: …%                       │
+//  ├─────────────────────────────────────────────────────────────────────┤
+//  │                                                                     │
+//  │  chat pane  (rows 1 … term_rows-3)                                  │
+//  │                                                                     │
+//  ├─────────────────────────────────────────────────────────────────────┤
+//  │ ─────────────────────────────────────  (separator)                  │
+//  │ ❯ input                                                             │
+//  └─────────────────────────────────────────────────────────────────────┘
+
+struct TuiState {
+  // ── notcurses handles ──────────────────────────────────────────────
+  struct notcurses *nc      = nullptr;
+  struct ncplane   *stdpl   = nullptr;
+  struct ncplane   *header  = nullptr;
+  struct ncplane   *chatpl  = nullptr;
+  struct ncplane   *inputpl = nullptr;
+
+  // ── chat buffer ───────────────────────────────────────────────────
+  std::vector<std::string> chat_lines;
+  int scroll_offset = 0;       // lines scrolled up from bottom (0 = pinned)
+  std::mutex lines_mutex;
+
+  // ── streaming accumulator ─────────────────────────────────────────
+  // Tokens arrive without newlines; we accumulate here and flush on \n.
+  std::string token_acc;
+
+  // ── input ─────────────────────────────────────────────────────────
+  std::string input_buf;
+  size_t      cursor_pos = 0;
+
+  // ── status bar values (written by agent loop) ─────────────────────
+  std::string current_model  = "none";
+  float       tokens_per_sec = 0.0f;
+  int         kv_used        = 0;
+  int         kv_total       = 1;
+  size_t      vram_used      = 0;
+  size_t      vram_total     = 1;
+
+  int term_rows = 0;
+  int term_cols = 0;
+
+  // ── lifecycle ─────────────────────────────────────────────────────
+  void init();
+  void destroy();
+  void resize();
+
+  // ── draw ──────────────────────────────────────────────────────────
+  void redraw_header();
+  void redraw_chat();
+  void redraw_input();
+  void redraw_all();
+
+  // ── content helpers ───────────────────────────────────────────────
+  // Append a complete line (wraps at terminal width, colour-coded by prefix).
+  void append_line(const std::string &line);
+  // Feed a streaming token fragment; flushes complete lines on \n.
+  void append_token(const std::string &token);
+  // Flush whatever is left in token_acc as a final line.
+  void flush_token_acc();
+
+  // ── interaction ───────────────────────────────────────────────────
+  // Show a YES/NO confirm dialog in the input plane; writes "YES" or "NO".
+  void confirm_dialog(const std::string &prompt, std::string &result);
+  // Blocking readline with cursor, arrow-key scrolling, basic editing.
+  std::string readline_blocking();
+};
+
+// ─── colour helpers ──────────────────────────────────────────────────────
+
+static inline uint64_t fg_rgb(uint32_t r, uint32_t g, uint32_t b) {
+  return NCCHANNELS_INITIALIZER(r, g, b, 0, 0, 0);
+}
+
+// ─── TuiState::init ──────────────────────────────────────────────────────
+
+void TuiState::init() {
+  notcurses_options opts{};
+  opts.flags = NCOPTION_SUPPRESS_BANNERS;
+  nc = notcurses_init(&opts, nullptr);
+  if (!nc) { std::fputs("notcurses_init failed\n", stderr); std::exit(1); }
+
+  stdpl = notcurses_stdplane(nc);
+  notcurses_term_dim_yx(nc, (unsigned *)&term_rows, (unsigned *)&term_cols);
+
+  // Header: row 0
+  ncplane_options hopt{};
+  hopt.y = 0; hopt.x = 0;
+  hopt.rows = 1; hopt.cols = (unsigned)term_cols;
+  header = ncplane_create(stdpl, &hopt);
+
+  // Chat pane: rows 1 … term_rows-3
+  int chat_rows = std::max(1, term_rows - 3);
+  ncplane_options copt{};
+  copt.y = 1; copt.x = 0;
+  copt.rows = (unsigned)chat_rows; copt.cols = (unsigned)term_cols;
+  chatpl = ncplane_create(stdpl, &copt);
+
+  // Input pane: last 2 rows
+  ncplane_options iopt{};
+  iopt.y = term_rows - 2; iopt.x = 0;
+  iopt.rows = 2; iopt.cols = (unsigned)term_cols;
+  inputpl = ncplane_create(stdpl, &iopt);
+
+  redraw_all();
+}
+
+void TuiState::destroy() {
+  if (nc) { notcurses_stop(nc); nc = nullptr; }
+}
+
+void TuiState::resize() {
+  notcurses_term_dim_yx(nc, (unsigned *)&term_rows, (unsigned *)&term_cols);
+  ncplane_resize_simple(header,  1,                       (unsigned)term_cols);
+  int cr = std::max(1, term_rows - 3);
+  ncplane_resize_simple(chatpl,  (unsigned)cr,            (unsigned)term_cols);
+  ncplane_move_yx(inputpl, term_rows - 2, 0);
+  ncplane_resize_simple(inputpl, 2,                       (unsigned)term_cols);
+  redraw_all();
+}
+
+// ─── TuiState::redraw_* ──────────────────────────────────────────────────
+
+void TuiState::redraw_header() {  // repaint the one-row status bar from the cached status fields
+  ncplane_erase(header);
+  ncplane_set_base(header, " ", 0, fg_rgb(30, 40, 55));
+  // Percentages fall back to 0 when the matching total is not positive (no divide-by-zero).
+  float kv_pct   = kv_total   > 0 ? 100.f * (float)kv_used   / (float)kv_total   : 0.f;
+  float vram_pct = vram_total  > 0 ? 100.f * (float)vram_used / (float)vram_total : 0.f;
+  // snprintf returns the length it wanted to write, which can exceed sizeof(buf).
+  char buf[512];
+  int n = std::snprintf(buf, sizeof(buf),
+    " ✦ NITRO  │ %-32s │ %5.1f tok/s │ KV %4.1f%%  VRAM %4.1f%%",
+    current_model.c_str(), (double)tokens_per_sec,
+    (double)kv_pct, (double)vram_pct);
+  if (n > term_cols && term_cols >= 0 && term_cols < (int)sizeof(buf)) buf[term_cols] = '\0';  // clip only inside buf
+
+  ncplane_set_channels(header, fg_rgb(130, 220, 200));
+  ncplane_putstr_yx(header, 0, 0, buf);
+}
+
+void TuiState::redraw_chat() {  // repaint the chat pane from chat_lines, honouring scroll_offset
+  ncplane_erase(chatpl);
+  unsigned rows, cols;
+  ncplane_dim_yx(chatpl, &rows, &cols);
+  std::lock_guard<std::mutex> lk(lines_mutex);
+  int total   = (int)chat_lines.size();
+  int visible = (int)rows;
+  // Clamp the offset to the scrollable range so surplus UP presses do not accumulate.
+  scroll_offset = std::min(scroll_offset, std::max(0, total - visible));
+  int start   = std::max(0, total - visible - scroll_offset);
+  int end     = std::min(total, start + visible);
+  for (int i = start, row = 0; i < end; ++i, ++row) {
+    const std::string &line = chat_lines[i];
+    // Colour is selected by the prefix tag at the start of the line.
+    uint64_t ch;
+    if      (line.rfind("You: ",   0) == 0) ch = fg_rgb(100, 200, 255);
+    else if (line.rfind("Nitro: ", 0) == 0) ch = fg_rgb(180, 255, 180);
+    else if (line.rfind("[tool]",  0) == 0) ch = fg_rgb(255, 180,  80);
+    else if (line.rfind("[err]",   0) == 0) ch = fg_rgb(255,  80,  80);
+    else if (line.rfind("[sys]",   0) == 0) ch = fg_rgb(140, 140, 200);
+    else                                     ch = fg_rgb(210, 210, 210);
+    // Clip (by bytes) to the pane width before drawing the row.
+    ncplane_set_channels(chatpl, ch);
+    std::string display = line.size() > cols ? line.substr(0, cols) : line;
+    ncplane_putstr_yx(chatpl, row, 0, display.c_str());
+  }
+}
+
+void TuiState::redraw_input() {
+  ncplane_erase(inputpl);
+
+  // Separator
+  ncplane_set_channels(inputpl, fg_rgb(80, 120, 160));
+  std::string sep(term_cols, '-');
+  ncplane_putstr_yx(inputpl, 0, 0, sep.c_str());
+
+  // Prompt + buffer
+  const std::string prompt = " ❯ ";
+  ncplane_set_channels(inputpl, fg_rgb(230, 230, 230));
+  ncplane_putstr_yx(inputpl, 1, 0, prompt.c_str());
+
+  int max_w = std::max(0, term_cols - (int)prompt.size() - 1);
+  std::string display = input_buf;
+  if ((int)display.size() > max_w && max_w > 0)
+    display = display.substr(display.size() - max_w);
+  ncplane_putstr_yx(inputpl, 1, (int)prompt.size(), display.c_str());
+
+  // Cursor position
+  int cx = std::min((int)prompt.size() + (int)cursor_pos, term_cols - 1);
+  ncplane_cursor_move_yx(inputpl, 1, cx);
+}
+
+void TuiState::redraw_all() {
+  redraw_header();
+  redraw_chat();
+  redraw_input();
+  notcurses_render(nc);
+}
+
+// ─── TuiState content helpers ─────────────────────────────────────────────
+
+void TuiState::append_line(const std::string &line) {  // append `line` to chat_lines, wrapped at term_cols-1 bytes
+  std::lock_guard<std::mutex> lk(lines_mutex);
+  const size_t w = (size_t)std::max(1, term_cols - 1);
+  for (size_t off = 0;;) {  // always emits at least one row, so an empty line stays a blank row
+    size_t end = std::min(line.size(), off + w);
+    while (end < line.size() && end > off + 1 && ((unsigned char)line[end] & 0xC0) == 0x80) --end;  // never cut inside a UTF-8 sequence
+    chat_lines.push_back(line.substr(off, end - off));
+    if ((off = end) >= line.size()) break;
+  }
+}
+
+void TuiState::append_token(const std::string &token) {
+  token_acc += token;
+  for (;;) {
+    auto pos = token_acc.find('\n');
+    if (pos == std::string::npos) break;
+    append_line(token_acc.substr(0, pos));
+    token_acc = token_acc.substr(pos + 1);
+  }
+  redraw_chat();
+  notcurses_render(nc);
+}
+
+void TuiState::flush_token_acc() {
+  if (!token_acc.empty()) {
+    append_line(token_acc);
+    token_acc.clear();
+    redraw_chat();
+    notcurses_render(nc);
+  }
+}
+
+// ─── TuiState::confirm_dialog ─────────────────────────────────────────────
+
+void TuiState::confirm_dialog(const std::string &prompt, std::string &result) {  // ask y/n in the input plane; sets result to "YES" or "NO"
+  ncplane_erase(inputpl);
+  ncplane_set_channels(inputpl, fg_rgb(255, 200, 80));
+  std::string msg = " " + prompt + " [y/n] ❯ ";
+  ncplane_putstr_yx(inputpl, 1, 0, msg.c_str());
+  notcurses_render(nc);
+  // Collect printable ASCII until ENTER; the prompt is redrawn after every key.
+  std::string answer;
+  for (;;) {
+    ncinput ni{};
+    notcurses_get_blocking(nc, &ni);
+    if (ni.id == NCKEY_ENTER || ni.id == '\r' || ni.id == '\n') break;
+    if ((ni.id == NCKEY_BACKSPACE || ni.id == 127) && !answer.empty()) { answer.pop_back(); }  // 127 (DEL) too, as in readline_blocking
+    else if (ni.id >= 32 && ni.id < 127) { answer += (char)ni.id; }
+
+    ncplane_erase(inputpl);
+    ncplane_set_channels(inputpl, fg_rgb(255, 200, 80));
+    ncplane_putstr_yx(inputpl, 1, 0, (msg + answer).c_str());
+    notcurses_render(nc);
+  }
+  // Case-insensitive match; anything other than y / yes / sure / k counts as NO.
+  std::string lo = answer;
+  std::transform(lo.begin(), lo.end(), lo.begin(), ::tolower);
+  result = (lo == "y" || lo == "yes" || lo == "sure" || lo == "k") ? "YES" : "NO";
+  redraw_input();
+  notcurses_render(nc);
+}
+
+// ─── TuiState::readline_blocking ──────────────────────────────────────────
+
+std::string TuiState::readline_blocking() {  // block until ENTER; returns the edited line and leaves input_buf empty
+  input_buf.clear();
+  cursor_pos = 0;
+  redraw_input();
+  notcurses_render(nc);
+
+  for (;;) {
+    ncinput ni{};
+    notcurses_get_blocking(nc, &ni);
+    // ENTER submits: hand back the buffer and reset the editor state.
+    if (ni.id == NCKEY_ENTER || ni.id == '\r' || ni.id == '\n') {
+      std::string result = input_buf;
+      input_buf.clear(); cursor_pos = 0;
+      redraw_input(); notcurses_render(nc);
+      return result;
+    }
+    if (ni.id == NCKEY_BACKSPACE || ni.id == 127) {
+      if (cursor_pos > 0) { input_buf.erase(cursor_pos - 1, 1); --cursor_pos; }
+    } else if (ni.id == NCKEY_LEFT) {
+      if (cursor_pos > 0) --cursor_pos;
+    } else if (ni.id == NCKEY_RIGHT) {
+      if (cursor_pos < input_buf.size()) ++cursor_pos;
+    } else if (ni.id == NCKEY_HOME) {
+      cursor_pos = 0;
+    } else if (ni.id == NCKEY_END) {
+      cursor_pos = input_buf.size();
+    } else if (ni.id == NCKEY_UP) {  // UP/DOWN scroll the chat pane and skip the input redraw
+      ++scroll_offset; redraw_chat(); notcurses_render(nc); continue;
+    } else if (ni.id == NCKEY_DOWN) {
+      if (scroll_offset > 0) --scroll_offset;
+      redraw_chat(); notcurses_render(nc); continue;
+    } else if (ni.id >= 32 && ni.id < 127) {  // printable ASCII only: (char) would truncate larger code points to an invalid byte
+      input_buf.insert(cursor_pos, 1, (char)ni.id);
+      ++cursor_pos;
+    }
+    redraw_input();
+    notcurses_render(nc);
+  }
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// AgentState  —  thin owner of Llama + LlamaIter + optional RAG objects.
+//
+// Design:
+//   • Llama is created once; settings are applied before load_model().
+//   • iter is a std::unique_ptr so we can construct it lazily (LlamaIter has
+//     no default-construct-then-assign path once _has_next is false; we just
+//     allocate a fresh one and hand it to add_message()).
+//   • reset_conversation() calls llama.reset() which clears the KV cache,
+//     then re-injects the system prompt as the first message of the new turn.
+//   • run_turn() mirrors the SB main() loop exactly:
+//       while iter.has_next → next() → accumulate line → on TOOL: dispatch
+// ═══════════════════════════════════════════════════════════════════════════
+
+struct AgentState {
+  Llama llama;
+
+  // iter is valid (has_next may be false) after the first add_message call.
+  // We use a pointer so it can be replaced by move.
+  std::unique_ptr<LlamaIter> iter;
+
+  // Separate Llama instance for embeddings (optional)
+  std::unique_ptr<Llama> embed_llama;
+
+  // RAG objects
+  std::unique_ptr<RagDB>      rag_db;
+  std::unique_ptr<RagSession> rag_session;
+
+  bool model_loaded = false;
+  std::string system_prompt;
+
+  // ── setup ─────────────────────────────────────────────────────────
+  bool setup_model(const NitroConfig &cfg, TuiState &tui);
+  bool setup_embed(const std::string &path, TuiState &tui);
+  void apply_generation_params(const NitroConfig &cfg);
+
+  // ── conversation management ───────────────────────────────────────
+  // Injects the system prompt as a fresh first turn.
+  // Call after setup_model() or whenever /clear is issued.
+  void reset_conversation(const std::string &sysprompt, TuiState &tui);
+
+  // ── generation ────────────────────────────────────────────────────
+  // Returns false on fatal error.
+  bool run_turn(const std::string &user_message,
+                const NitroConfig &cfg,
+                TuiState          &tui);
+
+  // ── RAG ───────────────────────────────────────────────────────────
+  bool rag_index(const std::string &path, TuiState &tui);
+
+  // ── status ────────────────────────────────────────────────────────
+  std::string memory_info_text();
+
+  // Compute tok/s from iter (matches SB iter.tokens_sec() idiom)
+  float tokens_per_sec() const;
+};
+
+// ─── AgentState::apply_generation_params / setup_model ───────────────────
+
+void AgentState::apply_generation_params(const NitroConfig &cfg) {
+  llama.add_stop("<|turn|>");
+  llama.add_stop("<|im_end|>");
+  llama.set_max_tokens(cfg.n_max_tokens);
+  llama.set_temperature(cfg.temperature);
+  llama.set_top_k(cfg.top_k);
+  llama.set_top_p(cfg.top_p);
+  llama.set_min_p(cfg.min_p);
+  llama.set_penalty_repeat(cfg.penalty_repeat);
+  llama.set_penalty_last_n(cfg.penalty_last_n);
+  llama.set_log_level(cfg.log_level);
+}
+
+// Load the chat model named by cfg.model_path and publish its memory
+// figures to the TUI.
+//
+// Returns false, after printing one line to the chat pane, when no path is
+// configured or llama.load_model() reports failure.  Returns true once the
+// model is loaded; `model_loaded` and `tui.current_model` are set on that
+// path only.
+bool AgentState::setup_model(const NitroConfig &cfg, TuiState &tui) {
+  // Shared failure path: print, repaint, report failure.
+  auto fail = [&tui](const std::string &message) {
+    tui.append_line(message);
+    tui.redraw_all();
+    return false;
+  };
+
+  if (cfg.model_path.empty())
+    return fail("[sys] No model loaded.  Use /model <path> to load a GGUF.");
+
+  // Order is significant: reset, re-apply parameters, then load.
+  llama.reset();
+  apply_generation_params(cfg);
+
+  const bool loaded = llama.load_model(cfg.model_path, cfg.n_ctx, cfg.n_batch,
+                                       cfg.n_gpu_layers, cfg.log_level);
+  if (!loaded)
+    return fail(std::string("[err] ") + llama.last_error());
+
+  model_loaded      = true;
+  tui.current_model = fs::path(cfg.model_path).filename().string();
+  tui.append_line("[sys] Model ready: " + tui.current_model);
+
+  // Advice line for the chat pane plus the KV / VRAM counters kept on tui.
+  const LlamaMemoryInfo mem = llama.memory_info();
+  tui.append_line("[sys] " + mem.advice);
+  tui.kv_used    = mem.kv_used;
+  tui.kv_total   = mem.kv_total;
+  tui.vram_used  = mem.vram_used;
+  tui.vram_total = mem.vram_total;
+
+  tui.redraw_all();
+  return true;
+}
+
+// Create a fresh embedding Llama, load `path` into it and, on success,
+// allocate a new (empty) RagDB and RagSession.
+//
+// On load failure the error is shown, `embed_llama` is released and false
+// is returned; `rag_db` / `rag_session` are not touched in that case.
+bool AgentState::setup_embed(const std::string &path, TuiState &tui) {
+  // Print one line and repaint immediately.
+  auto say = [&tui](const std::string &message) {
+    tui.append_line(message);
+    tui.redraw_all();
+  };
+
+  say("[sys] Loading embedding model: " + path);
+
+  embed_llama = std::make_unique<Llama>();
+  const bool loaded = embed_llama->load_embedding_model(path);
+  if (!loaded) {
+    say(std::string("[err] ") + embed_llama->last_error());
+    embed_llama.reset();
+    return false;
+  }
+
+  rag_db      = std::make_unique<RagDB>();
+  rag_session = std::make_unique<RagSession>();
+
+  say("[sys] Embedding model ready.");
+  return true;
+}
+
+// ─── AgentState::reset_conversation ──────────────────────────────────────
+// Mirrors the SB pattern:
+//   local iter = llama.add_message("system", initialize_agent())
+
+// Begin a new conversation: remember `sysprompt`, reset the chat model and
+// replace `iter` with a fresh LlamaIter that has the system message added.
+// If adding the system message fails the error is printed to the chat pane;
+// `iter` is still left pointing at the new (possibly unusable) iterator.
+void AgentState::reset_conversation(const std::string &sysprompt, TuiState &tui) {
+  system_prompt = sysprompt;
+  llama.reset();                    // clears KV cache + sampler state
+  // NOTE(review): a default-constructed NitroConfig is applied here rather
+  // than the caller's live configuration, so any non-default generation
+  // settings look like they are replaced by defaults — confirm intended.
+  apply_generation_params(NitroConfig{}); // re-apply stops / params after reset
+
+  // A new iterator is always created, even when a previous one exists.
+  iter = std::make_unique<LlamaIter>();
+  if (!llama.add_message(*iter, "system", system_prompt)) {
+    tui.append_line(std::string("[err] System prompt injection: ") + llama.last_error());
+    tui.redraw_all();
+  }
+}
+
+// ─── AgentState::tokens_per_sec ──────────────────────────────────────────
+// LlamaIter stores _t_start and _tokens_generated; we replicate the SB
+// iter.tokens_sec() calculation here since LlamaIter doesn't expose it
+// as a method in the public header.
+
+// Tokens generated per second for the current iterator, measured from
+// iter->_t_start until now.  Yields 0 when there is no iterator, no time
+// has elapsed, or no tokens have been generated.
+float AgentState::tokens_per_sec() const {
+  if (iter == nullptr) {
+    return 0.0f;
+  }
+
+  const auto span = std::chrono::high_resolution_clock::now() - iter->_t_start;
+  const double seconds = std::chrono::duration<double>(span).count();
+
+  const bool measurable = seconds > 0.0 && iter->_tokens_generated > 0;
+  return measurable ? (float)(iter->_tokens_generated / seconds) : 0.0f;
+}
+
+// ─── AgentState::memory_info_text ────────────────────────────────────────
+
+// Build the multi-line report shown by /memory: KV usage, VRAM usage (only
+// when vram_total is non-zero), GPU / CPU layer counts and the advice text.
+// Returns a fixed one-line message when no model is loaded.
+std::string AgentState::memory_info_text() {
+  if (!model_loaded) {
+    return "No model loaded.";
+  }
+
+  const LlamaMemoryInfo info = llama.memory_info();
+  std::ostringstream report;
+
+  report << "KV cache  : " << info.kv_used << " / " << info.kv_total
+         << "  (" << info.kv_percent << "%)\n";
+
+  if (info.vram_total > 0) {
+    // byte counts are shown in MB (>> 20)
+    const auto used_mb  = info.vram_used  >> 20;
+    const auto total_mb = info.vram_total >> 20;
+    report << "VRAM      : " << used_mb << " MB / "
+           << total_mb << " MB  (" << info.vram_percent << "%)\n";
+  }
+
+  report << "GPU layers: " << info.n_layers_gpu << " / " << info.n_layers_total << "\n"
+         << "CPU layers: " << info.n_layers_cpu << "\n"
+         << "Advice    : " << info.advice << "\n";
+
+  return report.str();
+}
+
+// ─── AgentState::rag_index ───────────────────────────────────────────────
+
+// Index `path` into the RAG database using the embedding model.
+//
+// `path` may be a single file or a directory; a directory is walked
+// recursively and every regular file in it is passed to rag_load().
+// Progress and per-file errors are written to the chat pane.
+//
+// Returns false when no embedding model / database is available, when
+// `path` does not exist, or when no file could be indexed; true when at
+// least one file was indexed.
+bool AgentState::rag_index(const std::string &path, TuiState &tui) {
+  if (!embed_llama || !rag_db) {
+    tui.append_line("[err] Load an embedding model first: /embed <path>");
+    tui.redraw_all();
+    return false;
+  }
+
+  int indexed = 0;
+  auto index_one = [&](const std::string &filepath) {
+    tui.append_line("[sys]   indexing: " + filepath);
+    tui.redraw_all();
+    if (embed_llama->rag_load(*rag_db, filepath)) {
+      ++indexed;
+    } else {
+      tui.append_line(std::string("[err] rag_load: ") + embed_llama->last_error());
+      tui.redraw_all();
+    }
+  };
+
+  const fs::path rp(path);
+  std::error_code ec;
+  if (!fs::exists(rp, ec)) {
+    tui.append_line("[err] rag: no such file or directory: " + path);
+    tui.redraw_all();
+    return false;
+  }
+
+  if (fs::is_directory(rp, ec)) {
+    // Walk with the error_code overloads throughout: a range-for would use
+    // the throwing operator++ and abort the whole program on, for example,
+    // an unreadable sub-directory.
+    fs::recursive_directory_iterator it(
+      rp, fs::directory_options::skip_permission_denied, ec);
+    const fs::recursive_directory_iterator end;
+    while (!ec && it != end) {
+      std::error_code entry_ec;
+      if (it->is_regular_file(entry_ec)) index_one(it->path().string());
+      it.increment(ec);
+    }
+    if (ec) {
+      tui.append_line("[err] rag: directory walk stopped: " + ec.message());
+      tui.redraw_all();
+    }
+  } else {
+    index_one(path);
+  }
+  return indexed > 0;
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Agent turn — mirrors SB main() loop
+//
+// SB pattern (condensed):
+//   while iter.has_next()
+//     buffer += iter.next()
+//     if newline in buffer:
+//       line = left side of buffer
+//       if TOOL: → line += buffer + iter.all()
+//                  iter = llama.add_message("tool", process_tool(line))
+//       else print line
+//   if remaining buffer is TOOL: → process it
+//   else flush remaining buffer
+// ═══════════════════════════════════════════════════════════════════════════
+
+// Run one user turn: optionally prepend retrieved RAG context, add the user
+// message, stream the model's reply line by line, execute any TOOL: line the
+// model emits and feed its result back, then refresh the status counters.
+//
+// Returns false when no model is loaded, the conversation has no iterator,
+// or the user message cannot be added; true otherwise (including when a
+// tool result could not be injected, which only ends the stream early).
+bool AgentState::run_turn(const std::string &user_message,
+                           const NitroConfig &cfg,
+                           TuiState          &tui) {
+  if (!model_loaded) {
+    tui.append_line("[err] No model loaded. Use /model <path>");
+    tui.redraw_all();
+    return false;
+  }
+
+  // ── optional RAG context injection ───────────────────────────────
+  // The database is populated through embed_llama (see rag_index), so the
+  // query goes through the same object instead of the chat model.
+  std::string effective_message = user_message;
+  if (embed_llama && rag_db && rag_session) {
+    std::string context = embed_llama->rag_retrieve(*rag_db, user_message,
+                                                     cfg.rag_top_k, *rag_session);
+    if (!context.empty()) {
+      effective_message = "Context:\n" + context + "\n\nUser: " + user_message;
+    }
+  }
+
+  // ── inject user message ───────────────────────────────────────────
+  // iter must already exist (reset_conversation initialises it with "system").
+  if (!iter) {
+    tui.append_line("[err] Conversation not initialised (call /clear to reset)");
+    tui.redraw_all();
+    return false;
+  }
+
+  if (!llama.add_message(*iter, "user", effective_message)) {
+    tui.append_line(std::string("[err] add_message: ") + llama.last_error());
+    tui.redraw_all();
+    return false;
+  }
+
+  // ── label the assistant response in the chat pane ────────────────
+  tui.append_line("Nitro: ");
+  tui.redraw_all();
+
+  bool in_think = false;
+  std::string buffer;      // accumulates tokens until we see a newline
+
+  // Copy of `s` without leading spaces / tabs.
+  auto ltrim = [](std::string s) {
+    s.erase(0, s.find_first_not_of(" \t"));
+    return s;
+  };
+
+  auto is_tool_call = [](const std::string &s) {
+    return s.compare(0, 5, "TOOL:") == 0;
+  };
+
+  // Show one piece of model output.  The think tags themselves are never
+  // shown, and the flag is updated *before* deciding whether to print, so
+  // the opening tag no longer leaks into the chat pane.
+  auto emit = [&](const std::string &text, bool add_newline) {
+    const auto first = text.find_first_not_of(" \t\r");
+    const auto last  = text.find_last_not_of(" \t\r");
+    const std::string tag = (first == std::string::npos)
+                              ? std::string()
+                              : text.substr(first, last - first + 1);
+    if (tag == "<|think|>")  { in_think = true;  return; }
+    if (tag == "</|think|>") { in_think = false; return; }
+    if (!in_think) tui.append_token(add_newline ? text + "\n" : text);
+  };
+
+  // Echo a tool command, run it, show a 200-byte preview of the result and
+  // add the result as a "tool" message.  Returns false when that last step
+  // fails.
+  auto dispatch_tool = [&](std::string tool_line) -> bool {
+    // Tool commands are single-line: drop embedded newlines ...
+    tool_line.erase(
+      std::remove(tool_line.begin(), tool_line.end(), '\n'),
+      tool_line.end());
+    // ... and trailing whitespace.
+    while (!tool_line.empty() && std::isspace((unsigned char)tool_line.back()))
+      tool_line.pop_back();
+
+    tui.append_line("[tool] " + tool_line);
+    tui.redraw_all();
+
+    std::string result = process_tool(tool_line, cfg.sandbox, tui);
+
+    tui.append_line("[tool] → " +
+      result.substr(0, 200) + (result.size() > 200 ? "…" : ""));
+    tui.redraw_all();
+
+    if (!llama.add_message(*iter, "tool", result)) {
+      tui.append_line(std::string("[err] tool result inject: ") + llama.last_error());
+      tui.redraw_all();
+      return false;
+    }
+    return true;
+  };
+
+  // ── generation loop ───────────────────────────────────────────────
+  bool inject_failed = false;
+  while (!inject_failed && iter->_has_next) {
+    buffer += llama.next(*iter);
+
+    // Drain every complete line: one token may carry several newlines, and
+    // a line left behind would otherwise reach the final flush with its
+    // newline still attached.
+    std::string::size_type nl;
+    while ((nl = buffer.find('\n')) != std::string::npos) {
+      const std::string text_line = buffer.substr(0, nl);
+      buffer.erase(0, nl + 1);
+
+      const std::string trimmed = ltrim(text_line);
+      if (!is_tool_call(trimmed)) {
+        emit(text_line, true);
+        continue;
+      }
+
+      // The rest of the buffer plus everything the model still has to say
+      // belongs to the tool command.
+      std::string tool_line = trimmed + " " + buffer + " " + llama.all(*iter);
+      buffer.clear();
+      if (!dispatch_tool(tool_line)) inject_failed = true;
+      break;
+    }
+  }
+
+  // ── flush remaining buffer ────────────────────────────────────────
+  if (!buffer.empty()) {
+    const std::string trimmed = ltrim(buffer);
+    if (is_tool_call(trimmed)) {
+      dispatch_tool(trimmed);   // any failure has already been reported
+    } else {
+      emit(buffer, false);
+    }
+  }
+  tui.flush_token_acc();
+
+  // ── update status bar ─────────────────────────────────────────────
+  tui.tokens_per_sec = tokens_per_sec();
+  LlamaMemoryInfo mem = llama.memory_info();
+  tui.kv_used    = mem.kv_used;
+  tui.kv_total   = mem.kv_total;
+  tui.vram_used  = mem.vram_used;
+  tui.vram_total = mem.vram_total;
+
+  // ── stat line ─────────────────────────────────────────────────────
+  char stat[128];
+  std::snprintf(stat, sizeof(stat), "[sys] %.1f tok/s  (%d tokens)  KV %.1f%%",
+    (double)tui.tokens_per_sec,
+    iter->_tokens_generated,
+    (double)mem.kv_percent);
+  tui.append_line(stat);
+  tui.redraw_all();
+  return true;
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// File-system helpers
+// ═══════════════════════════════════════════════════════════════════════════
+
+// Join directory `a` and path `b` with a single '/' between them.
+//   empty `b`     → `a` unchanged
+//   absolute `b`  → `b` unchanged
+//   otherwise     → `a` (minus one trailing '/') + "/" + `b`
+static std::string join_path(const std::string &a, const std::string &b) {
+  if (b.empty()) {
+    return a;
+  }
+  if (b.front() == '/') {
+    return b;
+  }
+
+  const bool trailing_slash = !a.empty() && a.back() == '/';
+  std::string joined = trailing_slash ? a.substr(0, a.size() - 1) : a;
+  joined += "/";
+  joined += b;
+  return joined;
+}
+
+// True when `path` resolves to the sandbox directory itself or to something
+// beneath it.
+//
+// The sandbox must exist (fs::canonical); `path` may name something that
+// does not exist yet (fs::weakly_canonical).  Any resolution error is
+// treated as "outside".  The comparison is made component by component
+// rather than on strings, so a sandbox at the filesystem root works and a
+// sibling such as "/proj2" is never mistaken for "/proj".
+static bool path_in_sandbox(const std::string &sandbox, const std::string &path) {
+  std::error_code ec;
+  const fs::path base = fs::canonical(sandbox, ec);
+  if (ec) return false;
+
+  const fs::path target = fs::weakly_canonical(path, ec);
+  if (ec || target.empty()) return false;
+
+  // Inside the sandbox  <=>  every component of `base` is a prefix of `target`.
+  const auto diverge = std::mismatch(base.begin(), base.end(),
+                                     target.begin(), target.end());
+  return diverge.first == base.end();
+}
+
+// Return the whole of `path` (opened in binary mode) as a string, or the
+// text "ERROR: cannot open <path>" when the file cannot be opened.
+static std::string read_file(const std::string &path) {
+  std::ifstream in(path, std::ios::binary);
+  if (in.fail()) {
+    return "ERROR: cannot open " + path;
+  }
+
+  std::ostringstream contents;
+  contents << in.rdbuf();
+  return contents.str();
+}
+
+// Write `data` to `path` (binary, truncating), creating missing parent
+// directories first.  Returns true only when every byte has reached the
+// file and the file closed cleanly.
+static bool write_file(const std::string &path, const std::string &data) {
+  const fs::path target(path);
+  if (target.has_parent_path()) {
+    std::error_code ec;
+    // A failure here is not fatal by itself: the open below fails instead.
+    fs::create_directories(target.parent_path(), ec);
+  }
+
+  std::ofstream out(path, std::ios::binary | std::ios::trunc);
+  if (!out) return false;
+
+  out.write(data.data(), (std::streamsize)data.size());
+  // write() may only fill the stream buffer; close() flushes it and sets
+  // failbit if the data could not be written (disk full, I/O error).
+  out.close();
+  return !out.fail();
+}
+
+// List the entries of directory `path`, one per line.  Names beginning with
+// '.' are skipped and directories are shown as "[name]".
+//
+// Returns "ERROR: cannot list <path>: <reason>" when the directory cannot
+// be opened.  If iteration fails part-way the entries gathered so far are
+// returned.
+static std::string list_dir(const std::string &path) {
+  std::error_code ec;
+  fs::directory_iterator it(path, ec);
+  if (ec) return "ERROR: cannot list " + path + ": " + ec.message();
+
+  std::ostringstream oss;
+  const fs::directory_iterator end{};
+  // error_code overloads only: the range-for form would throw on increment.
+  for (; !ec && it != end; it.increment(ec)) {
+    const std::string name = it->path().filename().string();
+    if (name.empty() || name[0] == '.') continue;
+
+    std::error_code type_ec;
+    const bool is_dir = it->is_directory(type_ec);
+    oss << (is_dir ? "[" + name + "]" : name) << "\n";
+  }
+  return oss.str();
+}
+
+// File extensions (leading dot included, lower case) that
+// strip_code_fences() treats as source code.  Matching is an exact,
+// case-sensitive comparison against fs::path::extension().
+static const std::vector<std::string> CODE_EXTENSIONS = {
+  ".py",".c",".cpp",".h",".bas",".java",".html",".js",".ts",
+  ".json",".yaml",".toml",".sh",".go",".rs",".jsx",".tsx"
+};
+
+// For files whose extension is listed in CODE_EXTENSIONS, remove a
+// surrounding markdown code fence from `src`:
+//   - everything up to and including the line holding the first ``` goes;
+//   - everything from the last ``` in the remainder onwards goes.
+// `src` is returned unchanged for other extensions, when there is no fence,
+// or when no newline follows the opening fence.
+static std::string strip_code_fences(const std::string &filename,
+                                      const std::string &src) {
+  static const char *const FENCE = "```";
+
+  const std::string ext = fs::path(filename).extension().string();
+  const bool known_ext =
+    std::find(CODE_EXTENSIONS.begin(), CODE_EXTENSIONS.end(), ext) != CODE_EXTENSIONS.end();
+  if (!known_ext) {
+    return src;
+  }
+
+  const auto open_at = src.find(FENCE);
+  if (open_at == std::string::npos) {
+    return src;
+  }
+
+  // the body starts on the line after the opening fence
+  const auto line_end = src.find('\n', open_at + 3);
+  if (line_end == std::string::npos) {
+    return src;
+  }
+
+  std::string body = src.substr(line_end + 1);
+  const auto close_at = body.rfind(FENCE);
+  if (close_at != std::string::npos) {
+    body.erase(close_at);
+  }
+  return body;
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Tool dispatch  (mirrors SB process_tool)
+// ═══════════════════════════════════════════════════════════════════════════
+
+// Execute one tool command emitted by the model and return its textual
+// result (or a string starting with "ERROR:").
+//
+//   cmd      "TOOL:<OP> [ARG1 [REST…]]" — OP ends at the first space, ARG1
+//            at the next space, REST is everything after that.
+//   sandbox  directory that file / program arguments must stay within.
+//   tui      used only by TOOL:PERMISSION to ask the user.
+//
+// Relative paths are resolved against `sandbox`; every path is checked with
+// path_in_sandbox() before it is used.
+static std::string process_tool(const std::string &cmd,
+                                  const std::string &sandbox,
+                                  TuiState &tui) {
+  // Parse:  OP [ARG1 [REST…]]
+  std::string op, arg1, arg2;
+  auto sp1 = cmd.find(' ');
+  if (sp1 == std::string::npos) {
+    op = cmd;
+  } else {
+    op = cmd.substr(0, sp1);
+    std::string rest = cmd.substr(sp1 + 1);
+    // ltrim
+    rest.erase(0, rest.find_first_not_of(" \t"));
+    auto sp2 = rest.find(' ');
+    if (sp2 == std::string::npos) {
+      arg1 = rest;
+    } else {
+      arg1 = rest.substr(0, sp2);
+      arg2 = rest.substr(sp2 + 1);
+    }
+  }
+
+  // Resolve arg1 into a path inside the sandbox (absolute paths pass through
+  // and are rejected later by path_in_sandbox if they point elsewhere).
+  auto resolve = [&](const std::string &p) -> std::string {
+    if (p.empty() || p == ".") return sandbox;
+    if (p.substr(0, 2) == "./") return join_path(sandbox, p.substr(2));
+    if (p[0] == '/') return p;
+    return join_path(sandbox, p);
+  };
+
+  // Format the current local time; guards against localtime() failing.
+  auto format_now = [](const char *fmt) -> std::string {
+    char buf[32];
+    const time_t now = time(nullptr);
+    const struct tm *parts = localtime(&now);
+    if (!parts || strftime(buf, sizeof(buf), fmt, parts) == 0)
+      return "ERROR: clock unavailable";
+    return buf;
+  };
+
+  // Wrap `s` in single quotes for /bin/sh, escaping embedded quotes.
+  auto shell_quote = [](const std::string &s) -> std::string {
+    std::string q = "'";
+    for (char c : s) {
+      if (c == '\'') q += "'\\''";
+      else           q += c;
+    }
+    q += "'";
+    return q;
+  };
+
+  if (op == "TOOL:DATE") return format_now("%Y-%m-%d");
+  if (op == "TOOL:TIME") return format_now("%H:%M:%S");
+
+  if (op == "TOOL:RND") {
+    // Seed once; an unseeded rand() yields the same sequence on every run.
+    static const bool seeded = (srand((unsigned)time(nullptr)), true);
+    (void)seeded;
+    return std::to_string((double)rand() / RAND_MAX);
+  }
+  if (op == "TOOL:LIST") {
+    std::string dir = resolve(arg1);
+    if (!path_in_sandbox(sandbox, dir)) return "ERROR: path outside sandbox";
+    return list_dir(dir);
+  }
+  if (op == "TOOL:EXISTS") {
+    std::string p = resolve(arg1);
+    if (!path_in_sandbox(sandbox, p)) return "NO";
+    return fs::exists(p) ? "YES" : "NO";
+  }
+  if (op == "TOOL:READ") {
+    std::string p = resolve(arg1);
+    if (!path_in_sandbox(sandbox, p)) return "ERROR: path outside sandbox";
+    return read_file(p);
+  }
+  if (op == "TOOL:WRITE") {
+    std::string p = resolve(arg1);
+    if (!path_in_sandbox(sandbox, p)) return "ERROR: path outside sandbox";
+    std::string content = strip_code_fences(arg1, arg2);
+    return write_file(p, content) ? "OK: written to " + arg1
+                                   : "ERROR: write failed for " + arg1;
+  }
+  if (op == "TOOL:PERMISSION") {
+    std::string result;
+    tui.confirm_dialog("Allow model to proceed?", result);
+    return result;
+  }
+  if (op == "TOOL:RUN") {
+    std::string prog = resolve(arg1);
+    if (!path_in_sandbox(sandbox, prog)) return "ERROR: path outside sandbox";
+
+    // popen() hands the string to the shell and both parts come from model
+    // output.  Quote the program path, and refuse arguments that could
+    // chain commands, substitute commands / variables or redirect I/O.
+    if (arg2.find_first_of(";&|`$<>\n\r") != std::string::npos)
+      return "ERROR: shell metacharacters are not allowed in TOOL:RUN arguments";
+
+    std::string command = shell_quote(prog) + " " + arg2 + " 2>&1";
+    FILE *fp = popen(command.c_str(), "r");
+    if (!fp) return "ERROR: popen failed";
+    std::string out;
+    char buf[256];
+    while (fgets(buf, sizeof(buf), fp)) out += buf;
+    pclose(fp);
+    // keep the tool result small enough to feed back to the model
+    if (out.size() > 4096) out = out.substr(0, 4096) + "\n…(truncated)";
+    return out;
+  }
+
+  return "ERROR: unknown tool: " + op;
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// System prompt  (mirrors SB initialize_agent)
+// ═══════════════════════════════════════════════════════════════════════════
+
+// Assemble the system prompt: a fixed introduction naming `sandbox`, the
+// tool protocol and rules, then one "## Knowledge: <file>" section for each
+// entry of `knowledge_files` that can be opened (unreadable files are
+// skipped silently).
+static std::string build_system_prompt(const std::vector<std::string> &knowledge_files,
+                                        const std::string &sandbox) {
+  std::string prompt =
+    "You are Nitro, an agentic AI assistant for software development.\n"
+    "Your sandbox (project directory) is: ";
+  prompt += sandbox;
+  prompt +=
+    "\n\n"
+    "## Tool protocol\n"
+    "Emit tool calls on their own line. The host executes them and returns\n"
+    "TOOL_RESULT: <value> on the next line.\n\n"
+    "Available tools:\n"
+    "  TOOL:LIST   [dir]          list files (default: sandbox root)\n"
+    "  TOOL:READ   <file>         read file contents\n"
+    "  TOOL:WRITE  <file> <text>  write text to file\n"
+    "  TOOL:EXISTS <file>         YES or NO\n"
+    "  TOOL:RUN    <prog> [args]  run program inside sandbox\n"
+    "  TOOL:DATE                  current date\n"
+    "  TOOL:TIME                  current time\n"
+    "  TOOL:RND                   random float\n"
+    "  TOOL:PERMISSION            ask user for explicit permission\n\n"
+    "Rules:\n"
+    "- Never access files outside the sandbox.\n"
+    "- Use TOOL:PERMISSION before destructive or irreversible operations.\n"
+    "- Reason step-by-step inside <|think|>…</|think|> (hidden from user).\n"
+    "- After each tool call, explain what you did in plain English.\n\n";
+
+  // Append each readable knowledge file under its own heading.
+  for (const std::string &file : knowledge_files) {
+    std::ifstream in(file);
+    if (!in) {
+      continue;
+    }
+    std::ostringstream text;
+    text << in.rdbuf();
+
+    prompt += "## Knowledge: ";
+    prompt += file;
+    prompt += "\n";
+    prompt += text.str();
+    prompt += "\n\n";
+  }
+  return prompt;
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Slash command handler
+// ═══════════════════════════════════════════════════════════════════════════
+
+// Execute one slash command typed by the user.
+//
+//   input  the full line, starting with '/'; the verb ends at the first
+//          space and the remainder (left-trimmed) is its argument.
+//   cfg    updated by /model and /embed with the path that was requested.
+//   agent  model / RAG / conversation state acted upon.
+//   tui    receives all output.
+//
+// Path arguments beginning with "~/" are expanded against $HOME, matching
+// the handling of command-line paths in main().
+static void handle_slash(const std::string &input,
+                          NitroConfig       &cfg,
+                          AgentState        &agent,
+                          TuiState          &tui) {
+  auto sp = input.find(' ');
+  std::string verb = (sp == std::string::npos) ? input : input.substr(0, sp);
+  std::string rest;
+  if (sp != std::string::npos) {
+    rest = input.substr(sp + 1);
+    rest.erase(0, rest.find_first_not_of(" \t"));
+  }
+
+  // No shell is involved here, so "~/" has to be expanded by hand.
+  auto expand_home = [](const std::string &p) -> std::string {
+    if (p.compare(0, 2, "~/") != 0) return p;
+    const char *home = getenv("HOME");
+    return std::string(home ? home : ".") + "/" + p.substr(2);
+  };
+
+  if (verb == "/help") {
+    tui.append_line("[sys] Commands:");
+    tui.append_line("[sys]   /model  <path>   load a GGUF model");
+    tui.append_line("[sys]   /embed  <path>   load an embedding model for RAG");
+    tui.append_line("[sys]   /rag    <path>   index file or directory");
+    tui.append_line("[sys]   /memory          KV / VRAM / layer stats");
+    tui.append_line("[sys]   /clear           reset conversation");
+    tui.append_line("[sys]   /help            this message");
+    tui.append_line("[sys]   exit / quit      exit Nitro");
+    tui.redraw_all();
+    return;
+  }
+
+  if (verb == "/model") {
+    if (rest.empty()) {
+      tui.append_line("[err] Usage: /model <path-to-gguf>");
+      tui.redraw_all(); return;
+    }
+    cfg.model_path = expand_home(rest);
+    // A new model gets a new conversation seeded with the system prompt.
+    if (agent.setup_model(cfg, tui)) {
+      std::string sysp = build_system_prompt(cfg.knowledge_files, cfg.sandbox);
+      agent.reset_conversation(sysp, tui);
+    }
+    tui.redraw_all();
+    return;
+  }
+
+  if (verb == "/embed") {
+    if (rest.empty()) {
+      tui.append_line("[err] Usage: /embed <path-to-gguf>");
+      tui.redraw_all(); return;
+    }
+    cfg.embed_path = expand_home(rest);
+    agent.setup_embed(cfg.embed_path, tui);
+    return;
+  }
+
+  if (verb == "/rag") {
+    if (rest.empty()) {
+      tui.append_line("[err] Usage: /rag <file-or-dir>");
+      tui.redraw_all(); return;
+    }
+    agent.rag_index(expand_home(rest), tui);
+    return;
+  }
+
+  if (verb == "/memory") {
+    // one chat line per line of the report
+    std::istringstream iss(agent.memory_info_text());
+    std::string line;
+    while (std::getline(iss, line)) tui.append_line("[sys] " + line);
+    tui.redraw_all();
+    return;
+  }
+
+  if (verb == "/clear") {
+    { std::lock_guard<std::mutex> lk(tui.lines_mutex);
+      tui.chat_lines.clear(); }
+    std::string sysp = build_system_prompt(cfg.knowledge_files, cfg.sandbox);
+    agent.reset_conversation(sysp, tui);
+    tui.append_line("[sys] Conversation cleared.");
+    tui.redraw_all();
+    return;
+  }
+
+  tui.append_line("[err] Unknown command: " + verb + "  (try /help)");
+  tui.redraw_all();
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Welcome banner
+// ═══════════════════════════════════════════════════════════════════════════
+
+// Print the start-up banner, the sandbox location and a one-line usage
+// hint to the chat pane, then repaint.
+static void welcome(TuiState &tui, const std::string &sandbox) {
+  static const char *const BANNER[] = {
+    "[sys] ╔═══════════════════════════════════════════╗",
+    "[sys] ║       N I T R O   A G E N T    v1.0       ║",
+    "[sys] ╚═══════════════════════════════════════════╝",
+  };
+  for (const char *row : BANNER) {
+    tui.append_line(row);
+  }
+
+  tui.append_line("[sys] Sandbox : " + sandbox);
+  tui.append_line("[sys] /help for commands  ·  exit to quit");
+  tui.append_line("");
+
+  tui.redraw_all();
+}
+
+// ═══════════════════════════════════════════════════════════════════════════
+// main()
+// ═══════════════════════════════════════════════════════════════════════════
+
+// Program entry point: parse the command line, prepare the sandbox and the
+// TUI, optionally load the chat and embedding models, then run the
+// read / dispatch loop until the user types "exit" or "quit".
+// Returns 0 on normal exit; exits with status 1 on a command-line error.
+int main(int argc, char **argv) {
+  NitroConfig cfg;
+
+  // ── Parse arguments ───────────────────────────────────────────────
+  // Accepted forms:
+  //   ./nitro [options] [project_dir]
+  //   -m / --model  <path>       GGUF to load
+  //   -e / --embed  <path>       embedding model
+  //   -g / --gpu-layers <n>      GPU layer count
+  // The first non-option argument is treated as project_dir.
+
+  // Expand a leading "~/" (via $HOME) or "./" (via the current directory).
+  auto resolve_path = [](const std::string &arg) -> std::string {
+    std::error_code ec;
+    if (arg.substr(0, 2) == "~/") {
+      const char *home = getenv("HOME");
+      return std::string(home ? home : ".") + "/" + arg.substr(2);
+    }
+    if (arg.substr(0, 2) == "./")
+      return (fs::current_path(ec) / arg.substr(2)).string();
+    return arg;
+  };
+
+  for (int i = 1; i < argc; ++i) {
+    std::string a = argv[i];
+
+    // Consume the value following an option, or exit if it is missing.
+    auto take_next = [&](const char *flag) -> std::string {
+      if (i + 1 >= argc) {
+        std::fprintf(stderr, "nitro: %s requires an argument\n", flag);
+        std::exit(1);
+      }
+      return argv[++i];
+    };
+
+    if (a == "-m" || a == "--model") {
+      cfg.model_path = resolve_path(take_next(a.c_str()));
+    } else if (a == "-e" || a == "--embed") {
+      cfg.embed_path = resolve_path(take_next(a.c_str()));
+    } else if (a == "-g" || a == "--gpu-layers") {
+      const std::string value = take_next(a.c_str());
+      // std::stoi throws on non-numeric or out-of-range input; report it
+      // like the other option errors instead of aborting.
+      try {
+        cfg.n_gpu_layers = std::stoi(value);
+      } catch (const std::exception &) {
+        std::fprintf(stderr, "nitro: %s expects an integer, got '%s'\n",
+                     a.c_str(), value.c_str());
+        std::exit(1);
+      }
+    } else if (a == "-h" || a == "--help") {
+      std::puts(
+        "Usage: nitro [options] [project_dir]\n"
+        "\n"
+        "Options:\n"
+        "  -m, --model  <path>      GGUF model to load on startup\n"
+        "  -e, --embed  <path>      embedding model for RAG\n"
+        "  -g, --gpu-layers <n>     GPU layers to offload (default: 32)\n"
+        "  -h, --help               show this help\n"
+        "\n"
+        "project_dir defaults to the current working directory.\n"
+        "\n"
+        "Slash commands inside nitro:\n"
+        "  /model  <path>           load / hot-reload a GGUF\n"
+        "  /embed  <path>           load an embedding model\n"
+        "  /rag    <path>           index file or directory\n"
+        "  /memory                  KV / VRAM / layer stats\n"
+        "  /clear                   reset conversation\n"
+        "  /help                    list commands\n"
+      );
+      return 0;
+    } else if (!a.empty() && a[0] == '-') {
+      std::fprintf(stderr, "nitro: unknown option '%s'  (try --help)\n", a.c_str());
+      std::exit(1);
+    } else {
+      // positional → project_dir
+      cfg.sandbox = resolve_path(a);
+    }
+  }
+
+  // ── Resolve sandbox ───────────────────────────────────────────────
+  if (cfg.sandbox.empty()) {
+    std::error_code ec;
+    cfg.sandbox = fs::current_path(ec).string();
+  }
+  { std::error_code ec; fs::create_directories(cfg.sandbox, ec); }
+
+  // ── Auto-discover knowledge files ─────────────────────────────────
+  // Looked up relative to the current working directory.
+  for (const char *kf : {"nitro.md", "AGENTS.md", "README.md"}) {
+    if (fs::exists(kf)) cfg.knowledge_files.push_back(kf);
+  }
+
+  // ── Init TUI ──────────────────────────────────────────────────────
+  TuiState tui;
+  tui.init();
+  welcome(tui, cfg.sandbox);
+
+  // ── Init agent ────────────────────────────────────────────────────
+  // AgentState owns a Llama whose constructor calls llama_backend_init();
+  // its destructor calls llama_backend_free() — nitro never touches
+  // the raw llama API directly.
+  AgentState agent;
+
+  if (!cfg.model_path.empty()) {
+    // Model provided on the command line — load immediately.
+    if (agent.setup_model(cfg, tui)) {
+      std::string sysp = build_system_prompt(cfg.knowledge_files, cfg.sandbox);
+      agent.reset_conversation(sysp, tui);
+    }
+  } else {
+    // No model yet — friendly prompt, not an error.
+    tui.append_line("[sys] No model specified.  Use /model <path> to load one.");
+    tui.append_line("[sys] Example: /model ~/models/qwen2.5-7b-q4_k_m.gguf");
+    tui.redraw_all();
+  }
+
+  // Load the embedding model whenever one was given: -e must not be
+  // silently ignored just because -m was omitted.
+  if (!cfg.embed_path.empty())
+    agent.setup_embed(cfg.embed_path, tui);
+
+  // ── Main loop ─────────────────────────────────────────────────────
+  for (;;) {
+    // Check for terminal resize
+    {
+      unsigned rows = 0, cols = 0;
+      notcurses_stddim_yx(tui.nc, &rows, &cols);
+      if ((int)rows != tui.term_rows || (int)cols != tui.term_cols)
+        tui.resize();
+    }
+
+    std::string input = tui.readline_blocking();
+    // trim
+    input.erase(0, input.find_first_not_of(" \t"));
+    if (!input.empty())
+      input.erase(input.find_last_not_of(" \t\r\n") + 1);
+    if (input.empty()) continue;
+
+    tui.append_line("You: " + input);
+    tui.redraw_all();
+
+    if (input == "exit" || input == "quit") break;
+
+    if (input[0] == '/') {
+      handle_slash(input, cfg, agent, tui);
+    } else {
+      agent.run_turn(input, cfg, tui);
+    }
+  }
+
+  tui.destroy();
+  // agent destructor cleans up Llama (which calls llama_backend_free)
+  return 0;
+}

From 370e440b9a672ebb61167c3aee839d85b731304d Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Wed, 20 May 2026 20:21:38 +0930
Subject: [PATCH 33/54] LLAMA: replace test_main with nitro agent application

---
 llama/CMakeLists.txt   |  75 ++++++++++----
 llama/llama-sb-rag.cpp |  50 +--------
 llama/llama-sb-rag.h   |  58 +++++++++++
 llama/llama-sb.cpp     |   4 -
 llama/llama.cpp        |   2 +-
 llama/nitro.cpp        | 223 ++++++++++++++++++++++++++++++++---------
 llama/test_main.cpp    |  74 --------------
 7 files changed, 294 insertions(+), 192 deletions(-)
 create mode 100644 llama/llama-sb-rag.h
 delete mode 100644 llama/test_main.cpp

diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt
index 749b55e..282337b 100644
--- a/llama/CMakeLists.txt
+++ b/llama/CMakeLists.txt
@@ -150,27 +150,68 @@ set_target_properties(llm PROPERTIES
 )
 
 # -----------------------------
-# Optional test application
+# nitro agent application
+# (only built when notcurses is available)
 # -----------------------------
-add_executable(llm_test
-  test_main.cpp
-)
+find_package(PkgConfig QUIET)
+
+set(NC_FOUND FALSE)
+set(NC_TARGET "")
+
+if(DEFINED NOTCURSES_DIR)
+  # Explicit path — create an imported target manually
+  find_library(NC_LIB      NAMES notcurses      HINTS "${NOTCURSES_DIR}/lib" REQUIRED)
+  find_library(NC_CORE_LIB NAMES notcurses-core HINTS "${NOTCURSES_DIR}/lib")
+  add_library(notcurses_imported INTERFACE IMPORTED)
+  target_include_directories(notcurses_imported INTERFACE "${NOTCURSES_DIR}/include")
+  target_link_libraries(notcurses_imported INTERFACE ${NC_LIB})
+  if(NC_CORE_LIB)
+    target_link_libraries(notcurses_imported INTERFACE ${NC_CORE_LIB})
+  endif()
+  set(NC_TARGET notcurses_imported)
+  set(NC_FOUND TRUE)
+
+elseif(PkgConfig_FOUND)
+  # IMPORTED_TARGET gives a PkgConfig::NC target with full lib paths baked in
+  pkg_check_modules(NC QUIET IMPORTED_TARGET notcurses)
+  if(NC_FOUND)
+    set(NC_TARGET PkgConfig::NC)
+  endif()
 
-target_include_directories(llm_test PRIVATE
-  ${LLAMA_DIR}/include
-  ${LLAMA_DIR}/ggml/include
-  ${CMAKE_CURRENT_SOURCE_DIR}/../include
-)
+else()
+  find_library(NC_LIB      NAMES notcurses)
+  find_library(NC_CORE_LIB NAMES notcurses-core)
+  if(NC_LIB AND NC_CORE_LIB)
+    add_library(notcurses_imported INTERFACE IMPORTED)
+    target_link_libraries(notcurses_imported INTERFACE ${NC_LIB} ${NC_CORE_LIB})
+    set(NC_TARGET notcurses_imported)
+    set(NC_FOUND TRUE)
+  endif()
+endif()
 
-target_link_libraries(llm_test PRIVATE
-  llm
-  llama
-  ggml
-)
+if(NC_FOUND)
+  message(STATUS "notcurses found — building nitro")
 
-set_target_properties(llm_test PROPERTIES
-  RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
-)
+  add_executable(nitro
+    nitro.cpp
+  )
+  target_include_directories(nitro PRIVATE
+    ${LLAMA_DIR}/include
+    ${LLAMA_DIR}/ggml/include
+    ${CMAKE_CURRENT_SOURCE_DIR}/../include
+  )
+  target_link_libraries(nitro PRIVATE
+    llm
+    llama
+    ggml
+    ${NC_TARGET}           # imported target carries include + lib paths
+  )
+  set_target_properties(nitro PROPERTIES
+    RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
+  )
+else()
+  message(STATUS "notcurses not found — skipping nitro (set -DNOTCURSES_DIR=... to enable)")
+endif()
 
 # -----------------------------
 # RAG indexer
diff --git a/llama/llama-sb-rag.cpp b/llama/llama-sb-rag.cpp
index 77c08b3..a64f31f 100644
--- a/llama/llama-sb-rag.cpp
+++ b/llama/llama-sb-rag.cpp
@@ -6,6 +6,7 @@
 // Copyright(C) 2026 Chris Warren-Smith
 
 #include "llama-sb.h"
+#include "llama-sb-rag.h"
 
 #include <algorithm>
 #include <cmath>
@@ -17,55 +18,6 @@
 #include <string>
 #include <vector>
 
-struct RagChunk {
-  std::string        text;
-  std::string        source;
-  std::string        type;
-  std::vector<float> embedding;
-};
-
-struct RagDB {
-  std::vector<RagChunk> chunks;
-  int embed_dim = 0;
-
-  int  size()  const { return (int)chunks.size(); }
-  bool empty() const { return chunks.empty(); }
-};
-
-//
-// per-session deduplication + token budget
-//
-struct RagSession {
-  std::vector<bool> seen;        /* sized to db.size() on init  */
-  int  tokens_used  = 0;
-  int  tokens_max   = 0;         /* set to your n_ctx           */
-  float score_threshold = 0.60f; /* skip weak matches           */
-
-  void init(int n_chunks, int ctx_size) {
-    seen.assign(n_chunks, false);
-    tokens_used = 0;
-    tokens_max  = ctx_size;
-  }
-
-  void reset() {
-    std::fill(seen.begin(), seen.end(), false);
-    tokens_used = 0;
-  }
-
-  bool is_seen(int idx)  const { return idx < (int)seen.size() && seen[idx]; }
-  void mark(int idx)           { if (idx < (int)seen.size()) seen[idx] = true; }
-
-  /* rough token estimate: 1 token ≈ 4 chars */
-  bool budget_ok(const std::string &text) const {
-    return tokens_max == 0 ||
-           (tokens_used + (int)text.size() / 4) < (int)(tokens_max * 0.85f);
-  }
-
-  void charge(const std::string &text) {
-    tokens_used += (int)text.size() / 4;
-  }
-};
-
 bool Llama::embed_text(const std::string &text, std::vector<float> &out, int embed_dim) {
   vector<llama_token> tokens = tokenize(text);
   if (tokens.size() == 0) {
diff --git a/llama/llama-sb-rag.h b/llama/llama-sb-rag.h
new file mode 100644
index 0000000..d31706c
--- /dev/null
+++ b/llama/llama-sb-rag.h
@@ -0,0 +1,58 @@
+// This file is part of SmallBASIC
+//
+// This program is distributed under the terms of the GPL v2.0 or later
+// Download the GNU Public License (GPL) from www.gnu.org
+//
+// Copyright(C) 2026 Chris Warren-Smith
+
+#pragma once
+
+// One stored chunk of the RAG database: its text, two string labels
+// (`source`, `type`) and its embedding vector.
+struct RagChunk {
+  std::string        text;
+  std::string        source;
+  std::string        type;
+  std::vector<float> embedding;
+};
+
+// In-memory collection of RagChunk entries plus the embedding dimension.
+struct RagDB {
+  std::vector<RagChunk> chunks;
+  int embed_dim = 0;  // 0 until set by whoever fills the database
+
+  // Number of stored chunks (narrowed to int).
+  int  size()  const { return (int)chunks.size(); }
+  bool empty() const { return chunks.empty(); }
+};
+
+//
+// per-session deduplication + token budget
+//
+// Tracks which chunks have already been handed out during a session and how
+// much of a token budget has been spent.  Token counts are estimates
+// (text length / 4).
+struct RagSession {
+  std::vector<bool> seen;        /* sized to db.size() on init  */
+  int  tokens_used  = 0;
+  int  tokens_max   = 0;         /* set to your n_ctx           */
+  float score_threshold = 0.60f; /* skip weak matches           */
+
+  // Size `seen` for `n_chunks` chunks (negative counts are treated as 0),
+  // clear the spent budget and set the budget ceiling to `ctx_size`.
+  void init(int n_chunks, int ctx_size) {
+    if (n_chunks < 0) n_chunks = 0;  // a negative count would request a huge vector
+    seen.assign(n_chunks, false);
+    tokens_used = 0;
+    tokens_max  = ctx_size;
+  }
+
+  // Forget which chunks were seen and what was spent; keeps sizes and limits.
+  void reset() {
+    std::fill(seen.begin(), seen.end(), false);
+    tokens_used = 0;
+  }
+
+  // Out-of-range indices (including negative ones) are "not seen" / ignored.
+  bool is_seen(int idx)  const { return idx >= 0 && idx < (int)seen.size() && seen[idx]; }
+  void mark(int idx)           { if (idx >= 0 && idx < (int)seen.size()) seen[idx] = true; }
+
+  /* rough token estimate: 1 token ≈ 4 chars */
+  // True when there is no limit (tokens_max == 0) or adding `text` keeps the
+  // estimate below 85% of tokens_max.
+  bool budget_ok(const std::string &text) const {
+    return tokens_max == 0 ||
+           (tokens_used + (int)text.size() / 4) < (int)(tokens_max * 0.85f);
+  }
+
+  // Add the estimated token cost of `text` to the spent budget.
+  void charge(const std::string &text) {
+    tokens_used += (int)text.size() / 4;
+  }
+};
+
diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index 9e3b54a..6a74c69 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -130,10 +130,6 @@ void Llama::reset() {
   _min_p = 0.0f;
   _max_tokens = 150;
   _n_past = 0;
-  _is_gemma4 = false;
-  _grammar_src.clear();
-  _grammar_root.clear();
-  _template.clear();
   _seed = LLAMA_DEFAULT_SEED;
   if (_ctx) {
     llama_memory_clear(llama_get_memory(_ctx), true);
diff --git a/llama/llama.cpp b/llama/llama.cpp
index 3fbadb0..6db1304 160000
--- a/llama/llama.cpp
+++ b/llama/llama.cpp
@@ -1 +1 @@
-Subproject commit 3fbadb06dc867d3236937705477e090724ebbc6e
+Subproject commit 6db130445d29b243ee2171efb8cd61b84a1c5322
diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index 6057ff2..6e82149 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -148,6 +148,14 @@ struct TuiState {
   int term_rows = 0;
   int term_cols = 0;
 
+  // ── thinking spinner ──────────────────────────────────────────────
+  bool    thinking      = false;
+  int     spinner_frame = 0;
+  // Advance spinner by one frame and redraw the header.
+  void tick_spinner();
+  // Toggle thinking mode; redraws header immediately.
+  void set_thinking(bool on);
+
   // ── lifecycle ─────────────────────────────────────────────────────
   void init();
   void destroy();
@@ -175,10 +183,26 @@ struct TuiState {
 };
 
 // ─── colour helpers ──────────────────────────────────────────────────────
+// Our dark background colours (must match ncplane_set_base values in init).
+static constexpr uint32_t BG_CHAT_R = 18,  BG_CHAT_G = 22,  BG_CHAT_B = 30;
+static constexpr uint32_t BG_INP_R  = 22,  BG_INP_G  = 28,  BG_INP_B  = 38;
+static constexpr uint32_t BG_HDR_R  = 30,  BG_HDR_G  = 40,  BG_HDR_B  = 55;
 
+// fg only (use only where bg is already set via ncplane_set_base)
 static inline uint64_t fg_rgb(uint32_t r, uint32_t g, uint32_t b) {
   return NCCHANNELS_INITIALIZER(r, g, b, 0, 0, 0);
 }
+// fg + explicit bg — use this for all ncplane_set_channels calls so the
+// background behind each glyph matches the plane's base colour exactly.
+static inline uint64_t chat_ch(uint32_t r, uint32_t g, uint32_t b) {
+  return NCCHANNELS_INITIALIZER(r, g, b, BG_CHAT_R, BG_CHAT_G, BG_CHAT_B);
+}
+static inline uint64_t inp_ch(uint32_t r, uint32_t g, uint32_t b) {
+  return NCCHANNELS_INITIALIZER(r, g, b, BG_INP_R, BG_INP_G, BG_INP_B);
+}
+static inline uint64_t hdr_ch(uint32_t r, uint32_t g, uint32_t b) {
+  return NCCHANNELS_INITIALIZER(r, g, b, BG_HDR_R, BG_HDR_G, BG_HDR_B);
+}
 
 // ─── TuiState::init ──────────────────────────────────────────────────────
 
@@ -191,6 +215,13 @@ void TuiState::init() {
   stdpl = notcurses_stdplane(nc);
   notcurses_term_dim_yx(nc, (unsigned *)&term_rows, (unsigned *)&term_cols);
 
+  // Fill the entire terminal with our dark background before creating
+  // child planes — eliminates the "terminal colour showing through" artefact.
+  uint64_t bg = NCCHANNELS_INITIALIZER(BG_CHAT_R, BG_CHAT_G, BG_CHAT_B,
+                                        BG_CHAT_R, BG_CHAT_G, BG_CHAT_B);
+  ncplane_set_base(stdpl, " ", 0, bg);
+  ncplane_erase(stdpl);
+
   // Header: row 0
   ncplane_options hopt{};
   hopt.y = 0; hopt.x = 0;
@@ -203,12 +234,18 @@ void TuiState::init() {
   copt.y = 1; copt.x = 0;
   copt.rows = (unsigned)chat_rows; copt.cols = (unsigned)term_cols;
   chatpl = ncplane_create(stdpl, &copt);
+  ncplane_set_base(chatpl, " ", 0,
+    NCCHANNELS_INITIALIZER(BG_CHAT_R, BG_CHAT_G, BG_CHAT_B,
+                            BG_CHAT_R, BG_CHAT_G, BG_CHAT_B));
 
   // Input pane: last 2 rows
   ncplane_options iopt{};
   iopt.y = term_rows - 2; iopt.x = 0;
   iopt.rows = 2; iopt.cols = (unsigned)term_cols;
   inputpl = ncplane_create(stdpl, &iopt);
+  ncplane_set_base(inputpl, " ", 0,
+    NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B,
+                            BG_INP_R, BG_INP_G, BG_INP_B));
 
   redraw_all();
 }
@@ -231,19 +268,25 @@ void TuiState::resize() {
 
 void TuiState::redraw_header() {
   ncplane_erase(header);
-  ncplane_set_base(header, " ", 0, fg_rgb(30, 40, 55));
+  ncplane_set_base(header, " ", 0,
+    NCCHANNELS_INITIALIZER(BG_HDR_R, BG_HDR_G, BG_HDR_B,
+                            BG_HDR_R, BG_HDR_G, BG_HDR_B));
 
   float kv_pct   = kv_total   > 0 ? 100.f * (float)kv_used   / (float)kv_total   : 0.f;
   float vram_pct = vram_total  > 0 ? 100.f * (float)vram_used / (float)vram_total : 0.f;
 
+  // Spinner: braille dots rotate smoothly, clearly visible on the header
+  static const char *const SPIN[] = { "⣾","⣽","⣻","⢿","⡿","⣟","⣯","⣷" };
+  const char *spin_str = thinking ? SPIN[spinner_frame % 8] : " ";
+
   char buf[512];
   int n = std::snprintf(buf, sizeof(buf),
-    " ✦ NITRO  │ %-32s │ %5.1f tok/s │ KV %4.1f%%  VRAM %4.1f%%",
+    " ✦ NITRO  │ %-32s │ %5.1f tok/s │ KV %4.1f%%  VRAM %4.1f%%  %s",
     current_model.c_str(), (double)tokens_per_sec,
-    (double)kv_pct, (double)vram_pct);
+    (double)kv_pct, (double)vram_pct, spin_str);
   if (n > term_cols) buf[term_cols] = '\0';
 
-  ncplane_set_channels(header, fg_rgb(130, 220, 200));
+  ncplane_set_channels(header, hdr_ch(130, 220, 200));
   ncplane_putstr_yx(header, 0, 0, buf);
 }
 
@@ -262,12 +305,12 @@ void TuiState::redraw_chat() {
     const std::string &line = chat_lines[i];
 
     uint64_t ch;
-    if      (line.rfind("You: ",   0) == 0) ch = fg_rgb(100, 200, 255);
-    else if (line.rfind("Nitro: ", 0) == 0) ch = fg_rgb(180, 255, 180);
-    else if (line.rfind("[tool]",  0) == 0) ch = fg_rgb(255, 180,  80);
-    else if (line.rfind("[err]",   0) == 0) ch = fg_rgb(255,  80,  80);
-    else if (line.rfind("[sys]",   0) == 0) ch = fg_rgb(140, 140, 200);
-    else                                     ch = fg_rgb(210, 210, 210);
+    if      (line.rfind("You: ",   0) == 0) ch = chat_ch(100, 200, 255);
+    else if (line.rfind("Nitro: ", 0) == 0) ch = chat_ch(180, 255, 180);
+    else if (line.rfind("[tool]",  0) == 0) ch = chat_ch(255, 180,  80);
+    else if (line.rfind("[err]",   0) == 0) ch = chat_ch(255,  80,  80);
+    else if (line.rfind("[sys]",   0) == 0) ch = chat_ch(140, 140, 200);
+    else                                     ch = chat_ch(210, 210, 210);
 
     ncplane_set_channels(chatpl, ch);
     std::string display = line.size() > cols ? line.substr(0, cols) : line;
@@ -279,24 +322,51 @@ void TuiState::redraw_input() {
   ncplane_erase(inputpl);
 
   // Separator
-  ncplane_set_channels(inputpl, fg_rgb(80, 120, 160));
-  std::string sep(term_cols, '-');
+  ncplane_set_channels(inputpl, inp_ch(80, 120, 160));
+  std::string sep; for (int i = 0; i < term_cols; ++i) sep += "─";  // "─" is multi-byte UTF-8; it cannot be a char fill value
   ncplane_putstr_yx(inputpl, 0, 0, sep.c_str());
 
-  // Prompt + buffer
+  // Prompt
   const std::string prompt = " ❯ ";
-  ncplane_set_channels(inputpl, fg_rgb(230, 230, 230));
+  const int prompt_cols = 4;
+  ncplane_set_channels(inputpl, inp_ch(100, 210, 255));
   ncplane_putstr_yx(inputpl, 1, 0, prompt.c_str());
 
-  int max_w = std::max(0, term_cols - (int)prompt.size() - 1);
-  std::string display = input_buf;
-  if ((int)display.size() > max_w && max_w > 0)
-    display = display.substr(display.size() - max_w);
-  ncplane_putstr_yx(inputpl, 1, (int)prompt.size(), display.c_str());
+  // Buffer — split at cursor so we can render the cursor cell distinctly
+  int max_w = std::max(0, term_cols - prompt_cols - 1);
 
-  // Cursor position
-  int cx = std::min((int)prompt.size() + (int)cursor_pos, term_cols - 1);
-  ncplane_cursor_move_yx(inputpl, 1, cx);
+  // Viewport: if buffer is wider than the available space, show the tail
+  std::string visible = input_buf;
+  int view_offset = 0;
+  if ((int)visible.size() > max_w && max_w > 0) {
+    view_offset = (int)visible.size() - max_w;
+    visible = visible.substr(view_offset);
+  }
+
+  // Text before cursor
+  int cur_in_view = std::max(0, (int)cursor_pos - view_offset);
+  cur_in_view = std::min(cur_in_view, (int)visible.size());
+
+  std::string before = visible.substr(0, cur_in_view);
+  std::string after  = cur_in_view < (int)visible.size()
+                         ? visible.substr(cur_in_view + 1) : "";
+  char cursor_ch = cur_in_view < (int)visible.size()
+                     ? visible[cur_in_view] : ' ';
+
+  ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
+  ncplane_putstr_yx(inputpl, 1, prompt_cols, before.c_str());
+
+  // Cursor cell: bright bg, dark text — stands out against the input bg
+  int cx = prompt_cols + cur_in_view;
+  ncplane_set_channels(inputpl,
+    NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B, 180, 230, 255));
+  char cbuf[2] = { cursor_ch, '\0' };
+  ncplane_putstr_yx(inputpl, 1, cx, cbuf);
+
+  // Text after cursor
+  ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
+  if (!after.empty())
+    ncplane_putstr_yx(inputpl, 1, cx + 1, after.c_str());
 }
 
 void TuiState::redraw_all() {
@@ -306,6 +376,19 @@ void TuiState::redraw_all() {
   notcurses_render(nc);
 }
 
+void TuiState::tick_spinner() {
+  ++spinner_frame;
+  redraw_header();
+  notcurses_render(nc);
+}
+
+void TuiState::set_thinking(bool on) {
+  thinking = on;
+  if (!on) spinner_frame = 0;
+  redraw_header();
+  notcurses_render(nc);
+}
+
 // ─── TuiState content helpers ─────────────────────────────────────────────
 
 void TuiState::append_line(const std::string &line) {
@@ -344,7 +427,7 @@ void TuiState::flush_token_acc() {
 
 void TuiState::confirm_dialog(const std::string &prompt, std::string &result) {
   ncplane_erase(inputpl);
-  ncplane_set_channels(inputpl, fg_rgb(255, 200, 80));
+  ncplane_set_channels(inputpl, inp_ch(255, 200, 80));
   std::string msg = " " + prompt + " [y/n] ❯ ";
   ncplane_putstr_yx(inputpl, 1, 0, msg.c_str());
   notcurses_render(nc);
@@ -358,7 +441,7 @@ void TuiState::confirm_dialog(const std::string &prompt, std::string &result) {
     else if (ni.id >= 32 && ni.id < 127) { answer += (char)ni.id; }
 
     ncplane_erase(inputpl);
-    ncplane_set_channels(inputpl, fg_rgb(255, 200, 80));
+    ncplane_set_channels(inputpl, inp_ch(255, 200, 80));
     ncplane_putstr_yx(inputpl, 1, 0, (msg + answer).c_str());
     notcurses_render(nc);
   }
@@ -663,20 +746,31 @@ bool AgentState::run_turn(const std::string &user_message,
 
   // ── label the assistant response in the chat pane ────────────────
   tui.append_line("Nitro: ");
-  tui.redraw_all();
+  tui.set_thinking(true);
 
   // ── generation loop ───────────────────────────────────────────────
-  // Exact translation of the SB streaming / tool-dispatch loop.
-  bool in_think  = false;
+  bool in_think = false;
   std::string buffer;      // accumulates tokens until we see a newline
 
-  auto handle_think = [&](const std::string &line) {
-    if (line == "<|think|>")   in_think = true;
-    else if (line == "</|think|>") in_think = false;
+  // Scan text (one token per call) for think open/close tags; a tag split across tokens is not detected.
+  // Models use either <think> or <|think|> depending on the template.
+  auto update_think_state = [&](const std::string &text) {
+    // Opening tags
+    if (text.find("<think>")    != std::string::npos ||
+        text.find("<|think|>")  != std::string::npos)  in_think = true;
+    // Closing tags — check after open so a single-line <think>…</think> ends correctly
+    if (text.find("</think>")   != std::string::npos ||
+        text.find("</|think|>") != std::string::npos)  in_think = false;
   };
 
   while (iter->_has_next) {
     std::string tok = llama.next(*iter);
+    tui.tick_spinner();
+
+    // Update think state on every token so tags that arrive mid-buffer
+    // suppress display immediately rather than waiting for a newline.
+    update_think_state(tok);
+
     buffer += tok;
 
     auto nl = buffer.find('\n');
@@ -689,20 +783,57 @@ bool AgentState::run_turn(const std::string &user_message,
       trimmed.erase(0, trimmed.find_first_not_of(" \t"));
 
       if (trimmed.substr(0, 5) == "TOOL:") {
-        // Collect any tail that remains in the buffer plus iter.all()
-        // — mirrors: text_line += buffer + " " + iter.all()
-        std::string tool_line = trimmed + " " + buffer + " " + llama.all(*iter);
-
-        // Strip stray newlines from the single-line tool command
-        tool_line.erase(
-          std::remove(tool_line.begin(), tool_line.end(), '\n'),
-          tool_line.end());
+        // Collect remainder: rest of buffer + everything iter still has.
+        // For TOOL:WRITE the content may contain newlines, so we keep
+        // the raw text and only strip newlines from the op+arg1 prefix.
+        std::string tail = buffer + llama.all(*iter);
+
+        // Parse op and arg1 from trimmed (first two space-separated tokens)
+        // then treat everything after as the raw payload (preserving newlines).
+        std::string op, arg1, payload;
+        {
+          auto s1 = trimmed.find(' ');
+          if (s1 != std::string::npos) {
+            op = trimmed.substr(0, s1);
+            std::string rest = trimmed.substr(s1 + 1);
+            rest.erase(0, rest.find_first_not_of(" \t"));
+            auto s2 = rest.find(' ');
+            if (s2 != std::string::npos) {
+              arg1    = rest.substr(0, s2);
+              payload = rest.substr(s2 + 1) + tail;
+            } else {
+              arg1    = rest;
+              payload = tail;
+            }
+          } else {
+            op = trimmed;
+          }
+        }
 
-        // Trim trailing whitespace
-        while (!tool_line.empty() && std::isspace((unsigned char)tool_line.back()))
-          tool_line.pop_back();
+        // Reconstruct the tool line.  For ops that don't carry a file payload
+        // (LIST, EXISTS, READ, DATE, TIME, RND, PERMISSION, RUN) we still
+        // collapse newlines in payload so the single-line format is preserved.
+        // For TOOL:WRITE we keep newlines in the payload intact.
+        std::string tool_line;
+        if (op == "TOOL:WRITE") {
+          tool_line = op + " " + arg1 + " " + payload;
+          // Trim only trailing newlines (other whitespace in the payload is kept)
+          while (!tool_line.empty() && tool_line.back() == '\n')
+            tool_line.pop_back();
+        } else {
+          tool_line = op;
+          if (!arg1.empty())    tool_line += " " + arg1;
+          if (!payload.empty()) {
+            std::string flat = payload;
+            flat.erase(std::remove(flat.begin(), flat.end(), '\n'), flat.end());
+            while (!flat.empty() && std::isspace((unsigned char)flat.back()))
+              flat.pop_back();
+            if (!flat.empty()) tool_line += " " + flat;
+          }
+        }
 
-        tui.append_line("[tool] " + tool_line);
+        tui.append_line("[tool] " + op + " " + arg1 +
+                        (op == "TOOL:WRITE" ? " <content>" : ""));
         tui.redraw_all();
 
         std::string result = process_tool(tool_line, cfg.sandbox, tui);
@@ -711,8 +842,6 @@ bool AgentState::run_turn(const std::string &user_message,
           result.substr(0, 200) + (result.size() > 200 ? "…" : ""));
         tui.redraw_all();
 
-        // Inject tool result and get a new iter for the continuation
-        // — mirrors: iter = llama.add_message("tool", process_tool(…))
         if (!llama.add_message(*iter, "tool", result)) {
           tui.append_line(std::string("[err] tool result inject: ") + llama.last_error());
           tui.redraw_all();
@@ -721,16 +850,15 @@ bool AgentState::run_turn(const std::string &user_message,
         buffer.clear();
 
       } else {
-        // Normal output line
+        // Normal output line — suppress if inside think block
         if (!in_think) {
           tui.append_token(text_line + "\n");
         }
-        handle_think(text_line);
       }
     }
   }
 
-  // ── flush remaining buffer (SB: "Flush remaining line buffer") ────
+  // ── flush remaining buffer ────────────────────────────────────────
   if (!buffer.empty()) {
     std::string trimmed = buffer;
     trimmed.erase(0, trimmed.find_first_not_of(" \t"));
@@ -745,6 +873,7 @@ bool AgentState::run_turn(const std::string &user_message,
     }
   }
   tui.flush_token_acc();
+  tui.set_thinking(false);
 
   // ── update status bar ─────────────────────────────────────────────
   tui.tokens_per_sec = tokens_per_sec();
diff --git a/llama/test_main.cpp b/llama/test_main.cpp
deleted file mode 100644
index 3ab572d..0000000
--- a/llama/test_main.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-#include "llama-sb.h"
-#include <cstdio>
-#include <cstring>
-
-static void print_usage(int, char ** argv) {
-  printf("\nexample usage:\n");
-  printf("\n    %s -m model.gguf [-n n_predict] [-ngl n_gpu_layers] [prompt]\n", argv[0]);
-  printf("\n");
-}
-
-int main(int argc, char ** argv) {
-  // path to the model gguf file
-  std::string model_path;
-  // prompt to generate text from
-  std::string prompt = "Happy friday";
-  // number of tokens to predict
-  int n_predict = 32;
-
-  // parse command line arguments
-  int i = 1;
-  for (; i < argc; i++) {
-    if (strcmp(argv[i], "-m") == 0) {
-      if (i + 1 < argc) {
-        model_path = argv[++i];
-      } else {
-        print_usage(argc, argv);
-        return 1;
-      }
-    } else if (strcmp(argv[i], "-n") == 0) {
-      if (i + 1 < argc) {
-        try {
-          n_predict = std::stoi(argv[++i]);
-        } catch (...) {
-          print_usage(argc, argv);
-          return 1;
-        }
-      } else {
-        print_usage(argc, argv);
-        return 1;
-      }
-    } else {
-      // prompt starts here
-      break;
-    }
-  }
-  if (model_path.empty()) {
-    print_usage(argc, argv);
-    return 1;
-  }
-  if (i < argc) {
-    prompt = argv[i++];
-    for (; i < argc; i++) {
-      prompt += " ";
-      prompt += argv[i];
-    }
-  }
-
-  Llama llama;
-  if (llama.load_model(model_path, 1024, 1024, -1, GGML_LOG_LEVEL_CONT)) {
-    LlamaIter iter;
-    llama.set_max_tokens(n_predict);
-    llama.add_message(iter, "user", prompt);
-    while (iter._has_next) {
-      auto out = llama.next(iter);
-      printf("\033[33m");
-      printf("%s\n", out.c_str());
-      printf("\n\033[0m");
-    }
-  } else {
-    fprintf(stderr, "ERR: %s\n", llama.last_error());
-  }
-
-  return 0;
-}

From 7f3e2ce29b33c2ad4a51ece29fc7d82d27209b14 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Thu, 21 May 2026 09:47:19 +0930
Subject: [PATCH 34/54] LLAMA: replace test_main with nitro agent application

---
 llama/CMakeLists.txt |   81 ++--
 llama/nitro.cpp      | 1090 +++++++++++++++++++++++++++++++-----------
 2 files changed, 874 insertions(+), 297 deletions(-)

diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt
index 282337b..ef68b0f 100644
--- a/llama/CMakeLists.txt
+++ b/llama/CMakeLists.txt
@@ -155,9 +155,9 @@ set_target_properties(llm PROPERTIES
 # -----------------------------
 find_package(PkgConfig QUIET)
 
+# ── notcurses ─────────────────────────────────────────────────────────────
 set(NC_FOUND FALSE)
 set(NC_TARGET "")
-
 if(DEFINED NOTCURSES_DIR)
   # Explicit path — create an imported target manually
   find_library(NC_LIB      NAMES notcurses      HINTS "${NOTCURSES_DIR}/lib" REQUIRED)
@@ -170,14 +170,11 @@ if(DEFINED NOTCURSES_DIR)
   endif()
   set(NC_TARGET notcurses_imported)
   set(NC_FOUND TRUE)
-
 elseif(PkgConfig_FOUND)
-  # IMPORTED_TARGET gives a PkgConfig::NC target with full lib paths baked in
   pkg_check_modules(NC QUIET IMPORTED_TARGET notcurses)
   if(NC_FOUND)
     set(NC_TARGET PkgConfig::NC)
   endif()
-
 else()
   find_library(NC_LIB      NAMES notcurses)
   find_library(NC_CORE_LIB NAMES notcurses-core)
@@ -189,9 +186,55 @@ else()
   endif()
 endif()
 
+# ── libcurl ───────────────────────────────────────────────────────────────
+# Try CMake's FindCURL module first (its CURL::libcurl target needs CMake ≥ 3.12).
+# Fall back to pkg-config, then a raw library search.
+set(CURL_FOUND_INTERNAL FALSE)
+set(CURL_TARGET "")
+
+if(DEFINED CURL_DIR)
+  # Explicit path supplied by the user (-DCURL_DIR=...)
+  find_library(CURL_LIB NAMES curl HINTS "${CURL_DIR}/lib" REQUIRED)
+  find_path(CURL_INCLUDE NAMES curl/curl.h HINTS "${CURL_DIR}/include" REQUIRED)
+  add_library(curl_imported INTERFACE IMPORTED)
+  target_include_directories(curl_imported INTERFACE "${CURL_INCLUDE}")
+  target_link_libraries(curl_imported INTERFACE ${CURL_LIB})
+  set(CURL_TARGET curl_imported)
+  set(CURL_FOUND_INTERNAL TRUE)
+else()
+  # CMake built-in (sets CURL::libcurl target when found)
+  find_package(CURL QUIET)
+  if(CURL_FOUND)
+    set(CURL_TARGET CURL::libcurl)
+    set(CURL_FOUND_INTERNAL TRUE)
+  elseif(PkgConfig_FOUND)
+    pkg_check_modules(CURL_PC QUIET IMPORTED_TARGET libcurl)
+    if(CURL_PC_FOUND)
+      set(CURL_TARGET PkgConfig::CURL_PC)
+      set(CURL_FOUND_INTERNAL TRUE)
+    endif()
+  endif()
+  if(NOT CURL_FOUND_INTERNAL)
+    find_library(CURL_LIB NAMES curl)
+    find_path(CURL_INCLUDE NAMES curl/curl.h)
+    if(CURL_LIB AND CURL_INCLUDE)
+      add_library(curl_imported INTERFACE IMPORTED)
+      target_include_directories(curl_imported INTERFACE "${CURL_INCLUDE}")
+      target_link_libraries(curl_imported INTERFACE ${CURL_LIB})
+      set(CURL_TARGET curl_imported)
+      set(CURL_FOUND_INTERNAL TRUE)
+    endif()
+  endif()
+endif()
+
+# ── nitro target ──────────────────────────────────────────────────────────
 if(NC_FOUND)
-  message(STATUS "notcurses found — building nitro")
+  if(NOT CURL_FOUND_INTERNAL)
+    message(WARNING "libcurl not found — TOOL:CURL will be unavailable. "
+                    "Install libcurl-dev or set -DCURL_DIR=<prefix> to enable.")
+  endif()
 
+  message(STATUS "notcurses found — building nitro")
   add_executable(nitro
     nitro.cpp
   )
@@ -206,6 +249,12 @@ if(NC_FOUND)
     ggml
     ${NC_TARGET}           # imported target carries include + lib paths
   )
+  if(CURL_FOUND_INTERNAL)
+    target_link_libraries(nitro PRIVATE ${CURL_TARGET})
+    target_compile_definitions(nitro PRIVATE NITRO_HAVE_CURL=1)
+  else()
+    target_compile_definitions(nitro PRIVATE NITRO_HAVE_CURL=0)
+  endif()
   set_target_properties(nitro PROPERTIES
     RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
   )
@@ -213,28 +262,6 @@ else()
   message(STATUS "notcurses not found — skipping nitro (set -DNOTCURSES_DIR=... to enable)")
 endif()
 
-# -----------------------------
-# RAG indexer
-# -----------------------------
-add_executable(rag_index
-  rag_index.cpp
-)
-
-target_include_directories(rag_index PRIVATE
-  ${LLAMA_DIR}/include
-  ${LLAMA_DIR}/ggml/include
-)
-
-target_link_libraries(rag_index PRIVATE
-  llm
-  llama
-  ggml
-)
-
-set_target_properties(rag_index PROPERTIES
-  RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
-)
-
 # -----------------------------
 # Header preparation for RAG indexer
 # -----------------------------
diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index 6e82149..cd63674 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -6,7 +6,7 @@
 //   g++ -std=c++20 -O2 nitro.cpp llama-sb.cpp \
 //       -I/path/to/llama.cpp/include \
 //       -L/path/to/llama.cpp/build/src \
-//       -lllama -lggml -lnotcurses-core -lnotcurses \
+//       -lllama -lggml -lnotcurses-core -lnotcurses -lcurl \
 //       -o nitro
 //
 // Usage:
@@ -35,9 +35,9 @@
 //   TOOL:TIME
 //   TOOL:RND
 //   TOOL:PERMISSION
+//   TOOL:CURL   <url>
 //
 // Copyright (C) 2026 Chris Warren-Smith  —  GPLv2 or later
-
 // ─── Standard library ────────────────────────────────────────────────────────
 #include <algorithm>
 #include <chrono>
@@ -50,23 +50,20 @@
 #include <sstream>
 #include <string>
 #include <vector>
-
+// ─── curl ─────────────────────────────────────────────────────────────────────
+#include <curl/curl.h>
 // ─── Integration layer (sole llama.cpp dependency for nitro) ─────────────────
 #include "llama-sb.h"
 #include "llama-sb-rag.h"
-
 // ─── TUI ─────────────────────────────────────────────────────────────────────
 #include <notcurses/notcurses.h>
-
 namespace fs = std::filesystem;
-
 // ═══════════════════════════════════════════════════════════════════════════
 // Forward declarations
 // ═══════════════════════════════════════════════════════════════════════════
 struct NitroConfig;
 struct TuiState;
 struct AgentState;
-
 static std::string  join_path(const std::string &a, const std::string &b);
 static std::string  read_file(const std::string &path);
 static bool         write_file(const std::string &path, const std::string &data);
@@ -77,13 +74,154 @@ static std::string  process_tool(const std::string &line, const std::string &san
                                   TuiState &tui);
 static std::string  build_system_prompt(const std::vector<std::string> &knowledge_files,
                                          const std::string &sandbox);
+// ─── RAG indexing ─────────────────────────────────────────────────────────────
+static constexpr int BATCH_SIZE = 512;
+
+struct Chunk {
+  std::string         text;
+  std::string         source;
+  std::string         type;
+  std::vector<float>  embedding;
+};
+
+static bool json_get_string(const std::string &json,
+                            const std::string &key,
+                            std::string       &out) {
+  std::string search = "\"" + key + "\":";
+  size_t pos = json.find(search);
+  if (pos == std::string::npos) return false;
+  pos += search.size();
+  while (pos < json.size() && json[pos] == ' ') ++pos;
+  if (pos >= json.size() || json[pos] != '"') return false;
+  ++pos;
+  out.clear();
+  while (pos < json.size()) {
+    char c = json[pos++];
+    if (c == '\\' && pos < json.size()) {
+      char e = json[pos++];
+      switch (e) {
+      case 'n':  out += '\n'; break;
+      case 't':  out += '\t'; break;
+      case '"':  out += '"';  break;
+      case '\\': out += '\\'; break;
+      default:   out += e;    break;
+      }
+    } else if (c == '"') {
+      break;
+    } else {
+      out += c;
+    }
+  }
+  return true;
+}
+
+static bool save_db(const std::string        &path,
+                    const std::vector<Chunk> &chunks,
+                    int                       embed_dim) {
+  for (const Chunk &c : chunks)  // each embedding must hold embed_dim floats or the f.write below over-reads
+    if (embed_dim < 0 || c.embedding.size() < (size_t)embed_dim) return false;
+  std::ofstream f(path, std::ios::binary);
+  if (!f) {
+    std::fprintf(stderr, "cannot open for write: %s\n\n", path.c_str());
+    return false;
+  }
+  auto write32 = [&](uint32_t v) { f.write((char*)&v, 4); };
+  auto write16 = [&](uint16_t v) { f.write((char*)&v, 2); };
+  auto write8  = [&](uint8_t  v) { f.write((char*)&v, 1); };
+  auto writestr = [&](const std::string &s, size_t max_len) {
+    size_t len = std::min(s.size(), max_len);
+    f.write(s.c_str(), (std::streamsize)len);
+  };
+  write32(0x52414744); write32(2);  // 0x52414744 spells "RAGD" in ASCII (stored in host byte order), then the constant 2
+  write32((uint32_t)chunks.size());
+  write32((uint32_t)embed_dim);
+  for (const Chunk &c : chunks) {
+    write32((uint32_t)c.text.size());
+    f.write(c.text.c_str(), (std::streamsize)c.text.size());
+    uint16_t src_len = (uint16_t)std::min(c.source.size(), (size_t)65535);
+    write16(src_len);
+    writestr(c.source, src_len);
+    uint8_t type_len = (uint8_t)std::min(c.type.size(), (size_t)255);
+    write8(type_len);
+    writestr(c.type, type_len);
+    f.write((char*)c.embedding.data(), (std::streamsize)(embed_dim * sizeof(float)));
+  }
+  return f.good();
+}
 
 // ═══════════════════════════════════════════════════════════════════════════
-// Config  (mirrors the SB agent constants)
+// InputHistory — up/down arrow navigation through submitted inputs
 // ═══════════════════════════════════════════════════════════════════════════
+class InputHistory {
+  public:
+  explicit InputHistory() = default;
+  ~InputHistory() = default;
+  InputHistory(const InputHistory &) = delete;
+  InputHistory &operator=(const InputHistory &) = delete;
+
+  /**
+   * @brief Adds a new command string to the history stack.
+   * Resets navigation index upon adding a new item.
+   * Deduplicates consecutive identical entries.
+   */
+  void push(const std::string &input) {
+    if (input.empty()) return;
+    if (!history_stack.empty() && history_stack.back() == input) {
+      // Don't push duplicate of last entry; just reset nav position.
+      current_index = (int)history_stack.size();
+      return;
+    }
+    history_stack.push_back(input);
+    current_index = (int)history_stack.size();
+  }
+
+  /**
+   * @brief Navigates to an earlier entry.
+   * @param out Set to the selected entry on success.
+   * @return true if an item was successfully retrieved.
+   */
+  bool up(std::string &out) {
+    if (history_stack.empty() || current_index <= 0) return false;
+    --current_index;
+    out = history_stack[current_index];
+    return true;
+  }
+
+  /**
+   * @brief Navigates to a later entry, or clears when past the newest.
+   * @param out Set to the selected entry, or cleared if past the end.
+   * @return true if a history entry was retrieved (false means "clear input").
+   */
+  bool down(std::string &out) {
+    if (history_stack.empty()) return false;
+    ++current_index;
+    if (current_index >= (int)history_stack.size()) {
+      current_index = (int)history_stack.size();
+      out.clear();
+      return false; // signal: restore blank input
+    }
+    out = history_stack[current_index];
+    return true;
+  }
+
+  /** Reset navigation position without modifying the stack. */
+  void reset_nav() {
+    current_index = (int)history_stack.size();
+  }
+
+  private:
+  std::vector<std::string> history_stack;
+  int current_index = 0;
+};
+
+// ═══════════════════════════════════════════════════════════════════════════
+// Settings persistence  (~/.config/nitro.settings.json)
+// ═══════════════════════════════════════════════════════════════════════════
+// A minimal hand-rolled JSON reader/writer for the flat key-value settings
+// we care about.  We deliberately avoid a full JSON library dependency.
 
 struct NitroConfig {
-  std::string model_path;   // empty = no model yet; set via -m/--model or /model
+  std::string model_path;
   std::string embed_path;
   std::string sandbox;
   int   n_ctx          = 65536;
@@ -101,6 +239,135 @@ struct NitroConfig {
   int   rag_top_k      = 5;
 };
 
+// Returns the canonical settings path: ~/.config/nitro.settings.json
+static std::string settings_path() {
+  // Without HOME the path is resolved relative to the current directory.
+  const char *home_env = getenv("HOME");
+  return std::string(home_env ? home_env : ".") + "/.config/nitro.settings.json";
+}
+
+// Tiny helper: extract a quoted string value from flat JSON for a known key.
+static bool settings_get_str(const std::string &json,
+                              const std::string &key,
+                              std::string &out) {
+  return json_get_string(json, key, out);  // forwards all arguments unchanged; json_get_string is not shown in this hunk
+}
+
+// Tiny helper: extract an integer value from flat JSON.
+static bool settings_get_int(const std::string &json,
+                              const std::string &key,
+                              int &out) {
+  const std::string search = "\"" + key + "\":";
+  size_t pos = json.find(search);
+  if (pos == std::string::npos) return false;
+  pos += search.size();
+  while (pos < json.size() && (json[pos] == ' ' || json[pos] == '\t')) ++pos;
+  const size_t start = pos;
+  if (pos < json.size() && json[pos] == '-') ++pos;
+  while (pos < json.size() && std::isdigit((unsigned char)json[pos])) ++pos;
+  if (pos == start) return false;  // key present but no number follows
+  // stoi throws on a bare "-" and on values outside int; report those as absent.
+  try { out = std::stoi(json.substr(start, pos - start)); }
+  catch (const std::exception &) { return false; }  // out is left untouched
+  return true;
+}
+
+// Tiny helper: extract a float value from flat JSON.
+static bool settings_get_float(const std::string &json,
+                                const std::string &key,
+                                float &out) {
+  const std::string search = "\"" + key + "\":";
+  size_t pos = json.find(search);
+  if (pos == std::string::npos) return false;
+  pos += search.size();
+  while (pos < json.size() && (json[pos] == ' ' || json[pos] == '\t')) ++pos;
+  // Keep exponent characters too: operator<< writes small floats as "5e-05".
+  const size_t start = pos;
+  while (pos < json.size() && (std::isdigit((unsigned char)json[pos]) ||
+         std::string("+-.eE").find(json[pos]) != std::string::npos)) ++pos;
+  try { out = std::stof(json.substr(start, pos - start)); }
+  catch (const std::exception &) { return false; }  // empty, malformed or out of range
+  return true;
+}
+
+// Load settings from disk into cfg.  Fields present in the file overwrite
+// the defaults already in cfg; fields absent are left at their defaults.
+// Silently succeeds if the file doesn't exist yet.
+static void load_settings(NitroConfig &cfg) {
+  std::ifstream in(settings_path());
+  if (!in) return;                 // nothing saved yet → keep the defaults
+  std::ostringstream contents;
+  contents << in.rdbuf();
+  const std::string json = contents.str();
+
+  // Paths and sandbox (strings); getter results are deliberately ignored.
+  settings_get_str(json, "model_path", cfg.model_path);
+  settings_get_str(json, "embed_path", cfg.embed_path);
+  settings_get_str(json, "sandbox",    cfg.sandbox);
+
+  // Sizes and counts (integers)
+  settings_get_int(json, "n_ctx",          cfg.n_ctx);
+  settings_get_int(json, "n_batch",        cfg.n_batch);
+  settings_get_int(json, "n_gpu_layers",   cfg.n_gpu_layers);
+  settings_get_int(json, "n_max_tokens",   cfg.n_max_tokens);
+  settings_get_int(json, "top_k",          cfg.top_k);
+  settings_get_int(json, "penalty_last_n", cfg.penalty_last_n);
+  settings_get_int(json, "rag_top_k",      cfg.rag_top_k);
+
+  // Sampling parameters (floats)
+  settings_get_float(json, "temperature",    cfg.temperature);
+  settings_get_float(json, "top_p",          cfg.top_p);
+  settings_get_float(json, "min_p",          cfg.min_p);
+  settings_get_float(json, "penalty_repeat", cfg.penalty_repeat);
+}
+
+// Escape a string for embedding in JSON.
+static std::string json_escape(const std::string &s) {
+  std::string escaped;
+  escaped.reserve(s.size() + 4);
+  for (const char ch : s) {
+    // Quote, backslash, newline and tab get a backslash escape; every
+    // other byte is copied through unchanged.
+    if      (ch == '"')  escaped += "\\\"";
+    else if (ch == '\\') escaped += "\\\\";
+    else if (ch == '\n') escaped += "\\n";
+    else if (ch == '\t') escaped += "\\t";
+    else                 escaped += ch;
+  }
+  return escaped;
+}
+
+// Persist the current cfg to ~/.config/nitro.settings.json.
+static bool save_settings(const NitroConfig &cfg) {
+  const std::string path = settings_path();
+  // Best effort: make sure the parent directory exists.  The error code is
+  // ignored on purpose; opening the file below decides success or failure.
+  std::error_code ec;
+  fs::create_directories(fs::path(path).parent_path(), ec);
+
+  std::ofstream f(path, std::ios::trunc);
+  if (!f) return false;
+
+  f << "{\n";
+  f << "  \"model_path\":    \"" << json_escape(cfg.model_path)  << "\",\n";
+  f << "  \"embed_path\":    \"" << json_escape(cfg.embed_path)   << "\",\n";
+  f << "  \"sandbox\":       \"" << json_escape(cfg.sandbox)      << "\",\n";
+  f << "  \"n_ctx\":          " << cfg.n_ctx          << ",\n";
+  f << "  \"n_batch\":        " << cfg.n_batch         << ",\n";
+  f << "  \"n_gpu_layers\":   " << cfg.n_gpu_layers    << ",\n";
+  f << "  \"n_max_tokens\":   " << cfg.n_max_tokens    << ",\n";
+  f << "  \"temperature\":    " << cfg.temperature     << ",\n";
+  f << "  \"top_p\":          " << cfg.top_p           << ",\n";
+  f << "  \"min_p\":          " << cfg.min_p           << ",\n";
+  f << "  \"top_k\":          " << cfg.top_k           << ",\n";
+  f << "  \"penalty_repeat\": " << cfg.penalty_repeat  << ",\n";
+  f << "  \"penalty_last_n\": " << cfg.penalty_last_n  << ",\n";
+  f << "  \"rag_top_k\":      " << cfg.rag_top_k       << "\n";
+  f << "}\n";
+  f.flush();  // force buffered bytes out so a write error shows up in good()
+  return f.good();
+}
+
 // ═══════════════════════════════════════════════════════════════════════════
 // Notcurses TUI
 // ═══════════════════════════════════════════════════════════════════════════
@@ -115,7 +382,6 @@ struct NitroConfig {
 //  │ ─────────────────────────────────────  (separator)                  │
 //  │ ❯ input                                                             │
 //  └─────────────────────────────────────────────────────────────────────┘
-
 struct TuiState {
   // ── notcurses handles ──────────────────────────────────────────────
   struct notcurses *nc      = nullptr;
@@ -123,77 +389,71 @@ struct TuiState {
   struct ncplane   *header  = nullptr;
   struct ncplane   *chatpl  = nullptr;
   struct ncplane   *inputpl = nullptr;
-
   // ── chat buffer ───────────────────────────────────────────────────
   std::vector<std::string> chat_lines;
-  int scroll_offset = 0;       // lines scrolled up from bottom (0 = pinned)
+  int scroll_offset = 0;
   std::mutex lines_mutex;
-
   // ── streaming accumulator ─────────────────────────────────────────
-  // Tokens arrive without newlines; we accumulate here and flush on \n.
   std::string token_acc;
-
   // ── input ─────────────────────────────────────────────────────────
   std::string input_buf;
   size_t      cursor_pos = 0;
-
-  // ── status bar values (written by agent loop) ─────────────────────
+  // ── status bar values ─────────────────────────────────────────────
   std::string current_model  = "none";
   float       tokens_per_sec = 0.0f;
   int         kv_used        = 0;
   int         kv_total       = 1;
   size_t      vram_used      = 0;
   size_t      vram_total     = 1;
-
   int term_rows = 0;
   int term_cols = 0;
-
   // ── thinking spinner ──────────────────────────────────────────────
   bool    thinking      = false;
   int     spinner_frame = 0;
+  // ── input history ─────────────────────────────────────────────────
+  InputHistory history;
   // Advance spinner by one frame and redraw the header.
   void tick_spinner();
   // Toggle thinking mode; redraws header immediately.
   void set_thinking(bool on);
-
   // ── lifecycle ─────────────────────────────────────────────────────
   void init();
   void destroy();
   void resize();
-
   // ── draw ──────────────────────────────────────────────────────────
   void redraw_header();
   void redraw_chat();
   void redraw_input();
   void redraw_all();
-
   // ── content helpers ───────────────────────────────────────────────
-  // Append a complete line (wraps at terminal width, colour-coded by prefix).
   void append_line(const std::string &line);
-  // Feed a streaming token fragment; flushes complete lines on \n.
   void append_token(const std::string &token);
-  // Flush whatever is left in token_acc as a final line.
   void flush_token_acc();
-
   // ── interaction ───────────────────────────────────────────────────
-  // Show a YES/NO confirm dialog in the input plane; writes "YES" or "NO".
   void confirm_dialog(const std::string &prompt, std::string &result);
-  // Blocking readline with cursor, arrow-key scrolling, basic editing.
+  // Blocking readline with history navigation, cursor, arrow-key scrolling.
   std::string readline_blocking();
+  // Modal popup overlay while a long operation runs.
+  // Call show_modal_popup to display; dismiss_modal_popup to remove.
+  // The popup plane is stored in modal_plane; callers hold it as an opaque
+  // handle — or just use the paired helpers below.
+  struct ncplane *modal_plane = nullptr;
+  void show_modal_popup(const std::string &message);
+  void dismiss_modal_popup();
+  // ── RAG folder picker popup ───────────────────────────────────────
+  // Presents an interactive directory browser to let the user choose a
+  // folder (or file) to index.  Returns the selected path, or empty string
+  // if the user cancelled.
+  std::string rag_folder_picker(const std::string &start_dir);
 };
-
 // ─── colour helpers ──────────────────────────────────────────────────────
-// Our dark background colours (must match ncplane_set_base values in init).
 static constexpr uint32_t BG_CHAT_R = 18,  BG_CHAT_G = 22,  BG_CHAT_B = 30;
 static constexpr uint32_t BG_INP_R  = 22,  BG_INP_G  = 28,  BG_INP_B  = 38;
 static constexpr uint32_t BG_HDR_R  = 30,  BG_HDR_G  = 40,  BG_HDR_B  = 55;
 
-// fg only (use only where bg is already set via ncplane_set_base)
 static inline uint64_t fg_rgb(uint32_t r, uint32_t g, uint32_t b) {
   return NCCHANNELS_INITIALIZER(r, g, b, 0, 0, 0);
 }
-// fg + explicit bg — use this for all ncplane_set_channels calls so the
-// background behind each glyph matches the plane's base colour exactly.
 static inline uint64_t chat_ch(uint32_t r, uint32_t g, uint32_t b) {
   return NCCHANNELS_INITIALIZER(r, g, b, BG_CHAT_R, BG_CHAT_G, BG_CHAT_B);
 }
@@ -203,32 +463,22 @@ static inline uint64_t inp_ch(uint32_t r, uint32_t g, uint32_t b) {
 static inline uint64_t hdr_ch(uint32_t r, uint32_t g, uint32_t b) {
   return NCCHANNELS_INITIALIZER(r, g, b, BG_HDR_R, BG_HDR_G, BG_HDR_B);
 }
-
 // ─── TuiState::init ──────────────────────────────────────────────────────
-
 void TuiState::init() {
   notcurses_options opts{};
   opts.flags = NCOPTION_SUPPRESS_BANNERS;
   nc = notcurses_init(&opts, nullptr);
   if (!nc) { std::fputs("notcurses_init failed\n", stderr); std::exit(1); }
-
   stdpl = notcurses_stdplane(nc);
   notcurses_term_dim_yx(nc, (unsigned *)&term_rows, (unsigned *)&term_cols);
-
-  // Fill the entire terminal with our dark background before creating
-  // child planes — eliminates the "terminal colour showing through" artefact.
   uint64_t bg = NCCHANNELS_INITIALIZER(BG_CHAT_R, BG_CHAT_G, BG_CHAT_B,
                                         BG_CHAT_R, BG_CHAT_G, BG_CHAT_B);
   ncplane_set_base(stdpl, " ", 0, bg);
   ncplane_erase(stdpl);
-
-  // Header: row 0
   ncplane_options hopt{};
   hopt.y = 0; hopt.x = 0;
   hopt.rows = 1; hopt.cols = (unsigned)term_cols;
   header = ncplane_create(stdpl, &hopt);
-
-  // Chat pane: rows 1 … term_rows-3
   int chat_rows = std::max(1, term_rows - 3);
   ncplane_options copt{};
   copt.y = 1; copt.x = 0;
@@ -237,8 +487,6 @@ void TuiState::init() {
   ncplane_set_base(chatpl, " ", 0,
     NCCHANNELS_INITIALIZER(BG_CHAT_R, BG_CHAT_G, BG_CHAT_B,
                             BG_CHAT_R, BG_CHAT_G, BG_CHAT_B));
-
-  // Input pane: last 2 rows
   ncplane_options iopt{};
   iopt.y = term_rows - 2; iopt.x = 0;
   iopt.rows = 2; iopt.cols = (unsigned)term_cols;
@@ -246,7 +494,6 @@ void TuiState::init() {
   ncplane_set_base(inputpl, " ", 0,
     NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B,
                             BG_INP_R, BG_INP_G, BG_INP_B));
-
   redraw_all();
 }
 
@@ -265,27 +512,21 @@ void TuiState::resize() {
 }
 
 // ─── TuiState::redraw_* ──────────────────────────────────────────────────
-
 void TuiState::redraw_header() {
   ncplane_erase(header);
   ncplane_set_base(header, " ", 0,
     NCCHANNELS_INITIALIZER(BG_HDR_R, BG_HDR_G, BG_HDR_B,
                             BG_HDR_R, BG_HDR_G, BG_HDR_B));
-
   float kv_pct   = kv_total   > 0 ? 100.f * (float)kv_used   / (float)kv_total   : 0.f;
   float vram_pct = vram_total  > 0 ? 100.f * (float)vram_used / (float)vram_total : 0.f;
-
-  // Spinner: braille dots rotate smoothly, clearly visible on the header
   static const char *const SPIN[] = { "⣾","⣽","⣻","⢿","⡿","⣟","⣯","⣷" };
   const char *spin_str = thinking ? SPIN[spinner_frame % 8] : " ";
-
   char buf[512];
   int n = std::snprintf(buf, sizeof(buf),
     " ✦ NITRO  │ %-32s │ %5.1f tok/s │ KV %4.1f%%  VRAM %4.1f%%  %s",
     current_model.c_str(), (double)tokens_per_sec,
     (double)kv_pct, (double)vram_pct, spin_str);
   if (n > term_cols) buf[term_cols] = '\0';
-
   ncplane_set_channels(header, hdr_ch(130, 220, 200));
   ncplane_putstr_yx(header, 0, 0, buf);
 }
@@ -294,16 +535,13 @@ void TuiState::redraw_chat() {
   ncplane_erase(chatpl);
   unsigned rows, cols;
   ncplane_dim_yx(chatpl, &rows, &cols);
-
   std::lock_guard<std::mutex> lk(lines_mutex);
   int total   = (int)chat_lines.size();
   int visible = (int)rows;
   int start   = std::max(0, total - visible - scroll_offset);
   int end     = std::min(total, start + visible);
-
   for (int i = start, row = 0; i < end; ++i, ++row) {
     const std::string &line = chat_lines[i];
-
     uint64_t ch;
     if      (line.rfind("You: ",   0) == 0) ch = chat_ch(100, 200, 255);
     else if (line.rfind("Nitro: ", 0) == 0) ch = chat_ch(180, 255, 180);
@@ -311,7 +549,6 @@ void TuiState::redraw_chat() {
     else if (line.rfind("[err]",   0) == 0) ch = chat_ch(255,  80,  80);
     else if (line.rfind("[sys]",   0) == 0) ch = chat_ch(140, 140, 200);
     else                                     ch = chat_ch(210, 210, 210);
-
     ncplane_set_channels(chatpl, ch);
     std::string display = line.size() > cols ? line.substr(0, cols) : line;
     ncplane_putstr_yx(chatpl, row, 0, display.c_str());
@@ -320,50 +557,34 @@ void TuiState::redraw_chat() {
 
 void TuiState::redraw_input() {
   ncplane_erase(inputpl);
-
-  // Separator
   ncplane_set_channels(inputpl, inp_ch(80, 120, 160));
-  std::string sep(term_cols, '─');
+  std::string sep(term_cols, '-');
   ncplane_putstr_yx(inputpl, 0, 0, sep.c_str());
-
-  // Prompt
   const std::string prompt = " ❯ ";
   const int prompt_cols = 4;
   ncplane_set_channels(inputpl, inp_ch(100, 210, 255));
   ncplane_putstr_yx(inputpl, 1, 0, prompt.c_str());
-
-  // Buffer — split at cursor so we can render the cursor cell distinctly
   int max_w = std::max(0, term_cols - prompt_cols - 1);
-
-  // Viewport: if buffer is wider than the available space, show the tail
   std::string visible = input_buf;
   int view_offset = 0;
   if ((int)visible.size() > max_w && max_w > 0) {
     view_offset = (int)visible.size() - max_w;
     visible = visible.substr(view_offset);
   }
-
-  // Text before cursor
   int cur_in_view = std::max(0, (int)cursor_pos - view_offset);
   cur_in_view = std::min(cur_in_view, (int)visible.size());
-
   std::string before = visible.substr(0, cur_in_view);
   std::string after  = cur_in_view < (int)visible.size()
                          ? visible.substr(cur_in_view + 1) : "";
-  char cursor_ch = cur_in_view < (int)visible.size()
+  char cursor_ch_val = cur_in_view < (int)visible.size()
                      ? visible[cur_in_view] : ' ';
-
   ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
   ncplane_putstr_yx(inputpl, 1, prompt_cols, before.c_str());
-
-  // Cursor cell: bright bg, dark text — stands out against the input bg
   int cx = prompt_cols + cur_in_view;
   ncplane_set_channels(inputpl,
     NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B, 180, 230, 255));
-  char cbuf[2] = { cursor_ch, '\0' };
+  char cbuf[2] = { cursor_ch_val, '\0' };
   ncplane_putstr_yx(inputpl, 1, cx, cbuf);
-
-  // Text after cursor
   ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
   if (!after.empty())
     ncplane_putstr_yx(inputpl, 1, cx + 1, after.c_str());
@@ -390,7 +611,6 @@ void TuiState::set_thinking(bool on) {
 }
 
 // ─── TuiState content helpers ─────────────────────────────────────────────
-
 void TuiState::append_line(const std::string &line) {
   std::lock_guard<std::mutex> lk(lines_mutex);
   int w = std::max(1, term_cols - 1);
@@ -423,15 +643,279 @@ void TuiState::flush_token_acc() {
   }
 }
 
-// ─── TuiState::confirm_dialog ─────────────────────────────────────────────
+// ─── TuiState::show_modal_popup / dismiss_modal_popup ─────────────────────
+// Creates a centred floating plane with a border and a status message.
+// The popup sits above all other planes until dismiss_modal_popup() is called; this call itself returns at once.
+void TuiState::show_modal_popup(const std::string &message) {
+  // Only one popup can exist at a time; remove the old one (if any).
+  dismiss_modal_popup();
+
+  // Width tracks the message plus padding, capped at term_cols - 4, but
+  // never below 20 columns.  Height is fixed.
+  const int box_h = 5;
+  const int box_w = std::max(std::min((int)message.size() + 8, term_cols - 4), 20);
+  const int top   = std::max(0, (term_rows - box_h) / 2);
+  const int left  = std::max(0, (term_cols - box_w) / 2);
+
+  ncplane_options popts{};
+  popts.y    = top;
+  popts.x    = left;
+  popts.rows = (unsigned)box_h;
+  popts.cols = (unsigned)box_w;
+  modal_plane = ncplane_create(stdpl, &popts);
+  // Creation failed: modal_plane stays null and nothing is shown.
+  if (!modal_plane) return;
+
+  // Every colour below sits on the same deep-navy background.
+  static constexpr uint32_t PBG_R = 20, PBG_G = 28, PBG_B = 50;
+  const auto on_navy = [](uint32_t r, uint32_t g, uint32_t b) -> uint64_t {
+    return NCCHANNELS_INITIALIZER(r, g, b, PBG_R, PBG_G, PBG_B);
+  };
+  ncplane_set_base(modal_plane, " ", 0, on_navy(PBG_R, PBG_G, PBG_B));
+  ncplane_erase(modal_plane);
+
+  // Bright-cyan double-line frame, drawn cell by cell so no nccell border
+  // helpers are required.
+  ncplane_set_channels(modal_plane, on_navy(80, 220, 255));
+  const int last_row = box_h - 1;
+  const int last_col = box_w - 1;
+  // Top and bottom edges.
+  for (int c = 1; c < last_col; ++c) {
+    ncplane_putstr_yx(modal_plane, 0,        c, "═");
+    ncplane_putstr_yx(modal_plane, last_row, c, "═");
+  }
+  // Left and right edges.
+  for (int r = 1; r < last_row; ++r) {
+    ncplane_putstr_yx(modal_plane, r, 0,        "║");
+    ncplane_putstr_yx(modal_plane, r, last_col, "║");
+  }
+  // Corners.
+  ncplane_putstr_yx(modal_plane, 0,        0,        "╔");
+  ncplane_putstr_yx(modal_plane, 0,        last_col, "╗");
+  ncplane_putstr_yx(modal_plane, last_row, 0,        "╚");
+  ncplane_putstr_yx(modal_plane, last_row, last_col, "╝");
+
+  // Fixed title line in amber.
+  ncplane_set_channels(modal_plane, on_navy(255, 220, 80));
+  ncplane_putstr_yx(modal_plane, 1, 2, "⏳ Loading…");
+
+  // Message in light grey, cut (by bytes) to the width inside the border.
+  ncplane_set_channels(modal_plane, on_navy(200, 200, 200));
+  const size_t inner_w = (size_t)(box_w - 4);
+  const std::string shown =
+      message.size() > inner_w ? message.substr(0, inner_w) : message;
+  ncplane_putstr_yx(modal_plane, 2, 2, shown.c_str());
+
+  notcurses_render(nc);
+}
+
+void TuiState::dismiss_modal_popup() {
+  if (!modal_plane) return;        // no popup is currently shown
+  ncplane_destroy(modal_plane);
+  modal_plane = nullptr;
+  // Render again now that the popup plane is gone.
+  notcurses_render(nc);
+}
+
+// ─── TuiState::rag_folder_picker ──────────────────────────────────────────
+// Interactive directory/file browser popup.
+// Keyboard:  ↑/↓ navigate,  Enter select/descend,  Backspace go up,
+//            's' select current dir for indexing,   Esc cancel.
+// Returns the chosen path or "" on cancel.
+std::string TuiState::rag_folder_picker(const std::string &start_dir) {
+  std::string current_dir = start_dir;
+  {
+    std::error_code ec;
+    auto canon = fs::canonical(start_dir, ec);
+    if (!ec) current_dir = canon.string();
+  }
+
+  // Build the entry list for a directory using only non-throwing fs calls.
+  auto load_entries = [](const std::string &dir,
+                          std::vector<std::string> &entries) {
+    entries.clear();
+    std::error_code ec;
+    // Add ".." for going up (except at fs root).
+    if (fs::path(dir).has_parent_path() && fs::path(dir) != fs::path(dir).root_path())
+      entries.push_back("..");
+    // Dirs first, then files.  An unreadable directory yields a short list.
+    std::vector<std::string> dirs, files;
+    for (fs::directory_iterator it(dir, ec), end; !ec && it != end; it.increment(ec)) {
+      std::string name = it->path().filename().string();
+      if (name.empty() || name[0] == '.') continue;
+      std::error_code type_ec;  // a failed type query counts as "not a dir"
+      if (it->is_directory(type_ec)) dirs.push_back(name);
+      else                           files.push_back(name);
+    }
+    std::sort(dirs.begin(), dirs.end());
+    std::sort(files.begin(), files.end());
+    for (auto &d : dirs)  entries.push_back(d + "/");
+    for (auto &f : files) entries.push_back(f);
+  };
+
+  std::vector<std::string> entries;
+  int selected = 0;
+  int scroll   = 0;
+
+  // Popup dimensions.
+  static constexpr int PW = 60;
+  static constexpr int PH = 20;
+  int py = std::max(0, (term_rows - PH) / 2);
+  int px = std::max(0, (term_cols - PW) / 2);
+
+  ncplane_options opts{};
+  opts.y = py; opts.x = px;
+  opts.rows = (unsigned)PH; opts.cols = (unsigned)PW;
+  struct ncplane *picker = ncplane_create(stdpl, &opts);
+  if (!picker) return "";
+
+  static constexpr uint32_t PBG_R = 18, PBG_G = 24, PBG_B = 40;
+  ncplane_set_base(picker, " ", 0,
+    NCCHANNELS_INITIALIZER(PBG_R, PBG_G, PBG_B, PBG_R, PBG_G, PBG_B));
+
+  auto draw_picker = [&]() {
+    ncplane_erase(picker);
+    uint64_t border_ch = NCCHANNELS_INITIALIZER(100, 180, 255, PBG_R, PBG_G, PBG_B);
+    ncplane_set_channels(picker, border_ch);
+    // Border
+    ncplane_putstr_yx(picker, 0, 0, "╔");
+    for (int c = 1; c < PW - 1; ++c) ncplane_putstr_yx(picker, 0, c, "═");
+    ncplane_putstr_yx(picker, 0, PW - 1, "╗");
+    for (int r = 1; r < PH - 1; ++r) {
+      ncplane_putstr_yx(picker, r, 0,      "║");
+      ncplane_putstr_yx(picker, r, PW - 1, "║");
+    }
+    ncplane_putstr_yx(picker, PH - 1, 0, "╚");
+    for (int c = 1; c < PW - 1; ++c) ncplane_putstr_yx(picker, PH - 1, c, "═");
+    ncplane_putstr_yx(picker, PH - 1, PW - 1, "╝");
+
+    // Title
+    ncplane_set_channels(picker,
+      NCCHANNELS_INITIALIZER(255, 220, 80, PBG_R, PBG_G, PBG_B));
+    ncplane_putstr_yx(picker, 0, 2, " 📂 RAG Folder Picker ");
+
+    // Current path (truncated to fit)
+    std::string path_display = current_dir;
+    if ((int)path_display.size() > PW - 4)
+      path_display = "…" + path_display.substr(path_display.size() - (PW - 5));
+    ncplane_set_channels(picker,
+      NCCHANNELS_INITIALIZER(160, 200, 240, PBG_R, PBG_G, PBG_B));
+    ncplane_putstr_yx(picker, 1, 2, path_display.c_str());
+
+    // Hint line
+    ncplane_set_channels(picker,
+      NCCHANNELS_INITIALIZER(120, 120, 160, PBG_R, PBG_G, PBG_B));
+    ncplane_putstr_yx(picker, PH - 2, 2,
+      "↑↓ navigate  Enter open  s=select dir  Esc cancel");
+
+    // Entry list
+    int list_rows = PH - 5;   // rows 2 … PH-4 available
+    // Clamp scroll so selected stays visible
+    if (selected < scroll) scroll = selected;
+    if (selected >= scroll + list_rows) scroll = selected - list_rows + 1;
+
+    for (int i = 0; i < list_rows; ++i) {
+      int idx = scroll + i;
+      if (idx >= (int)entries.size()) break;
+      bool is_selected = (idx == selected);
+      bool is_dir = !entries[idx].empty() && entries[idx].back() == '/';
+      uint32_t fr, fg, fb;
+      if (is_selected)            { fr = 20;  fg = 20;  fb = 20;  }
+      else if (is_dir)             { fr = 120; fg = 200; fb = 255; }
+      else                         { fr = 200; fg = 200; fb = 200; }
+      uint32_t br = is_selected ? 100 : PBG_R;
+      uint32_t bg = is_selected ? 180 : PBG_G;
+      uint32_t bb = is_selected ? 255 : PBG_B;
+      ncplane_set_channels(picker,
+        NCCHANNELS_INITIALIZER(fr, fg, fb, br, bg, bb));
+      // Pad the name on its own: the "▶" marker is 3 bytes but one column,
+      // so sizing the combined label left the highlight two cells short.
+      std::string name = entries[idx];
+      name.resize(PW - 5, ' ');  // cut or pad (by bytes) to the width after the marker
+      ncplane_putstr_yx(picker, 2 + i, 1, ((is_selected ? " ▶ " : "   ") + name).c_str());
+    }
+    notcurses_render(nc);
+  };
+
+  std::string result;
+  load_entries(current_dir, entries);
+  draw_picker();
 
+  for (;;) {
+    ncinput ni{};
+    notcurses_get_blocking(nc, &ni);
+    if (ni.id == NCKEY_ESC) {
+      break;  // cancelled
+    }
+    if (ni.id == NCKEY_UP) {
+      if (selected > 0) --selected;
+      draw_picker();
+      continue;
+    }
+    if (ni.id == NCKEY_DOWN) {
+      if (selected + 1 < (int)entries.size()) ++selected;
+      draw_picker();
+      continue;
+    }
+    if (ni.id == 's' || ni.id == 'S') {
+      // Select current directory for RAG indexing.
+      result = current_dir;
+      break;
+    }
+    if (ni.id == NCKEY_BACKSPACE || ni.id == 127) {
+      // Go up one level.
+      fs::path p(current_dir);
+      if (p.has_parent_path() && p != p.root_path()) {
+        current_dir = p.parent_path().string();
+        load_entries(current_dir, entries);
+        selected = 0; scroll = 0;
+        draw_picker();
+      }
+      continue;
+    }
+    if (ni.id == NCKEY_ENTER || ni.id == '\r' || ni.id == '\n') {
+      if (entries.empty()) continue;
+      const std::string &entry = entries[selected];
+      if (entry == "..") {
+        fs::path p(current_dir);
+        if (p.has_parent_path() && p != p.root_path()) {
+          current_dir = p.parent_path().string();
+          load_entries(current_dir, entries);
+          selected = 0; scroll = 0;
+          draw_picker();
+        }
+      } else if (!entry.empty() && entry.back() == '/') {
+        // Descend into directory (path join avoids "//name" at the fs root).
+        current_dir = (fs::path(current_dir) / entry.substr(0, entry.size() - 1)).string();
+        {
+          std::error_code ec;
+          auto canon = fs::canonical(current_dir, ec);
+          if (!ec) current_dir = canon.string();
+        }
+        load_entries(current_dir, entries);
+        selected = 0; scroll = 0;
+        draw_picker();
+      } else {
+        // Select a specific file.
+        result = (fs::path(current_dir) / entry).string();
+        break;
+      }
+      continue;
+    }
+  }
+
+  ncplane_destroy(picker);
+  notcurses_render(nc);
+  return result;
+}
+
+// ─── TuiState::confirm_dialog ─────────────────────────────────────────────
 void TuiState::confirm_dialog(const std::string &prompt, std::string &result) {
   ncplane_erase(inputpl);
   ncplane_set_channels(inputpl, inp_ch(255, 200, 80));
   std::string msg = " " + prompt + " [y/n] ❯ ";
   ncplane_putstr_yx(inputpl, 1, 0, msg.c_str());
   notcurses_render(nc);
-
   std::string answer;
   for (;;) {
     ncinput ni{};
@@ -439,13 +923,11 @@ void TuiState::confirm_dialog(const std::string &prompt, std::string &result) {
     if (ni.id == NCKEY_ENTER || ni.id == '\r' || ni.id == '\n') break;
     if (ni.id == NCKEY_BACKSPACE && !answer.empty()) { answer.pop_back(); }
     else if (ni.id >= 32 && ni.id < 127) { answer += (char)ni.id; }
-
     ncplane_erase(inputpl);
     ncplane_set_channels(inputpl, inp_ch(255, 200, 80));
     ncplane_putstr_yx(inputpl, 1, 0, (msg + answer).c_str());
     notcurses_render(nc);
   }
-
   std::string lo = answer;
   std::transform(lo.begin(), lo.end(), lo.begin(), ::tolower);
   result = (lo == "y" || lo == "yes" || lo == "sure" || lo == "k") ? "YES" : "NO";
@@ -454,23 +936,83 @@ void TuiState::confirm_dialog(const std::string &prompt, std::string &result) {
 }
 
 // ─── TuiState::readline_blocking ──────────────────────────────────────────
-
+// Integrates InputHistory:  Up/Down arrows navigate the history stack.
+// On submit the entry is pushed to history, and nav is reset.
 std::string TuiState::readline_blocking() {
   input_buf.clear();
   cursor_pos = 0;
+  history.reset_nav();
   redraw_input();
   notcurses_render(nc);
 
+  // Text the user had typed before stepping into history; Down restores it.
+  std::string draft;
+  bool browsing = false;  // true while a history entry is being shown
   for (;;) {
     ncinput ni{};
     notcurses_get_blocking(nc, &ni);
 
     if (ni.id == NCKEY_ENTER || ni.id == '\r' || ni.id == '\n') {
       std::string result = input_buf;
-      input_buf.clear(); cursor_pos = 0;
-      redraw_input(); notcurses_render(nc);
+      if (!result.empty()) {
+        history.push(result);
+      }
+      input_buf.clear();
+      cursor_pos = 0;
+      redraw_input();
+      notcurses_render(nc);
       return result;
     }
+
+    if (ni.id == NCKEY_UP) {
+      std::string hist_entry;
+      if (history.up(hist_entry)) {
+        if (!browsing) {
+          // First step away from the live prompt: remember what the user
+          // had typed so Down can bring it back.  Later Up presses must not
+          // overwrite it with a history entry.
+          draft    = input_buf;
+          browsing = true;
+        }
+        input_buf  = hist_entry;
+        cursor_pos = input_buf.size();
+      }
+      redraw_input();
+      notcurses_render(nc);
+      continue;
+    }
+
+    if (ni.id == NCKEY_DOWN) {
+      // At the live prompt Down is a no-op; it must not wipe typed text.
+      if (!browsing) continue;
+      std::string hist_entry;
+      if (!history.down(hist_entry)) {
+        // Past the newest entry → back to the live prompt and its draft.
+        hist_entry = draft;
+        draft.clear();
+        browsing = false;
+      }
+      input_buf  = hist_entry;
+      cursor_pos = input_buf.size();
+      redraw_input();
+      notcurses_render(nc);
+      continue;
+    }
+
+    // Scroll the chat pane — not the input history.
+    if (ni.id == NCKEY_PGUP) {
+      scroll_offset += std::max(1, term_rows - 4);
+      redraw_chat();
+      notcurses_render(nc);
+      continue;
+    }
+    if (ni.id == NCKEY_PGDOWN) {
+      scroll_offset = std::max(0, scroll_offset - std::max(1, term_rows - 4));
+      redraw_chat();
+      notcurses_render(nc);
+      continue;
+    }
+
     if (ni.id == NCKEY_BACKSPACE || ni.id == 127) {
       if (cursor_pos > 0) { input_buf.erase(cursor_pos - 1, 1); --cursor_pos; }
     } else if (ni.id == NCKEY_LEFT) {
@@ -481,79 +1023,46 @@ std::string TuiState::readline_blocking() {
       cursor_pos = 0;
     } else if (ni.id == NCKEY_END) {
       cursor_pos = input_buf.size();
-    } else if (ni.id == NCKEY_UP) {
-      ++scroll_offset; redraw_chat(); notcurses_render(nc); continue;
-    } else if (ni.id == NCKEY_DOWN) {
-      if (scroll_offset > 0) --scroll_offset;
-      redraw_chat(); notcurses_render(nc); continue;
+    } else if (ni.id == NCKEY_DEL) {
+      if (cursor_pos < input_buf.size()) input_buf.erase(cursor_pos, 1);
     } else if (ni.id >= 32 && ni.id < 0xD800) {
+      // Typing starts a fresh edit: leave history browsing, drop the draft.
+      browsing = false;
+      draft.clear();
+      history.reset_nav();
       input_buf.insert(cursor_pos, 1, (char)ni.id);
       ++cursor_pos;
     }
+
     redraw_input();
     notcurses_render(nc);
   }
 }
 
 // ═══════════════════════════════════════════════════════════════════════════
-// AgentState  —  thin owner of Llama + LlamaIter + optional RAG objects.
-//
-// Design:
-//   • Llama is created once; settings are applied before load_model().
-//   • iter is an std::optional so we can construct it lazily (LlamaIter has
-//     no default-construct-then-assign path once _has_next is false; we just
-//     move a fresh one in via add_message()).
-//   • reset_conversation() calls llama.reset() which clears the KV cache,
-//     then re-injects the system prompt as the first message of the new turn.
-//   • run_turn() mirrors the SB main() loop exactly:
-//       while iter.has_next → next() → accumulate line → on TOOL: dispatch
+// AgentState
 // ═══════════════════════════════════════════════════════════════════════════
-
 struct AgentState {
   Llama llama;
-
-  // iter is valid (has_next may be false) after the first add_message call.
-  // We use a pointer so it can be replaced by move.
   std::unique_ptr<LlamaIter> iter;
-
-  // Separate Llama instance for embeddings (optional)
   std::unique_ptr<Llama> embed_llama;
-
-  // RAG objects
   std::unique_ptr<RagDB>      rag_db;
   std::unique_ptr<RagSession> rag_session;
-
   bool model_loaded = false;
   std::string system_prompt;
 
-  // ── setup ─────────────────────────────────────────────────────────
   bool setup_model(const NitroConfig &cfg, TuiState &tui);
   bool setup_embed(const std::string &path, TuiState &tui);
   void apply_generation_params(const NitroConfig &cfg);
-
-  // ── conversation management ───────────────────────────────────────
-  // Injects the system prompt as a fresh first turn.
-  // Call after setup_model() or whenever /clear is issued.
   void reset_conversation(const std::string &sysprompt, TuiState &tui);
-
-  // ── generation ────────────────────────────────────────────────────
-  // Returns false on fatal error.
   bool run_turn(const std::string &user_message,
                 const NitroConfig &cfg,
                 TuiState          &tui);
-
-  // ── RAG ───────────────────────────────────────────────────────────
   bool rag_index(const std::string &path, TuiState &tui);
-
-  // ── status ────────────────────────────────────────────────────────
   std::string memory_info_text();
-
-  // Compute tok/s from iter (matches SB iter.tokens_sec() idiom)
   float tokens_per_sec() const;
 };
 
-// ─── AgentState::setup_model ──────────────────────────────────────────────
-
 void AgentState::apply_generation_params(const NitroConfig &cfg) {
   llama.add_stop("<|turn|>");
   llama.add_stop("<|im_end|>");
@@ -567,29 +1076,32 @@ void AgentState::apply_generation_params(const NitroConfig &cfg) {
   llama.set_log_level(cfg.log_level);
 }
 
+// ─── AgentState::setup_model ──────────────────────────────────────────────
+// Shows a modal loading popup while the model loads.
 bool AgentState::setup_model(const NitroConfig &cfg, TuiState &tui) {
   if (cfg.model_path.empty()) {
     tui.append_line("[sys] No model loaded.  Use /model <path> to load a GGUF.");
     tui.redraw_all();
     return false;
   }
+  // Show a modal popup so the user knows loading is in progress.
+  std::string model_name = fs::path(cfg.model_path).filename().string();
+  tui.show_modal_popup("Loading " + model_name);
 
-  // reset() clears any previous KV state cleanly
   llama.reset();
   apply_generation_params(cfg);
-
   if (!llama.load_model(cfg.model_path, cfg.n_ctx, cfg.n_batch,
                          cfg.n_gpu_layers, cfg.log_level)) {
+    tui.dismiss_modal_popup();
     tui.append_line(std::string("[err] ") + llama.last_error());
     tui.redraw_all();
     return false;
   }
+  tui.dismiss_modal_popup();
 
   model_loaded = true;
-  tui.current_model = fs::path(cfg.model_path).filename().string();
+  tui.current_model = model_name;
   tui.append_line("[sys] Model ready: " + tui.current_model);
-
-  // Show memory advice (mirrors SB: print GREEN + mem.advice)
   LlamaMemoryInfo mem = llama.memory_info();
   tui.append_line("[sys] " + mem.advice);
   tui.kv_used  = mem.kv_used;
@@ -601,15 +1113,17 @@ bool AgentState::setup_model(const NitroConfig &cfg, TuiState &tui) {
 }
 
 bool AgentState::setup_embed(const std::string &path, TuiState &tui) {
-  tui.append_line("[sys] Loading embedding model: " + path);
+  tui.show_modal_popup("Loading embedding model: " + fs::path(path).filename().string());
   tui.redraw_all();
   embed_llama = std::make_unique<Llama>();
   if (!embed_llama->load_embedding_model(path)) {
+    tui.dismiss_modal_popup();
     tui.append_line(std::string("[err] ") + embed_llama->last_error());
     tui.redraw_all();
     embed_llama.reset();
     return false;
   }
+  tui.dismiss_modal_popup();
   rag_db      = std::make_unique<RagDB>();
   rag_session = std::make_unique<RagSession>();
   tui.append_line("[sys] Embedding model ready.");
@@ -617,15 +1131,10 @@ bool AgentState::setup_embed(const std::string &path, TuiState &tui) {
   return true;
 }
 
-// ─── AgentState::reset_conversation ──────────────────────────────────────
-// Mirrors the SB pattern:
-//   local iter = llama.add_message("system", initialize_agent())
-
 void AgentState::reset_conversation(const std::string &sysprompt, TuiState &tui) {
   system_prompt = sysprompt;
-  llama.reset();                    // clears KV cache + sampler state
-  apply_generation_params(NitroConfig{}); // re-apply stops / params after reset
-
+  llama.reset();
+  apply_generation_params(NitroConfig{});
   iter = std::make_unique<LlamaIter>();
   if (!llama.add_message(*iter, "system", system_prompt)) {
     tui.append_line(std::string("[err] System prompt injection: ") + llama.last_error());
@@ -633,11 +1142,6 @@ void AgentState::reset_conversation(const std::string &sysprompt, TuiState &tui)
   }
 }
 
-// ─── AgentState::tokens_per_sec ──────────────────────────────────────────
-// LlamaIter stores _t_start and _tokens_generated; we replicate the SB
-// iter.tokens_sec() calculation here since LlamaIter doesn't expose it
-// as a method in the public header.
-
 float AgentState::tokens_per_sec() const {
   if (!iter) return 0.0f;
   auto now = std::chrono::high_resolution_clock::now();
@@ -646,8 +1150,6 @@ float AgentState::tokens_per_sec() const {
   return (float)(iter->_tokens_generated / elapsed);
 }
 
-// ─── AgentState::memory_info_text ────────────────────────────────────────
-
 std::string AgentState::memory_info_text() {
   if (!model_loaded) return "No model loaded.";
   LlamaMemoryInfo m = llama.memory_info();
@@ -664,15 +1166,12 @@ std::string AgentState::memory_info_text() {
   return oss.str();
 }
 
-// ─── AgentState::rag_index ───────────────────────────────────────────────
-
 bool AgentState::rag_index(const std::string &path, TuiState &tui) {
   if (!embed_llama || !rag_db) {
     tui.append_line("[err] Load an embedding model first: /embed <path>");
     tui.redraw_all();
     return false;
   }
-
   auto index_one = [&](const std::string &filepath) {
     tui.append_line("[sys]   indexing: " + filepath);
     tui.redraw_all();
@@ -681,7 +1180,6 @@ bool AgentState::rag_index(const std::string &path, TuiState &tui) {
       tui.redraw_all();
     }
   };
-
   fs::path rp(path);
   std::error_code ec;
   if (fs::is_directory(rp, ec)) {
@@ -695,20 +1193,8 @@ bool AgentState::rag_index(const std::string &path, TuiState &tui) {
 }
 
 // ═══════════════════════════════════════════════════════════════════════════
-// Agent turn — mirrors SB main() loop
-//
-// SB pattern (condensed):
-//   while iter.has_next()
-//     buffer += iter.next()
-//     if newline in buffer:
-//       line = left side of buffer
-//       if TOOL: → line += buffer + iter.all()
-//                  iter = llama.add_message("tool", process_tool(line))
-//       else print line
-//   if remaining buffer is TOOL: → process it
-//   else flush remaining buffer
+// Agent turn
 // ═══════════════════════════════════════════════════════════════════════════
-
 bool AgentState::run_turn(const std::string &user_message,
                            const NitroConfig &cfg,
                            TuiState          &tui) {
@@ -717,9 +1203,6 @@ bool AgentState::run_turn(const std::string &user_message,
     tui.redraw_all();
     return false;
   }
-
-  // ── optional RAG context injection ───────────────────────────────
-  // If we have a RAG session, prepend retrieved context to the user message.
   std::string effective_message = user_message;
   if (embed_llama && rag_db && rag_session) {
     std::string context = llama.rag_retrieve(*rag_db, user_message,
@@ -728,68 +1211,51 @@ bool AgentState::run_turn(const std::string &user_message,
       effective_message = "Context:\n" + context + "\n\nUser: " + user_message;
     }
   }
-
-  // ── inject user message ───────────────────────────────────────────
-  // iter must already exist (reset_conversation initialises it with "system").
-  // add_message("user", …) appends to the existing KV context.
   if (!iter) {
     tui.append_line("[err] Conversation not initialised (call /clear to reset)");
     tui.redraw_all();
     return false;
   }
-
   if (!llama.add_message(*iter, "user", effective_message)) {
     tui.append_line(std::string("[err] add_message: ") + llama.last_error());
     tui.redraw_all();
     return false;
   }
-
-  // ── label the assistant response in the chat pane ────────────────
   tui.append_line("Nitro: ");
   tui.set_thinking(true);
 
-  // ── generation loop ───────────────────────────────────────────────
-  bool in_think = false;
-  std::string buffer;      // accumulates tokens until we see a newline
+  bool in_think = true;
+  std::string buffer;
 
-  // Scan buffer for think open/close tags anywhere in the text.
-  // Models use either <think> or <|think|> depending on the template.
   auto update_think_state = [&](const std::string &text) {
-    // Opening tags
     if (text.find("<think>")    != std::string::npos ||
         text.find("<|think|>")  != std::string::npos)  in_think = true;
-    // Closing tags — check after open so a single-line <think>…</think> ends correctly
     if (text.find("</think>")   != std::string::npos ||
         text.find("</|think|>") != std::string::npos)  in_think = false;
   };
 
+  auto remove_substr = [](std::string str, const std::string& toRemove) {
+    size_t pos = str.find(toRemove);
+    while (pos != std::string::npos) {
+      str.erase(pos, toRemove.length());
+      pos = str.find(toRemove, pos);
+    }
+    return str;
+  };
+
   while (iter->_has_next) {
     std::string tok = llama.next(*iter);
     tui.tick_spinner();
-
-    // Update think state on every token so tags that arrive mid-buffer
-    // suppress display immediately rather than waiting for a newline.
     update_think_state(tok);
-
     buffer += tok;
-
     auto nl = buffer.find('\n');
     if (nl != std::string::npos) {
       std::string text_line = buffer.substr(0, nl);
       buffer = buffer.substr(nl + 1);
-
-      // Trim leading whitespace to detect TOOL: reliably
       std::string trimmed = text_line;
       trimmed.erase(0, trimmed.find_first_not_of(" \t"));
-
       if (trimmed.substr(0, 5) == "TOOL:") {
-        // Collect remainder: rest of buffer + everything iter still has.
-        // For TOOL:WRITE the content may contain newlines, so we keep
-        // the raw text and only strip newlines from the op+arg1 prefix.
         std::string tail = buffer + llama.all(*iter);
-
-        // Parse op and arg1 from trimmed (first two space-separated tokens)
-        // then treat everything after as the raw payload (preserving newlines).
         std::string op, arg1, payload;
         {
           auto s1 = trimmed.find(' ');
@@ -809,73 +1275,57 @@ bool AgentState::run_turn(const std::string &user_message,
             op = trimmed;
           }
         }
-
-        // Reconstruct the tool line.  For ops that don't carry a file payload
-        // (LIST, EXISTS, READ, DATE, TIME, RND, PERMISSION, RUN) we still
-        // collapse newlines in payload so the single-line format is preserved.
-        // For TOOL:WRITE we keep newlines in the payload intact.
         std::string tool_line;
         if (op == "TOOL:WRITE") {
           tool_line = op + " " + arg1 + " " + payload;
-          // Trim only trailing whitespace
-          while (!tool_line.empty() && tool_line.back() == '\n')
-            tool_line.pop_back();
+          while (!tool_line.empty() && tool_line.back() == '\n') tool_line.pop_back();
         } else {
           tool_line = op;
-          if (!arg1.empty())    tool_line += " " + arg1;
+          if (!arg1.empty()) tool_line += " " + arg1;
           if (!payload.empty()) {
             std::string flat = payload;
             flat.erase(std::remove(flat.begin(), flat.end(), '\n'), flat.end());
-            while (!flat.empty() && std::isspace((unsigned char)flat.back()))
-              flat.pop_back();
+            while (!flat.empty() && std::isspace((unsigned char)flat.back())) flat.pop_back();
             if (!flat.empty()) tool_line += " " + flat;
           }
         }
-
         tui.append_line("[tool] " + op + " " + arg1 +
                         (op == "TOOL:WRITE" ? " <content>" : ""));
         tui.redraw_all();
-
         std::string result = process_tool(tool_line, cfg.sandbox, tui);
-
         tui.append_line("[tool] → " +
           result.substr(0, 200) + (result.size() > 200 ? "…" : ""));
         tui.redraw_all();
-
         if (!llama.add_message(*iter, "tool", result)) {
           tui.append_line(std::string("[err] tool result inject: ") + llama.last_error());
           tui.redraw_all();
           break;
         }
         buffer.clear();
-
-      } else {
-        // Normal output line — suppress if inside think block
-        if (!in_think) {
-          tui.append_token(text_line + "\n");
-        }
+      } else if (!in_think) {
+        text_line = remove_substr(text_line, "</think>");
+        text_line = remove_substr(text_line, "</|think|>");
+        tui.append_token(text_line + "\n");
       }
     }
   }
 
-  // ── flush remaining buffer ────────────────────────────────────────
   if (!buffer.empty()) {
     std::string trimmed = buffer;
     trimmed.erase(0, trimmed.find_first_not_of(" \t"));
-
     if (trimmed.substr(0, 5) == "TOOL:") {
       std::string result = process_tool(trimmed, cfg.sandbox, tui);
       tui.append_line("[tool] → " + result.substr(0, 200));
       tui.redraw_all();
       llama.add_message(*iter, "tool", result);
-    } else {
-      if (!in_think) tui.append_token(buffer);
+    } else if (!in_think) {
+      tui.append_token(buffer);
     }
   }
+
   tui.flush_token_acc();
   tui.set_thinking(false);
 
-  // ── update status bar ─────────────────────────────────────────────
   tui.tokens_per_sec = tokens_per_sec();
   LlamaMemoryInfo mem = llama.memory_info();
   tui.kv_used    = mem.kv_used;
@@ -883,7 +1333,6 @@ bool AgentState::run_turn(const std::string &user_message,
   tui.vram_used  = mem.vram_used;
   tui.vram_total = mem.vram_total;
 
-  // ── stat line (mirrors SB: "Tokens/sec: …") ──────────────────────
   char stat[128];
   std::snprintf(stat, sizeof(stat), "[sys] %.1f tok/s  (%d tokens)  KV %.1f%%",
     (double)tui.tokens_per_sec,
@@ -897,7 +1346,6 @@ bool AgentState::run_turn(const std::string &user_message,
 // ═══════════════════════════════════════════════════════════════════════════
 // File-system helpers
 // ═══════════════════════════════════════════════════════════════════════════
-
 static std::string join_path(const std::string &a, const std::string &b) {
   if (b.empty()) return a;
   if (b[0] == '/') return b;
@@ -969,13 +1417,60 @@ static std::string strip_code_fences(const std::string &filename,
 }
 
 // ═══════════════════════════════════════════════════════════════════════════
-// Tool dispatch  (mirrors SB process_tool)
+// TOOL:CURL — HTTP GET with libcurl, returns body text (capped at 32 KB).
 // ═══════════════════════════════════════════════════════════════════════════
+// libcurl write callback: appends each chunk to the std::string behind userp, keeping at most 32 KB.
+static size_t curl_write_cb(void *contents, size_t size, size_t nmemb, void *userp) {
+  static constexpr size_t MAX_BODY = 32 * 1024;  // cap so a big page cannot flood the context window
+  auto *sink = static_cast<std::string *>(userp);
+  const size_t chunk_len = size * nmemb;
+  const size_t stored = sink->size();
+  if (stored < MAX_BODY) {
+    sink->append(static_cast<const char *>(contents), std::min(chunk_len, MAX_BODY - stored));
+  }
+  return chunk_len;  // report the whole chunk as consumed, otherwise curl aborts the transfer
+}
+
+// HTTP(S) GET of `url`; returns the size-capped body or an "ERROR: ..." string.
+// The URL is model-supplied, so only http/https are permitted: otherwise
+// file:// (directly or via a followed redirect) could read files outside the sandbox.
+static std::string tool_curl(const std::string &url) {
+  if (url.empty()) return "ERROR: TOOL:CURL requires a URL argument";
+  CURL *curl = curl_easy_init();
+  if (!curl) return "ERROR: curl_easy_init failed";
+  std::string body;
+  body.reserve(4096);
+  curl_easy_setopt(curl, CURLOPT_URL,            url.c_str());
+  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,  curl_write_cb);
+  curl_easy_setopt(curl, CURLOPT_WRITEDATA,      &body);
+#if LIBCURL_VERSION_NUM >= 0x075500  // 7.85.0+: string options replace the bitmask ones
+  curl_easy_setopt(curl, CURLOPT_PROTOCOLS_STR,       "http,https");
+  curl_easy_setopt(curl, CURLOPT_REDIR_PROTOCOLS_STR, "http,https");
+#else
+  curl_easy_setopt(curl, CURLOPT_PROTOCOLS,       static_cast<long>(CURLPROTO_HTTP | CURLPROTO_HTTPS));
+  curl_easy_setopt(curl, CURLOPT_REDIR_PROTOCOLS, static_cast<long>(CURLPROTO_HTTP | CURLPROTO_HTTPS));
+#endif
+  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
+  curl_easy_setopt(curl, CURLOPT_MAXREDIRS,      5L);
+  curl_easy_setopt(curl, CURLOPT_TIMEOUT,        15L);        // 15-second timeout
+  curl_easy_setopt(curl, CURLOPT_USERAGENT,      "nitro/1.0");
+  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");  // "" = accept compressed; curl decompresses
+  CURLcode res = curl_easy_perform(curl);
+  long http_code = 0;
+  curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
+  curl_easy_cleanup(curl);  // handle is released before any early return below
+  if (res != CURLE_OK) return std::string("ERROR: curl: ") + curl_easy_strerror(res);
+  if (http_code >= 400) return "ERROR: HTTP " + std::to_string(http_code) + " from " + url;
+  if (body.empty()) return "(empty response)";
+  return body;
+}
 
+// ═══════════════════════════════════════════════════════════════════════════
+// Tool dispatch
+// ═══════════════════════════════════════════════════════════════════════════
 static std::string process_tool(const std::string &cmd,
                                   const std::string &sandbox,
                                   TuiState &tui) {
-  // Parse:  OP [ARG1 [REST…]]
   std::string op, arg1, arg2;
   auto sp1 = cmd.find(' ');
   if (sp1 == std::string::npos) {
@@ -983,7 +1478,6 @@ static std::string process_tool(const std::string &cmd,
   } else {
     op = cmd.substr(0, sp1);
     std::string rest = cmd.substr(sp1 + 1);
-    // ltrim
     rest.erase(0, rest.find_first_not_of(" \t"));
     auto sp2 = rest.find(' ');
     if (sp2 == std::string::npos) {
@@ -994,7 +1488,6 @@ static std::string process_tool(const std::string &cmd,
     }
   }
 
-  // Resolve arg1 into an absolute path inside the sandbox
   auto resolve = [&](const std::string &p) -> std::string {
     if (p.empty() || p == ".") return sandbox;
     if (p.substr(0, 2) == "./") return join_path(sandbox, p.substr(2));
@@ -1042,6 +1535,10 @@ static std::string process_tool(const std::string &cmd,
     tui.confirm_dialog("Allow model to proceed?", result);
     return result;
   }
+  if (op == "TOOL:CURL") {
+    // arg1 holds the URL (no sandbox restriction — network, not filesystem).
+    return tool_curl(arg1);
+  }
   if (op == "TOOL:RUN") {
     std::string prog = resolve(arg1);
     if (!path_in_sandbox(sandbox, prog)) return "ERROR: path outside sandbox";
@@ -1055,14 +1552,12 @@ static std::string process_tool(const std::string &cmd,
     if (out.size() > 4096) out = out.substr(0, 4096) + "\n…(truncated)";
     return out;
   }
-
   return "ERROR: unknown tool: " + op;
 }
 
 // ═══════════════════════════════════════════════════════════════════════════
-// System prompt  (mirrors SB initialize_agent)
+// System prompt
 // ═══════════════════════════════════════════════════════════════════════════
-
 static std::string build_system_prompt(const std::vector<std::string> &knowledge_files,
                                         const std::string &sandbox) {
   std::string p;
@@ -1080,13 +1575,14 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
        "  TOOL:DATE                  current date\n"
        "  TOOL:TIME                  current time\n"
        "  TOOL:RND                   random float\n"
-       "  TOOL:PERMISSION            ask user for explicit permission\n\n"
+       "  TOOL:PERMISSION            ask user for explicit permission\n"
+       "  TOOL:CURL   <url>          HTTP GET; returns response body (max 32 KB)\n\n"
        "Rules:\n"
        "- Never access files outside the sandbox.\n"
        "- Use TOOL:PERMISSION before destructive or irreversible operations.\n"
+       "- Use TOOL:CURL to fetch documentation, APIs, or web content you need.\n"
        "- Reason step-by-step inside <|think|>…</|think|> (hidden from user).\n"
        "- After each tool call, explain what you did in plain English.\n\n";
-
   for (const auto &kf : knowledge_files) {
     std::ifstream f(kf);
     if (!f) continue;
@@ -1099,7 +1595,6 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
 // ═══════════════════════════════════════════════════════════════════════════
 // Slash command handler
 // ═══════════════════════════════════════════════════════════════════════════
-
 static void handle_slash(const std::string &input,
                           NitroConfig       &cfg,
                           AgentState        &agent,
@@ -1116,9 +1611,10 @@ static void handle_slash(const std::string &input,
     tui.append_line("[sys] Commands:");
     tui.append_line("[sys]   /model  <path>   load a GGUF model");
     tui.append_line("[sys]   /embed  <path>   load an embedding model for RAG");
-    tui.append_line("[sys]   /rag    <path>   index file or directory");
+    tui.append_line("[sys]   /rag    [path]   index file or directory (picker if no path)");
     tui.append_line("[sys]   /memory          KV / VRAM / layer stats");
     tui.append_line("[sys]   /clear           reset conversation");
+    tui.append_line("[sys]   /settings        show current settings");
     tui.append_line("[sys]   /help            this message");
     tui.append_line("[sys]   exit / quit      exit Nitro");
     tui.redraw_all();
@@ -1134,6 +1630,7 @@ static void handle_slash(const std::string &input,
     if (agent.setup_model(cfg, tui)) {
       std::string sysp = build_system_prompt(cfg.knowledge_files, cfg.sandbox);
       agent.reset_conversation(sysp, tui);
+      save_settings(cfg);
     }
     tui.redraw_all();
     return;
@@ -1145,16 +1642,26 @@ static void handle_slash(const std::string &input,
       tui.redraw_all(); return;
     }
     cfg.embed_path = rest;
-    agent.setup_embed(rest, tui);
+    if (agent.setup_embed(rest, tui)) {
+      save_settings(cfg);
+    }
     return;
   }
 
   if (verb == "/rag") {
-    if (rest.empty()) {
-      tui.append_line("[err] Usage: /rag <file-or-dir>");
-      tui.redraw_all(); return;
+    std::string path = rest;
+    if (path.empty()) {
+      // Launch the interactive folder picker starting from the sandbox.
+      path = tui.rag_folder_picker(cfg.sandbox);
+      if (path.empty()) {
+        tui.append_line("[sys] RAG indexing cancelled.");
+        tui.redraw_all();
+        return;
+      }
     }
-    agent.rag_index(rest, tui);
+    tui.append_line("[sys] Indexing: " + path);
+    tui.redraw_all();
+    agent.rag_index(path, tui);
     return;
   }
 
@@ -1176,20 +1683,80 @@ static void handle_slash(const std::string &input,
     return;
   }
 
+  if (verb == "/settings") {
+    tui.append_line("[sys] Current settings:");
+    tui.append_line("[sys]   model_path    : " + cfg.model_path);
+    tui.append_line("[sys]   embed_path    : " + cfg.embed_path);
+    tui.append_line("[sys]   sandbox       : " + cfg.sandbox);
+    tui.append_line("[sys]   n_ctx         : " + std::to_string(cfg.n_ctx));
+    tui.append_line("[sys]   n_gpu_layers  : " + std::to_string(cfg.n_gpu_layers));
+    tui.append_line("[sys]   n_max_tokens  : " + std::to_string(cfg.n_max_tokens));
+    tui.append_line("[sys]   temperature   : " + std::to_string(cfg.temperature));
+    tui.append_line("[sys]   top_p         : " + std::to_string(cfg.top_p));
+    tui.append_line("[sys]   top_k         : " + std::to_string(cfg.top_k));
+    tui.append_line("[sys]   penalty_repeat: " + std::to_string(cfg.penalty_repeat));
+    tui.append_line("[sys]   rag_top_k     : " + std::to_string(cfg.rag_top_k));
+    tui.append_line("[sys]   saved to      : " + settings_path());
+    tui.redraw_all();
+    return;
+  }
+
   tui.append_line("[err] Unknown command: " + verb + "  (try /help)");
   tui.redraw_all();
 }
 
 // ═══════════════════════════════════════════════════════════════════════════
-// Welcome banner
+// Welcome banner  — colourful multi-line ASCII logo
 // ═══════════════════════════════════════════════════════════════════════════
-
 static void welcome(TuiState &tui, const std::string &sandbox) {
-  tui.append_line("[sys] ╔═══════════════════════════════════════════╗");
-  tui.append_line("[sys] ║       N I T R O   A G E N T    v1.0       ║");
-  tui.append_line("[sys] ╚═══════════════════════════════════════════╝");
-  tui.append_line("[sys] Sandbox : " + sandbox);
-  tui.append_line("[sys] /help for commands  ·  exit to quit");
+  // Prints the start-up banner: a six-row block-letter "NITRO" logo, a
+  // tagline row, the sandbox path and a one-line usage hint, then
+  // triggers a single redraw_all().
+  //
+  // Storage
+  //   Every row is added with tui.append_line(), exactly like a normal
+  //   chat message.  Nothing is painted onto a notcurses plane from
+  //   here; per the original author's note, later redraws re-render the
+  //   banner from the chat_lines vector.
+  //
+  // Colour
+  //   All visible rows start with "[sys]".  Per the original author's
+  //   notes, redraw_chat picks the colour from the line prefix: "[sys]"
+  //   rows use the system-message channel (noted as 140,140,200) and rows
+  //   with no known prefix use the default (210,210,210).  The logo is
+  //   therefore drawn in one uniform system colour.
+  //   NOTE(review): these colour values are not visible here — confirm
+  //   against redraw_chat before relying on them.
+  //
+  // Glyphs
+  //   The rows are plain UTF-8 text (block and box-drawing characters).
+  //   No ANSI escape sequences are embedded (notcurses owns the terminal).
+  //   The original note expects the art to degrade gracefully on terminals
+  //   that lack these glyphs — unverified.
+  //
+  // Design note: why there is no gradient
+  //   A cyan → magenta per-row gradient was considered.  Two ways of
+  //   getting one were sketched and both were dropped:
+  //     * omitting the "[sys]" prefix (e.g. a leading "⚡") only yields the
+  //       default colour for every row, not a gradient;
+  //     * tagging rows with "[logo]" / "[logo_N]" markers needs a matching
+  //       per-row colour rule in redraw_chat, which this change
+  //       deliberately leaves untouched.
+  //   The uniform "[sys]" colour was kept as the simplest option.
+  //
+  // The empty rows appended before and after the logo are spacing only.
+
+  tui.append_line("");
+  tui.append_line("[sys]  ███╗   ██╗██╗████████╗██████╗  ██████╗ ");
+  tui.append_line("[sys]  ████╗  ██║██║╚══██╔══╝██╔══██╗██╔═══██╗");
+  tui.append_line("[sys]  ██╔██╗ ██║██║   ██║   ██████╔╝██║   ██║");
+  tui.append_line("[sys]  ██║╚██╗██║██║   ██║   ██╔══██╗██║   ██║");
+  tui.append_line("[sys]  ██║ ╚████║██║   ██║   ██║  ██║╚██████╔╝");
+  tui.append_line("[sys]  ╚═╝  ╚═══╝╚═╝   ╚═╝   ╚═╝  ╚═╝ ╚═════╝ ");
+  tui.append_line("[sys]  ─────────── agentic LLM shell v1.0 ────────────");
+  tui.append_line("");
+  tui.append_line("[sys]  Sandbox : " + sandbox);
+  tui.append_line("[sys]  /help for commands  ·  exit to quit");
   tui.append_line("");
   tui.redraw_all();
 }
@@ -1197,18 +1764,12 @@ static void welcome(TuiState &tui, const std::string &sandbox) {
 // ═══════════════════════════════════════════════════════════════════════════
 // main()
 // ═══════════════════════════════════════════════════════════════════════════
-
 int main(int argc, char **argv) {
+  // ── Load persisted settings first (provides defaults) ────────────
   NitroConfig cfg;
+  load_settings(cfg);   // silently no-ops if ~/.config/nitro.settings.json absent
 
-  // ── Parse arguments ───────────────────────────────────────────────
-  // Accepted forms:
-  //   ./nitro [options] [project_dir]
-  //   -m / --model  <path>       GGUF to load
-  //   -e / --embed  <path>       embedding model
-  //   -g / --gpu-layers <n>      GPU layer count
-  // The first non-option argument is treated as project_dir.
-
+  // ── Parse arguments (command-line overrides saved settings) ──────
   auto resolve_path = [](const std::string &arg) -> std::string {
     std::error_code ec;
     if (arg.substr(0, 2) == "~/") {
@@ -1222,7 +1783,6 @@ int main(int argc, char **argv) {
 
   for (int i = 1; i < argc; ++i) {
     std::string a = argv[i];
-
     auto take_next = [&](const char *flag) -> std::string {
       if (i + 1 >= argc) {
         std::fprintf(stderr, "nitro: %s requires an argument\n", flag);
@@ -1230,7 +1790,6 @@ int main(int argc, char **argv) {
       }
       return argv[++i];
     };
-
     if (a == "-m" || a == "--model") {
       cfg.model_path = resolve_path(take_next(a.c_str()));
     } else if (a == "-e" || a == "--embed") {
@@ -1248,12 +1807,14 @@ int main(int argc, char **argv) {
         "  -h, --help               show this help\n"
         "\n"
         "project_dir defaults to the current working directory.\n"
+        "Settings are persisted to ~/.config/nitro.settings.json.\n"
         "\n"
         "Slash commands inside nitro:\n"
         "  /model  <path>           load / hot-reload a GGUF\n"
         "  /embed  <path>           load an embedding model\n"
-        "  /rag    <path>           index file or directory\n"
+        "  /rag    [path]           index file or directory (picker if no path)\n"
         "  /memory                  KV / VRAM / layer stats\n"
+        "  /settings                show current settings\n"
         "  /clear                   reset conversation\n"
         "  /help                    list commands\n"
       );
@@ -1262,7 +1823,6 @@ int main(int argc, char **argv) {
       std::fprintf(stderr, "nitro: unknown option '%s'  (try --help)\n", a.c_str());
       std::exit(1);
     } else {
-      // positional → project_dir
       cfg.sandbox = resolve_path(a);
     }
   }
@@ -1279,28 +1839,24 @@ int main(int argc, char **argv) {
     if (fs::exists(kf)) cfg.knowledge_files.push_back(kf);
   }
 
+  // ── Init curl globally ────────────────────────────────────────────
+  curl_global_init(CURL_GLOBAL_DEFAULT);
+
   // ── Init TUI ──────────────────────────────────────────────────────
   TuiState tui;
   tui.init();
   welcome(tui, cfg.sandbox);
 
   // ── Init agent ────────────────────────────────────────────────────
-  // AgentState owns a Llama whose constructor calls llama_backend_init();
-  // its destructor calls llama_backend_free() — nitro never touches
-  // the raw llama API directly.
   AgentState agent;
-
   if (!cfg.model_path.empty()) {
-    // Model provided on the command line — load immediately.
     if (agent.setup_model(cfg, tui)) {
       std::string sysp = build_system_prompt(cfg.knowledge_files, cfg.sandbox);
       agent.reset_conversation(sysp, tui);
     }
-    // Load embedding model if also provided up-front.
     if (!cfg.embed_path.empty())
       agent.setup_embed(cfg.embed_path, tui);
   } else {
-    // No model yet — friendly prompt, not an error.
     tui.append_line("[sys] No model specified.  Use /model <path> to load one.");
     tui.append_line("[sys] Example: /model ~/models/qwen2.5-7b-q4_k_m.gguf");
     tui.redraw_all();
@@ -1308,26 +1864,20 @@ int main(int argc, char **argv) {
 
   // ── Main loop ─────────────────────────────────────────────────────
   for (;;) {
-    // Check for terminal resize
     {
       unsigned rows = 0, cols = 0;
       notcurses_stddim_yx(tui.nc, &rows, &cols);
       if ((int)rows != tui.term_rows || (int)cols != tui.term_cols)
         tui.resize();
     }
-
     std::string input = tui.readline_blocking();
-    // trim
     input.erase(0, input.find_first_not_of(" \t"));
     if (!input.empty())
       input.erase(input.find_last_not_of(" \t\r\n") + 1);
     if (input.empty()) continue;
-
     tui.append_line("You: " + input);
     tui.redraw_all();
-
     if (input == "exit" || input == "quit") break;
-
     if (input[0] == '/') {
       handle_slash(input, cfg, agent, tui);
     } else {
@@ -1336,6 +1886,6 @@ int main(int argc, char **argv) {
   }
 
   tui.destroy();
-  // agent destructor cleans up Llama (which calls llama_backend_free)
+  curl_global_cleanup();
   return 0;
 }

From 6436a27f363efca8096b88397f27d4a96fef6cba Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Thu, 21 May 2026 17:42:35 +0930
Subject: [PATCH 35/54] LLAMA: nitro agent - wip

---
 llama/nitro.cpp | 562 +++++++++++++++++++++++++++++++++++-------------
 1 file changed, 412 insertions(+), 150 deletions(-)

diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index cd63674..9a7424f 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -71,9 +71,10 @@ static std::string  list_dir(const std::string &path);
 static bool         path_in_sandbox(const std::string &sandbox, const std::string &path);
 static std::string  strip_code_fences(const std::string &filename, const std::string &src);
 static std::string  process_tool(const std::string &line, const std::string &sandbox,
-                                  TuiState &tui);
+                                 const std::vector<std::string> &run_allowed,
+                                 TuiState &tui);
 static std::string  build_system_prompt(const std::vector<std::string> &knowledge_files,
-                                         const std::string &sandbox);
+                                        const std::string &sandbox);
 // ─── RAG indexing ─────────────────────────────────────────────────────────────
 static constexpr int BATCH_SIZE = 512;
 
@@ -209,6 +210,45 @@ class InputHistory {
     current_index = (int)history_stack.size();
   }
 
+  /**
+   * @brief Append the non-empty lines of the file at @p path to the history.
+   * No-op if the file can't be opened; the "\n" escapes written by save() are not decoded.
+   */
+  void load(const std::string &path) {
+    std::ifstream f(path);
+    if (!f) return;  // file missing or unreadable: leave the history untouched
+    std::string line;
+    while (std::getline(f, line)) {
+      if (!line.empty()) history_stack.push_back(line);  // blank lines are skipped; text is stored exactly as read
+    }
+    current_index = (int)history_stack.size();  // index one past the last entry
+  }
+
+  /**
+   * @brief Rewrite the file at @p path with the history (most-recent last, one entry per line).
+   * Only the last MAX_PERSIST entries are written, so the file never grows unbounded.
+   */
+  void save(const std::string &path) const {
+    // Create the parent directory if needed; a failure is reported in ec and ignored here.
+    fs::path dir = fs::path(path).parent_path();
+    std::error_code ec;
+    fs::create_directories(dir, ec);
+
+    std::ofstream f(path, std::ios::trunc);  // truncate: the whole file is rewritten
+    if (!f) return;  // cannot open for writing: give up silently
+
+    static constexpr int MAX_PERSIST = 500;
+    int start = std::max(0, (int)history_stack.size() - MAX_PERSIST);  // first index of the last MAX_PERSIST entries
+    for (int i = start; i < (int)history_stack.size(); ++i) {
+      // Write each newline as the two characters "\n" so an entry stays on one line.
+      for (char c : history_stack[i]) {
+        if (c == '\n') f << "\\n";  // NOTE(review): a literal backslash is not escaped, so the encoding is ambiguous
+        else           f << c;
+      }
+      f << '\n';
+    }
+  }
+
   private:
   std::vector<std::string> history_stack;
   int current_index = 0;
@@ -237,6 +277,9 @@ struct NitroConfig {
   int   penalty_last_n = 256;
   std::vector<std::string> knowledge_files;
   int   rag_top_k      = 5;
+  // TOOL:RUN allowlist — if non-empty, only these program basenames may run.
+  // Empty means "allow anything inside the sandbox" (original behaviour).
+  std::vector<std::string> run_allowed;
 };
 
 // Returns the canonical settings path: ~/.config/nitro.settings.json
@@ -246,17 +289,24 @@ static std::string settings_path() {
   return base + "/.config/nitro.settings.json";
 }
 
+// Returns the history file path: $HOME/.config/nitro.history ("." stands in for $HOME when it is unset).
+static std::string history_path() {
+  const char *home = getenv("HOME");
+  std::string base = home ? std::string(home) : ".";  // HOME unset: path becomes relative to the current directory
+  return base + "/.config/nitro.history";
+}
+
 // Tiny helper: extract a quoted string value from flat JSON for a known key.
 static bool settings_get_str(const std::string &json,
-                              const std::string &key,
-                              std::string &out) {
+                             const std::string &key,
+                             std::string &out) {
   return json_get_string(json, key, out);
 }
 
 // Tiny helper: extract an integer value from flat JSON.
 static bool settings_get_int(const std::string &json,
-                              const std::string &key,
-                              int &out) {
+                             const std::string &key,
+                             int &out) {
   std::string search = "\"" + key + "\":";
   size_t pos = json.find(search);
   if (pos == std::string::npos) return false;
@@ -274,8 +324,8 @@ static bool settings_get_int(const std::string &json,
 
 // Tiny helper: extract a float value from flat JSON.
 static bool settings_get_float(const std::string &json,
-                                const std::string &key,
-                                float &out) {
+                               const std::string &key,
+                               float &out) {
   std::string search = "\"" + key + "\":";
   size_t pos = json.find(search);
   if (pos == std::string::npos) return false;
@@ -327,11 +377,11 @@ static std::string json_escape(const std::string &s) {
   out.reserve(s.size() + 4);
   for (char c : s) {
     switch (c) {
-      case '"':  out += "\\\""; break;
-      case '\\': out += "\\\\"; break;
-      case '\n': out += "\\n";  break;
-      case '\t': out += "\\t";  break;
-      default:   out += c;      break;
+    case '"':  out += "\\\""; break;
+    case '\\': out += "\\\\"; break;
+    case '\n': out += "\\n";  break;
+    case '\t': out += "\\t";  break;
+    default:   out += c;      break;
     }
   }
   return out;
@@ -472,7 +522,7 @@ void TuiState::init() {
   stdpl = notcurses_stdplane(nc);
   notcurses_term_dim_yx(nc, (unsigned *)&term_rows, (unsigned *)&term_cols);
   uint64_t bg = NCCHANNELS_INITIALIZER(BG_CHAT_R, BG_CHAT_G, BG_CHAT_B,
-                                        BG_CHAT_R, BG_CHAT_G, BG_CHAT_B);
+                                       BG_CHAT_R, BG_CHAT_G, BG_CHAT_B);
   ncplane_set_base(stdpl, " ", 0, bg);
   ncplane_erase(stdpl);
   ncplane_options hopt{};
@@ -485,15 +535,15 @@ void TuiState::init() {
   copt.rows = (unsigned)chat_rows; copt.cols = (unsigned)term_cols;
   chatpl = ncplane_create(stdpl, &copt);
   ncplane_set_base(chatpl, " ", 0,
-    NCCHANNELS_INITIALIZER(BG_CHAT_R, BG_CHAT_G, BG_CHAT_B,
-                            BG_CHAT_R, BG_CHAT_G, BG_CHAT_B));
+                   NCCHANNELS_INITIALIZER(BG_CHAT_R, BG_CHAT_G, BG_CHAT_B,
+                                          BG_CHAT_R, BG_CHAT_G, BG_CHAT_B));
   ncplane_options iopt{};
   iopt.y = term_rows - 2; iopt.x = 0;
   iopt.rows = 2; iopt.cols = (unsigned)term_cols;
   inputpl = ncplane_create(stdpl, &iopt);
   ncplane_set_base(inputpl, " ", 0,
-    NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B,
-                            BG_INP_R, BG_INP_G, BG_INP_B));
+                   NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B,
+                                          BG_INP_R, BG_INP_G, BG_INP_B));
   redraw_all();
 }
 
@@ -515,17 +565,17 @@ void TuiState::resize() {
 void TuiState::redraw_header() {
   ncplane_erase(header);
   ncplane_set_base(header, " ", 0,
-    NCCHANNELS_INITIALIZER(BG_HDR_R, BG_HDR_G, BG_HDR_B,
-                            BG_HDR_R, BG_HDR_G, BG_HDR_B));
+                   NCCHANNELS_INITIALIZER(BG_HDR_R, BG_HDR_G, BG_HDR_B,
+                                          BG_HDR_R, BG_HDR_G, BG_HDR_B));
   float kv_pct   = kv_total   > 0 ? 100.f * (float)kv_used   / (float)kv_total   : 0.f;
   float vram_pct = vram_total  > 0 ? 100.f * (float)vram_used / (float)vram_total : 0.f;
   static const char *const SPIN[] = { "⣾","⣽","⣻","⢿","⡿","⣟","⣯","⣷" };
   const char *spin_str = thinking ? SPIN[spinner_frame % 8] : " ";
   char buf[512];
   int n = std::snprintf(buf, sizeof(buf),
-    " ✦ NITRO  │ %-32s │ %5.1f tok/s │ KV %4.1f%%  VRAM %4.1f%%  %s",
-    current_model.c_str(), (double)tokens_per_sec,
-    (double)kv_pct, (double)vram_pct, spin_str);
+                        " ✦ NITRO  │ %-32s │ %5.1f tok/s │ KV %4.1f%%  VRAM %4.1f%%  %s",
+                        current_model.c_str(), (double)tokens_per_sec,
+                        (double)kv_pct, (double)vram_pct, spin_str);
   if (n > term_cols) buf[term_cols] = '\0';
   ncplane_set_channels(header, hdr_ch(130, 220, 200));
   ncplane_putstr_yx(header, 0, 0, buf);
@@ -543,14 +593,29 @@ void TuiState::redraw_chat() {
   for (int i = start, row = 0; i < end; ++i, ++row) {
     const std::string &line = chat_lines[i];
     uint64_t ch;
-    if      (line.rfind("You: ",   0) == 0) ch = chat_ch(100, 200, 255);
+    // Logo lines use prefix "[logo_N]" where N is the row index 0-6.
+    // We interpolate a cyan→magenta gradient across the 7 art rows.
+    if (line.rfind("[logo_", 0) == 0 && line.size() > 7 && line[7] == ']') {
+      int logo_row = line[6] - '0';
+      // Gradient: cyan (0,230,255) → green (80,255,160) → magenta (220,80,255)
+      // 7 steps, indices 0-6.
+      static const uint32_t GRAD_R[] = {  0,  20,  60, 120, 180, 210, 220 };
+      static const uint32_t GRAD_G[] = { 230, 255, 255, 255, 200, 130,  80 };
+      static const uint32_t GRAD_B[] = { 255, 200, 140,  80, 100, 200, 255 };
+      int gi = std::max(0, std::min(logo_row, 6));
+      ch = chat_ch(GRAD_R[gi], GRAD_G[gi], GRAD_B[gi]);
+    }
+    else if (line.rfind("You: ",   0) == 0) ch = chat_ch(100, 200, 255);
     else if (line.rfind("Nitro: ", 0) == 0) ch = chat_ch(180, 255, 180);
     else if (line.rfind("[tool]",  0) == 0) ch = chat_ch(255, 180,  80);
     else if (line.rfind("[err]",   0) == 0) ch = chat_ch(255,  80,  80);
     else if (line.rfind("[sys]",   0) == 0) ch = chat_ch(140, 140, 200);
     else                                     ch = chat_ch(210, 210, 210);
     ncplane_set_channels(chatpl, ch);
-    std::string display = line.size() > cols ? line.substr(0, cols) : line;
+    // Strip the [logo_N] prefix before rendering.
+    std::string display = (line.rfind("[logo_", 0) == 0 && line.size() > 8)
+      ? line.substr(8) : line;
+    if (display.size() > cols) display = display.substr(0, cols);
     ncplane_putstr_yx(chatpl, row, 0, display.c_str());
   }
 }
@@ -575,14 +640,14 @@ void TuiState::redraw_input() {
   cur_in_view = std::min(cur_in_view, (int)visible.size());
   std::string before = visible.substr(0, cur_in_view);
   std::string after  = cur_in_view < (int)visible.size()
-                         ? visible.substr(cur_in_view + 1) : "";
+    ? visible.substr(cur_in_view + 1) : "";
   char cursor_ch_val = cur_in_view < (int)visible.size()
-                     ? visible[cur_in_view] : ' ';
+    ? visible[cur_in_view] : ' ';
   ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
   ncplane_putstr_yx(inputpl, 1, prompt_cols, before.c_str());
   int cx = prompt_cols + cur_in_view;
   ncplane_set_channels(inputpl,
-    NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B, 180, 230, 255));
+                       NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B, 180, 230, 255));
   char cbuf[2] = { cursor_ch_val, '\0' };
   ncplane_putstr_yx(inputpl, 1, cx, cbuf);
   ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
@@ -667,7 +732,7 @@ void TuiState::show_modal_popup(const std::string &message) {
   // Background: deep navy.
   static constexpr uint32_t PBG_R = 20, PBG_G = 28, PBG_B = 50;
   ncplane_set_base(modal_plane, " ", 0,
-    NCCHANNELS_INITIALIZER(PBG_R, PBG_G, PBG_B, PBG_R, PBG_G, PBG_B));
+                   NCCHANNELS_INITIALIZER(PBG_R, PBG_G, PBG_B, PBG_R, PBG_G, PBG_B));
   ncplane_erase(modal_plane);
 
   // Border — bright cyan.
@@ -702,8 +767,8 @@ void TuiState::show_modal_popup(const std::string &message) {
   // Truncate message to fit inside border.
   int max_msg = popup_w - 4;
   std::string display = message.size() > (size_t)max_msg
-                          ? message.substr(0, max_msg)
-                          : message;
+    ? message.substr(0, max_msg)
+    : message;
   ncplane_putstr_yx(modal_plane, 2, 2, display.c_str());
 
   notcurses_render(nc);
@@ -732,7 +797,7 @@ std::string TuiState::rag_folder_picker(const std::string &start_dir) {
 
   // Build an entry list for the current directory.
   auto load_entries = [](const std::string &dir,
-                          std::vector<std::string> &entries) {
+                         std::vector<std::string> &entries) {
     entries.clear();
     std::error_code ec;
     // Add ".." for going up (except at fs root).
@@ -771,7 +836,7 @@ std::string TuiState::rag_folder_picker(const std::string &start_dir) {
 
   static constexpr uint32_t PBG_R = 18, PBG_G = 24, PBG_B = 40;
   ncplane_set_base(picker, " ", 0,
-    NCCHANNELS_INITIALIZER(PBG_R, PBG_G, PBG_B, PBG_R, PBG_G, PBG_B));
+                   NCCHANNELS_INITIALIZER(PBG_R, PBG_G, PBG_B, PBG_R, PBG_G, PBG_B));
 
   auto draw_picker = [&]() {
     ncplane_erase(picker);
@@ -791,7 +856,7 @@ std::string TuiState::rag_folder_picker(const std::string &start_dir) {
 
     // Title
     ncplane_set_channels(picker,
-      NCCHANNELS_INITIALIZER(255, 220, 80, PBG_R, PBG_G, PBG_B));
+                         NCCHANNELS_INITIALIZER(255, 220, 80, PBG_R, PBG_G, PBG_B));
     ncplane_putstr_yx(picker, 0, 2, " 📂 RAG Folder Picker ");
 
     // Current path (truncated to fit)
@@ -799,14 +864,14 @@ std::string TuiState::rag_folder_picker(const std::string &start_dir) {
     if ((int)path_display.size() > PW - 4)
       path_display = "…" + path_display.substr(path_display.size() - (PW - 5));
     ncplane_set_channels(picker,
-      NCCHANNELS_INITIALIZER(160, 200, 240, PBG_R, PBG_G, PBG_B));
+                         NCCHANNELS_INITIALIZER(160, 200, 240, PBG_R, PBG_G, PBG_B));
     ncplane_putstr_yx(picker, 1, 2, path_display.c_str());
 
     // Hint line
     ncplane_set_channels(picker,
-      NCCHANNELS_INITIALIZER(120, 120, 160, PBG_R, PBG_G, PBG_B));
+                         NCCHANNELS_INITIALIZER(120, 120, 160, PBG_R, PBG_G, PBG_B));
     ncplane_putstr_yx(picker, PH - 2, 2,
-      "↑↓ navigate  Enter open  s=select dir  Esc cancel");
+                      "↑↓ navigate  Enter open  s=select dir  Esc cancel");
 
     // Entry list
     int list_rows = PH - 5;   // rows 2 … PH-4 available
@@ -827,7 +892,7 @@ std::string TuiState::rag_folder_picker(const std::string &start_dir) {
       uint32_t bg = is_selected ? 180 : PBG_G;
       uint32_t bb = is_selected ? 255 : PBG_B;
       ncplane_set_channels(picker,
-        NCCHANNELS_INITIALIZER(fr, fg, fb, br, bg, bb));
+                           NCCHANNELS_INITIALIZER(fr, fg, fb, br, bg, bb));
       // Pad entry to fill width
       std::string label = (is_selected ? " ▶ " : "   ") + entries[idx];
       if ((int)label.size() > PW - 2) label = label.substr(0, PW - 2);
@@ -1091,7 +1156,7 @@ bool AgentState::setup_model(const NitroConfig &cfg, TuiState &tui) {
   llama.reset();
   apply_generation_params(cfg);
   if (!llama.load_model(cfg.model_path, cfg.n_ctx, cfg.n_batch,
-                         cfg.n_gpu_layers, cfg.log_level)) {
+                        cfg.n_gpu_layers, cfg.log_level)) {
     tui.dismiss_modal_popup();
     tui.append_line(std::string("[err] ") + llama.last_error());
     tui.redraw_all();
@@ -1196,8 +1261,8 @@ bool AgentState::rag_index(const std::string &path, TuiState &tui) {
 // Agent turn
 // ═══════════════════════════════════════════════════════════════════════════
 bool AgentState::run_turn(const std::string &user_message,
-                           const NitroConfig &cfg,
-                           TuiState          &tui) {
+                          const NitroConfig &cfg,
+                          TuiState          &tui) {
   if (!model_loaded) {
     tui.append_line("[err] No model loaded. Use /model <path>");
     tui.redraw_all();
@@ -1206,7 +1271,7 @@ bool AgentState::run_turn(const std::string &user_message,
   std::string effective_message = user_message;
   if (embed_llama && rag_db && rag_session) {
     std::string context = llama.rag_retrieve(*rag_db, user_message,
-                                              cfg.rag_top_k, *rag_session);
+                                             cfg.rag_top_k, *rag_session);
     if (!context.empty()) {
       effective_message = "Context:\n" + context + "\n\nUser: " + user_message;
     }
@@ -1292,9 +1357,9 @@ bool AgentState::run_turn(const std::string &user_message,
         tui.append_line("[tool] " + op + " " + arg1 +
                         (op == "TOOL:WRITE" ? " <content>" : ""));
         tui.redraw_all();
-        std::string result = process_tool(tool_line, cfg.sandbox, tui);
+        std::string result = process_tool(tool_line, cfg.sandbox, cfg.run_allowed, tui);
         tui.append_line("[tool] → " +
-          result.substr(0, 200) + (result.size() > 200 ? "…" : ""));
+                        result.substr(0, 200) + (result.size() > 200 ? "…" : ""));
         tui.redraw_all();
         if (!llama.add_message(*iter, "tool", result)) {
           tui.append_line(std::string("[err] tool result inject: ") + llama.last_error());
@@ -1314,7 +1379,7 @@ bool AgentState::run_turn(const std::string &user_message,
     std::string trimmed = buffer;
     trimmed.erase(0, trimmed.find_first_not_of(" \t"));
     if (trimmed.substr(0, 5) == "TOOL:") {
-      std::string result = process_tool(trimmed, cfg.sandbox, tui);
+      std::string result = process_tool(trimmed, cfg.sandbox, cfg.run_allowed, tui);
       tui.append_line("[tool] → " + result.substr(0, 200));
       tui.redraw_all();
       llama.add_message(*iter, "tool", result);
@@ -1335,9 +1400,9 @@ bool AgentState::run_turn(const std::string &user_message,
 
   char stat[128];
   std::snprintf(stat, sizeof(stat), "[sys] %.1f tok/s  (%d tokens)  KV %.1f%%",
-    (double)tui.tokens_per_sec,
-    iter->_tokens_generated,
-    (double)mem.kv_percent);
+                (double)tui.tokens_per_sec,
+                iter->_tokens_generated,
+                (double)mem.kv_percent);
   tui.append_line(stat);
   tui.redraw_all();
   return true;
@@ -1401,10 +1466,10 @@ static const std::vector<std::string> CODE_EXTENSIONS = {
 };
 
 static std::string strip_code_fences(const std::string &filename,
-                                      const std::string &src) {
+                                     const std::string &src) {
   auto ext = fs::path(filename).extension().string();
   bool is_code = std::any_of(CODE_EXTENSIONS.begin(), CODE_EXTENSIONS.end(),
-                              [&](const std::string &e){ return ext == e; });
+                             [&](const std::string &e){ return ext == e; });
   if (!is_code) return src;
   auto pos = src.find("```");
   if (pos == std::string::npos) return src;
@@ -1416,6 +1481,137 @@ static std::string strip_code_fences(const std::string &filename,
   return inner;
 }
 
+// ═══════════════════════════════════════════════════════════════════════════
+// html_to_text — strip HTML for cleaner TOOL:CURL context
+// ═══════════════════════════════════════════════════════════════════════════
+// Lightweight HTML→plain-text conversion (string scanning, not a real HTML parser):
+//   • Drops the first <head> span and every <script>/<style> span entirely.
+//   • Inserts a newline at opening block-level tags (p, div, br, li, h1-h6 …).
+//   • Strips all remaining tags.
+//   • Decodes a fixed table of named entities plus numeric &#NNN; / &#xHHH; entities.
+//   • Collapses space/tab runs to one space; keeps at most 2 consecutive newlines.
+static std::string html_to_text(const std::string &html) {
+  std::string s = html;
+
+  // 1. Remove the first <head…>…</head> span (matched on a lowercased copy).
+  {
+    std::string lo = s;
+    std::transform(lo.begin(), lo.end(), lo.begin(), ::tolower);
+    auto p0 = lo.find("<head");  // NOTE(review): this prefix also matches "<header"
+    auto p1 = lo.find("</head>");
+    if (p0 != std::string::npos && p1 != std::string::npos)
+      s.erase(p0, p1 + 7 - p0);  // 7 == strlen("</head>"); NOTE(review): wraps if "</head>" precedes p0
+  }
+
+  // 2. Remove <script>…</script> and <style>…</style>
+  for (const std::string &tag : {"script", "style"}) {
+    std::string open  = "<" + tag;
+    std::string close = "</" + tag + ">";
+    std::string lo = s;  // lowercased mirror of s, used only for searching
+    std::transform(lo.begin(), lo.end(), lo.begin(), ::tolower);
+    for (;;) {
+      auto p0 = lo.find(open);
+      if (p0 == std::string::npos) break;
+      auto p1 = lo.find(close, p0);
+      if (p1 == std::string::npos) { s.erase(p0); lo.erase(p0); break; }  // unterminated block: drop the rest
+      s.erase(p0, p1 + close.size() - p0);
+      lo.erase(p0, p1 + close.size() - p0);  // same range, so offsets in lo and s stay aligned
+    }
+  }
+
+  // 3. Drop every <…> span, emitting '\n' where an opening block-level tag stood.
+  static const char *const BLOCK[] = {
+    "p","div","br","li","tr","h1","h2","h3","h4","h5","h6",
+    "article","section","header","footer","nav","main", nullptr  // nullptr terminates the scan below
+  };
+  {
+    std::string out;
+    out.reserve(s.size());
+    size_t i = 0;
+    while (i < s.size()) {
+      if (s[i] != '<') { out += s[i++]; continue; }
+      auto ce = s.find('>', i);
+      if (ce == std::string::npos) { out += s[i++]; continue; }  // '<' with no later '>': copied as text
+      std::string inner = s.substr(i + 1, ce - i - 1);
+      size_t sp = inner.find_first_of(" \t/\r\n");  // tag name ends at whitespace or '/'
+      std::string name = (sp != std::string::npos) ? inner.substr(0, sp) : inner;  // empty for closing tags ("/p")
+      std::transform(name.begin(), name.end(), name.begin(), ::tolower);
+      for (int k = 0; BLOCK[k]; ++k)
+        if (name == BLOCK[k]) { out += '\n'; break; }
+      i = ce + 1;  // skip past '>' — the tag itself is never copied to out
+    }
+    s = out;
+  }
+
+  // 4. Strip whatever is still between '<' and '>'; stray '>' characters are dropped too.
+  {
+    std::string out; out.reserve(s.size());
+    bool in_tag = false;
+    for (char c : s) {
+      if (c == '<')  { in_tag = true;  continue; }
+      if (c == '>')  { in_tag = false; continue; }
+      if (!in_tag)     out += c;  // text after an unmatched '<' is discarded
+    }
+    s = out;
+  }
+
+  // 5. Decode common HTML entities, one table row at a time, in table order.
+  static const std::pair<const char*, const char*> ENT[] = {
+    {"&amp;","&"},{"&lt;","<"},{"&gt;",">"},{"&quot;","\""},  // NOTE(review): "&amp;" runs first, so "&amp;lt;" ends up as "<"
+    {"&apos;","'"},{"&nbsp;"," "},{"&mdash;","—"},{"&ndash;","–"},
+    {"&hellip;","…"},{"&#39;","'"},{"&#34;","\""},
+    {nullptr,nullptr}
+  };
+  for (int k = 0; ENT[k].first; ++k) {
+    std::string e = ENT[k].first, r = ENT[k].second;
+    size_t pos = 0;
+    while ((pos = s.find(e, pos)) != std::string::npos)
+      { s.replace(pos, e.size(), r); pos += r.size(); }  // resume after the replacement text
+  }
+  // Numeric entities &#NNN; and &#xHHH; are re-encoded as UTF-8.
+  {
+    std::string out; out.reserve(s.size());
+    size_t i = 0;
+    while (i < s.size()) {
+      if (s[i]=='&' && i+2<s.size() && s[i+1]=='#') {
+        size_t semi = s.find(';', i+2);
+        if (semi != std::string::npos && semi-i < 10) {  // only accept a ';' close to the '&'
+          std::string num = s.substr(i+2, semi-i-2);
+          try {
+            uint32_t cp = (num[0]=='x'||num[0]=='X')
+              ? (uint32_t)std::stoul(num.substr(1),nullptr,16)
+              : (uint32_t)std::stoul(num);
+            if      (cp < 0x80)  { out += (char)cp; }
+            else if (cp < 0x800) { out += (char)(0xC0|(cp>>6)); out += (char)(0x80|(cp&0x3F)); }
+            else                 { out += (char)(0xE0|(cp>>12)); out += (char)(0x80|((cp>>6)&0x3F)); out += (char)(0x80|(cp&0x3F)); }  // NOTE(review): cp >= 0x10000 needs a 4-byte sequence
+            i = semi+1; continue;
+          } catch (...) {}  // not a number: fall through and copy the '&' literally
+        }
+      }
+      out += s[i++];
+    }
+    s = out;
+  }
+
+  // 6. Collapse whitespace; keep at most 2 consecutive newlines; trim both ends.
+  {
+    std::string out; out.reserve(s.size());
+    int nl_run = 0; bool last_sp = false;  // current newline run length / previous char was a space
+    for (char c : s) {
+      if (c == '\r') continue;
+      if (c == '\t') c = ' ';
+      if (c == '\n') { ++nl_run; last_sp=false; if (nl_run<=2) out+='\n'; continue; }
+      nl_run = 0;  // any other character, including a space, ends the newline run
+      if (c == ' ') { if (!last_sp) { out+=' '; last_sp=true; } continue; }
+      last_sp = false; out += c;
+    }
+    size_t f = out.find_first_not_of(" \n");
+    size_t l = out.find_last_not_of(" \n");
+    s = (f == std::string::npos) ? "" : out.substr(f, l-f+1);  // all-whitespace input yields ""
+  }
+  return s;
+}
+
 // ═══════════════════════════════════════════════════════════════════════════
 // TOOL:CURL — HTTP GET with libcurl, returns body text (capped at 32 KB).
 // ═══════════════════════════════════════════════════════════════════════════
@@ -1453,6 +1649,14 @@ static std::string tool_curl(const std::string &url) {
   CURLcode res = curl_easy_perform(curl);
   long http_code = 0;
   curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
+
+  // Query content-type before cleanup (pointer is only valid while handle lives).
+  char *ct_raw = nullptr;
+  curl_easy_getinfo(curl, CURLINFO_CONTENT_TYPE, &ct_raw);
+  std::string content_type = ct_raw ? ct_raw : "";
+  std::transform(content_type.begin(), content_type.end(),
+                 content_type.begin(), ::tolower);
+
   curl_easy_cleanup(curl);
 
   if (res != CURLE_OK) {
@@ -1462,6 +1666,13 @@ static std::string tool_curl(const std::string &url) {
     return "ERROR: HTTP " + std::to_string(http_code) + " from " + url;
   }
   if (body.empty()) return "(empty response)";
+
+  // Strip HTML tags so the model receives clean plain text.
+  bool is_html = (content_type.find("text/html") != std::string::npos)
+    || (body.size() > 5 && body.substr(0,5) == "<!DOC")
+    || (body.size() > 6 && body.substr(0,6) == "<html>");
+  if (is_html) body = html_to_text(body);
+
   return body;
 }
 
@@ -1469,8 +1680,9 @@ static std::string tool_curl(const std::string &url) {
 // Tool dispatch
 // ═══════════════════════════════════════════════════════════════════════════
 static std::string process_tool(const std::string &cmd,
-                                  const std::string &sandbox,
-                                  TuiState &tui) {
+                                const std::string &sandbox,
+                                const std::vector<std::string> &run_allowed,
+                                TuiState &tui) {
   std::string op, arg1, arg2;
   auto sp1 = cmd.find(' ');
   if (sp1 == std::string::npos) {
@@ -1528,7 +1740,7 @@ static std::string process_tool(const std::string &cmd,
     if (!path_in_sandbox(sandbox, p)) return "ERROR: path outside sandbox";
     std::string content = strip_code_fences(arg1, arg2);
     return write_file(p, content) ? "OK: written to " + arg1
-                                   : "ERROR: write failed for " + arg1;
+      : "ERROR: write failed for " + arg1;
   }
   if (op == "TOOL:PERMISSION") {
     std::string result;
@@ -1542,6 +1754,16 @@ static std::string process_tool(const std::string &cmd,
   if (op == "TOOL:RUN") {
     std::string prog = resolve(arg1);
     if (!path_in_sandbox(sandbox, prog)) return "ERROR: path outside sandbox";
+    // Enforce allowlist if one is configured.
+    if (!run_allowed.empty()) {
+      std::string basename = fs::path(prog).filename().string();
+      bool permitted = std::any_of(run_allowed.begin(), run_allowed.end(),
+                                   [&](const std::string &a){ return a == basename; });
+      if (!permitted) {
+        return "ERROR: '" + basename + "' is not in the TOOL:RUN allowlist. "
+          "Use /set run_allowed <name> to permit it.";
+      }
+    }
     std::string command = prog + " " + arg2 + " 2>&1";
     FILE *fp = popen(command.c_str(), "r");
     if (!fp) return "ERROR: popen failed";
@@ -1559,30 +1781,30 @@ static std::string process_tool(const std::string &cmd,
 // System prompt
 // ═══════════════════════════════════════════════════════════════════════════
 static std::string build_system_prompt(const std::vector<std::string> &knowledge_files,
-                                        const std::string &sandbox) {
+                                       const std::string &sandbox) {
   std::string p;
   p += "You are Nitro, an agentic AI assistant for software development.\n"
-       "Your sandbox (project directory) is: " + sandbox + "\n\n"
-       "## Tool protocol\n"
-       "Emit tool calls on their own line. The host executes them and returns\n"
-       "TOOL_RESULT: <value> on the next line.\n\n"
-       "Available tools:\n"
-       "  TOOL:LIST   [dir]          list files (default: sandbox root)\n"
-       "  TOOL:READ   <file>         read file contents\n"
-       "  TOOL:WRITE  <file> <text>  write text to file\n"
-       "  TOOL:EXISTS <file>         YES or NO\n"
-       "  TOOL:RUN    <prog> [args]  run program inside sandbox\n"
-       "  TOOL:DATE                  current date\n"
-       "  TOOL:TIME                  current time\n"
-       "  TOOL:RND                   random float\n"
-       "  TOOL:PERMISSION            ask user for explicit permission\n"
-       "  TOOL:CURL   <url>          HTTP GET; returns response body (max 32 KB)\n\n"
-       "Rules:\n"
-       "- Never access files outside the sandbox.\n"
-       "- Use TOOL:PERMISSION before destructive or irreversible operations.\n"
-       "- Use TOOL:CURL to fetch documentation, APIs, or web content you need.\n"
-       "- Reason step-by-step inside <|think|>…</|think|> (hidden from user).\n"
-       "- After each tool call, explain what you did in plain English.\n\n";
+    "Your sandbox (project directory) is: " + sandbox + "\n\n"
+    "## Tool protocol\n"
+    "Emit tool calls on their own line. The host executes them and returns\n"
+    "TOOL_RESULT: <value> on the next line.\n\n"
+    "Available tools:\n"
+    "  TOOL:LIST   [dir]          list files (default: sandbox root)\n"
+    "  TOOL:READ   <file>         read file contents\n"
+    "  TOOL:WRITE  <file> <text>  write text to file\n"
+    "  TOOL:EXISTS <file>         YES or NO\n"
+    "  TOOL:RUN    <prog> [args]  run program inside sandbox\n"
+    "  TOOL:DATE                  current date\n"
+    "  TOOL:TIME                  current time\n"
+    "  TOOL:RND                   random float\n"
+    "  TOOL:PERMISSION            ask user for explicit permission\n"
+    "  TOOL:CURL   <url>          HTTP GET; returns response body (max 32 KB)\n\n"
+    "Rules:\n"
+    "- Never access files outside the sandbox.\n"
+    "- Use TOOL:PERMISSION before destructive or irreversible operations.\n"
+    "- Use TOOL:CURL to fetch documentation, APIs, or web content you need.\n"
+    "- Reason step-by-step inside <|think|>…</|think|> (hidden from user).\n"
+    "- After each tool call, explain what you did in plain English.\n\n";
   for (const auto &kf : knowledge_files) {
     std::ifstream f(kf);
     if (!f) continue;
@@ -1596,9 +1818,9 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
 // Slash command handler
 // ═══════════════════════════════════════════════════════════════════════════
 static void handle_slash(const std::string &input,
-                          NitroConfig       &cfg,
-                          AgentState        &agent,
-                          TuiState          &tui) {
+                         NitroConfig       &cfg,
+                         AgentState        &agent,
+                         TuiState          &tui) {
   auto sp = input.find(' ');
   std::string verb = (sp == std::string::npos) ? input : input.substr(0, sp);
   std::string rest;
@@ -1609,14 +1831,19 @@ static void handle_slash(const std::string &input,
 
   if (verb == "/help") {
     tui.append_line("[sys] Commands:");
-    tui.append_line("[sys]   /model  <path>   load a GGUF model");
-    tui.append_line("[sys]   /embed  <path>   load an embedding model for RAG");
-    tui.append_line("[sys]   /rag    [path]   index file or directory (picker if no path)");
-    tui.append_line("[sys]   /memory          KV / VRAM / layer stats");
-    tui.append_line("[sys]   /clear           reset conversation");
-    tui.append_line("[sys]   /settings        show current settings");
-    tui.append_line("[sys]   /help            this message");
-    tui.append_line("[sys]   exit / quit      exit Nitro");
+    tui.append_line("[sys]   /model  <path>           load a GGUF model");
+    tui.append_line("[sys]   /embed  <path>           load an embedding model for RAG");
+    tui.append_line("[sys]   /rag    [path]           index file or directory (picker if no path)");
+    tui.append_line("[sys]   /memory                  KV / VRAM / layer stats");
+    tui.append_line("[sys]   /clear                   reset conversation");
+    tui.append_line("[sys]   /settings                show current settings");
+    tui.append_line("[sys]   /set    <key> <value>    change a setting live");
+    tui.append_line("[sys]   /help                    this message");
+    tui.append_line("[sys]   exit / quit              exit Nitro");
+    tui.append_line("[sys] Settable keys (via /set):");
+    tui.append_line("[sys]   temperature  top_p  top_k  min_p  penalty_repeat");
+    tui.append_line("[sys]   n_max_tokens  penalty_last_n  rag_top_k  n_gpu_layers");
+    tui.append_line("[sys]   run_allowed  (comma-separated list, e.g. python3,make)");
     tui.redraw_all();
     return;
   }
@@ -1701,6 +1928,72 @@ static void handle_slash(const std::string &input,
     return;
   }
 
+  if (verb == "/set") {
+    // Usage: /set <key> <value>
+    // Parses the key and value, updates cfg in place, re-applies generation
+    // params if needed, and saves settings to disk.
+    auto sp2 = rest.find(' ');
+    std::string key = (sp2 == std::string::npos) ? rest : rest.substr(0, sp2);
+    std::string val = (sp2 == std::string::npos) ? "" : rest.substr(sp2 + 1);
+    val.erase(0, val.find_first_not_of(" \t"));
+
+    if (key.empty() || val.empty()) {
+      tui.append_line("[err] Usage: /set <key> <value>");
+      tui.redraw_all(); return;
+    }
+
+    bool ok = true;
+    bool needs_reparam = false; // whether to re-apply generation params
+
+    try {
+      if (key == "temperature")    { cfg.temperature    = std::stof(val); needs_reparam = true; }
+      else if (key == "top_p")     { cfg.top_p          = std::stof(val); needs_reparam = true; }
+      else if (key == "min_p")     { cfg.min_p          = std::stof(val); needs_reparam = true; }
+      else if (key == "top_k")     { cfg.top_k          = std::stoi(val); needs_reparam = true; }
+      else if (key == "penalty_repeat") { cfg.penalty_repeat = std::stof(val); needs_reparam = true; }
+      else if (key == "penalty_last_n") { cfg.penalty_last_n = std::stoi(val); needs_reparam = true; }
+      else if (key == "n_max_tokens")   { cfg.n_max_tokens   = std::stoi(val); needs_reparam = true; }
+      else if (key == "rag_top_k")      { cfg.rag_top_k      = std::stoi(val); }
+      else if (key == "n_gpu_layers")   {
+        cfg.n_gpu_layers = std::stoi(val);
+        tui.append_line("[sys] n_gpu_layers will take effect on next /model load.");
+      }
+      else if (key == "run_allowed") {
+        // Accept a comma-separated list of basenames, or "none" to clear.
+        cfg.run_allowed.clear();
+        if (val != "none") {
+          std::istringstream iss(val);
+          std::string tok;
+          while (std::getline(iss, tok, ',')) {
+            tok.erase(0, tok.find_first_not_of(" \t"));
+            tok.erase(tok.find_last_not_of(" \t") + 1);
+            if (!tok.empty()) cfg.run_allowed.push_back(tok);
+          }
+        }
+        if (cfg.run_allowed.empty()) {
+          tui.append_line("[sys] run_allowed cleared — all sandbox programs permitted.");
+        } else {
+          std::string list;
+          for (const auto &e : cfg.run_allowed) list += e + " ";
+          tui.append_line("[sys] run_allowed: " + list);
+        }
+      }
+      else { tui.append_line("[err] Unknown key '" + key + "'.  Try /help for list."); ok = false; }
+    } catch (const std::exception &ex) {
+      tui.append_line(std::string("[err] /set: ") + ex.what());
+      ok = false;
+    }
+
+    if (ok) {
+      if (needs_reparam && agent.model_loaded)
+        agent.apply_generation_params(cfg);
+      save_settings(cfg);
+      tui.append_line("[sys] " + key + " = " + val);
+    }
+    tui.redraw_all();
+    return;
+  }
+
   tui.append_line("[err] Unknown command: " + verb + "  (try /help)");
   tui.redraw_all();
 }
@@ -1709,51 +2002,16 @@ static void handle_slash(const std::string &input,
 // Welcome banner  — colourful multi-line ASCII logo
 // ═══════════════════════════════════════════════════════════════════════════
 static void welcome(TuiState &tui, const std::string &sandbox) {
-  // Each line is tagged with a [sys] prefix so redraw_chat applies the
-  // right channel (chat_ch 140,140,200). We print the logo directly onto
-  // the chat plane here; subsequent redraws will re-render it from the
-  // chat_lines vector, which is fine.
-  //
-  // The logo uses block characters and Unicode box-drawing; it degrades
-  // gracefully to plain ASCII on terminals that don't support them.
-  //
-  // Colour coding: each row gets a different fg colour via a small gradient
-  // from cyan → magenta so it looks "fancy" without requiring custom planes.
-  // Because append_line uses the [sys] prefix colour rule for all rows that
-  // start with "[sys]", we sneak the colour in by using a small ANSI-escape-
-  // free approach: we write logo lines without the "[sys]" prefix and colour
-  // them with the generic chat_ch(210,210,210).  We then set their text so
-  // redraw_chat picks them up with the right colour rule.
-  //
-  // The easiest approach: use special prefix "[logo]" → handled in
-  // redraw_chat just like [sys] but with a different per-row colour.
-  // To avoid touching redraw_chat, we instead pick a gradient and embed it
-  // directly into the strings, relying on notcurses to render the UTF-8
-  // box-art as-is (no ANSI escapes — notcurses owns the terminal).
-
-  // We append each logo row as a "[logo_N]" marker that redraw_chat will
-  // colour with a gradient.  We handle this by using a small local helper
-  // that picks a colour based on the row index.
-
-  // ── Logo rows (pure text, no ANSI) ──────────────────────────────────
-  // The prefix "⚡" keeps the colour rule from matching "[sys]" etc.
-  // redraw_chat: any line that doesn't match known prefixes gets the
-  // default colour (210,210,210).  We rely on that for most logo rows.
-  //
-  // To get a gradient we insert a thin λ-wrapper that adds lines with
-  // distinct prefixes we interpret in redraw_chat.  But that requires
-  // editing redraw_chat — so instead we just use [sys] lines with
-  // embedded Unicode art; they all get the same blue-ish colour which
-  // still looks great.
-
+  // Logo lines use the "[logo_N]" prefix so redraw_chat applies a
+  // per-row cyan→magenta gradient (N = 0-6 maps to the gradient table).
   tui.append_line("");
-  tui.append_line("[sys]  ███╗   ██╗██╗████████╗██████╗  ██████╗ ");
-  tui.append_line("[sys]  ████╗  ██║██║╚══██╔══╝██╔══██╗██╔═══██╗");
-  tui.append_line("[sys]  ██╔██╗ ██║██║   ██║   ██████╔╝██║   ██║");
-  tui.append_line("[sys]  ██║╚██╗██║██║   ██║   ██╔══██╗██║   ██║");
-  tui.append_line("[sys]  ██║ ╚████║██║   ██║   ██║  ██║╚██████╔╝");
-  tui.append_line("[sys]  ╚═╝  ╚═══╝╚═╝   ╚═╝   ╚═╝  ╚═╝ ╚═════╝ ");
-  tui.append_line("[sys]  ─────────── agentic LLM shell v1.0 ────────────");
+  tui.append_line("[logo_0]  ███╗   ██╗██╗████████╗██████╗  ██████╗ ");
+  tui.append_line("[logo_1]  ████╗  ██║██║╚══██╔══╝██╔══██╗██╔═══██╗");
+  tui.append_line("[logo_2]  ██╔██╗ ██║██║   ██║   ██████╔╝██║   ██║");
+  tui.append_line("[logo_3]  ██║╚██╗██║██║   ██║   ██╔══██╗██║   ██║");
+  tui.append_line("[logo_4]  ██║ ╚████║██║   ██║   ██║  ██║╚██████╔╝");
+  tui.append_line("[logo_5]  ╚═╝  ╚═══╝╚═╝   ╚═╝   ╚═╝  ╚═╝ ╚═════╝ ");
+  tui.append_line("[logo_6]  ─────────── agentic LLM shell v1.0 ──────────────");
   tui.append_line("");
   tui.append_line("[sys]  Sandbox : " + sandbox);
   tui.append_line("[sys]  /help for commands  ·  exit to quit");
@@ -1798,26 +2056,26 @@ int main(int argc, char **argv) {
       cfg.n_gpu_layers = std::stoi(take_next(a.c_str()));
     } else if (a == "-h" || a == "--help") {
       std::puts(
-        "Usage: nitro [options] [project_dir]\n"
-        "\n"
-        "Options:\n"
-        "  -m, --model  <path>      GGUF model to load on startup\n"
-        "  -e, --embed  <path>      embedding model for RAG\n"
-        "  -g, --gpu-layers <n>     GPU layers to offload (default: 32)\n"
-        "  -h, --help               show this help\n"
-        "\n"
-        "project_dir defaults to the current working directory.\n"
-        "Settings are persisted to ~/.config/nitro.settings.json.\n"
-        "\n"
-        "Slash commands inside nitro:\n"
-        "  /model  <path>           load / hot-reload a GGUF\n"
-        "  /embed  <path>           load an embedding model\n"
-        "  /rag    [path]           index file or directory (picker if no path)\n"
-        "  /memory                  KV / VRAM / layer stats\n"
-        "  /settings                show current settings\n"
-        "  /clear                   reset conversation\n"
-        "  /help                    list commands\n"
-      );
+                "Usage: nitro [options] [project_dir]\n"
+                "\n"
+                "Options:\n"
+                "  -m, --model  <path>      GGUF model to load on startup\n"
+                "  -e, --embed  <path>      embedding model for RAG\n"
+                "  -g, --gpu-layers <n>     GPU layers to offload (default: 32)\n"
+                "  -h, --help               show this help\n"
+                "\n"
+                "project_dir defaults to the current working directory.\n"
+                "Settings are persisted to ~/.config/nitro.settings.json.\n"
+                "\n"
+                "Slash commands inside nitro:\n"
+                "  /model  <path>           load / hot-reload a GGUF\n"
+                "  /embed  <path>           load an embedding model\n"
+                "  /rag    [path]           index file or directory (picker if no path)\n"
+                "  /memory                  KV / VRAM / layer stats\n"
+                "  /settings                show current settings\n"
+                "  /clear                   reset conversation\n"
+                "  /help                    list commands\n"
+                );
       return 0;
     } else if (!a.empty() && a[0] == '-') {
       std::fprintf(stderr, "nitro: unknown option '%s'  (try --help)\n", a.c_str());
@@ -1845,6 +2103,8 @@ int main(int argc, char **argv) {
   // ── Init TUI ──────────────────────────────────────────────────────
   TuiState tui;
   tui.init();
+  // Load persisted input history so up-arrow works across sessions.
+  tui.history.load(history_path());
   welcome(tui, cfg.sandbox);
 
   // ── Init agent ────────────────────────────────────────────────────
@@ -1886,6 +2146,8 @@ int main(int argc, char **argv) {
   }
 
   tui.destroy();
+  // Persist input history for the next session.
+  tui.history.save(history_path());
   curl_global_cleanup();
   return 0;
 }

From 87981a1acb9488932d3539af1c7fea61c08b4580 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Fri, 22 May 2026 14:39:48 +0930
Subject: [PATCH 36/54] LLAMA: nitro agent - wip

---
 llama/nitro.cpp | 263 +++++++++++++++++++++++++++++-------------------
 1 file changed, 160 insertions(+), 103 deletions(-)

diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index 9a7424f..b35e10e 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -18,8 +18,8 @@
 //   -g, --gpu-layers <n>      layers to offload to GPU (default: 32)
 //
 // Slash commands:
-//   /model  <path>            — load / hot-reload a GGUF model
-//   /embed  <path>            — load an embedding model for RAG
+//   /model  [path]            — load / hot-reload a GGUF model (picker if no path)
+//   /embed  [path]            — load an embedding model for RAG (picker if no path)
 //   /rag    <path>            — index a file or directory into RAG
 //   /memory                   — show KV / VRAM / layer stats
 //   /clear                    — reset conversation (keeps system prompt)
@@ -494,13 +494,23 @@ struct TuiState {
   // Presents an interactive directory browser to let the user choose a
   // folder (or file) to index.  Returns the selected path, or empty string
   // if the user cancelled.
-  std::string rag_folder_picker(const std::string &start_dir);
+  // ── RAG / file browser popup ─────────────────────────────────────
+  // Used by /rag, /model, and /embed to pick a path interactively.
+  // Pass a hint string shown in the title bar (e.g. "RAG Folder",
+  // "Model File", "Embedding Model").
+  // Returns the selected path, or empty string if the user cancelled.
+  std::string file_picker(const std::string &start_dir,
+                          const std::string &title_hint = "File");
+  // Legacy alias kept for callers that used the old name.
+  std::string rag_folder_picker(const std::string &start_dir) {
+    return file_picker(start_dir, "RAG Folder");
+  }
 };
+
 // ─── colour helpers ──────────────────────────────────────────────────────
 static constexpr uint32_t BG_CHAT_R = 18,  BG_CHAT_G = 22,  BG_CHAT_B = 30;
 static constexpr uint32_t BG_INP_R  = 22,  BG_INP_G  = 28,  BG_INP_B  = 38;
 static constexpr uint32_t BG_HDR_R  = 30,  BG_HDR_G  = 40,  BG_HDR_B  = 55;
-
 static inline uint64_t fg_rgb(uint32_t r, uint32_t g, uint32_t b) {
   return NCCHANNELS_INITIALIZER(r, g, b, 0, 0, 0);
 }
@@ -513,6 +523,7 @@ static inline uint64_t inp_ch(uint32_t r, uint32_t g, uint32_t b) {
 static inline uint64_t hdr_ch(uint32_t r, uint32_t g, uint32_t b) {
   return NCCHANNELS_INITIALIZER(r, g, b, BG_HDR_R, BG_HDR_G, BG_HDR_B);
 }
+
 // ─── TuiState::init ──────────────────────────────────────────────────────
 void TuiState::init() {
   notcurses_options opts{};
@@ -544,13 +555,12 @@ void TuiState::init() {
   ncplane_set_base(inputpl, " ", 0,
                    NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B,
                                           BG_INP_R, BG_INP_G, BG_INP_B));
+  notcurses_mice_enable(nc, NCMICE_ALL_EVENTS);
   redraw_all();
 }
-
 void TuiState::destroy() {
   if (nc) { notcurses_stop(nc); nc = nullptr; }
 }
-
 void TuiState::resize() {
   notcurses_term_dim_yx(nc, (unsigned *)&term_rows, (unsigned *)&term_cols);
   ncplane_resize_simple(header,  1,                       (unsigned)term_cols);
@@ -787,23 +797,33 @@ void TuiState::dismiss_modal_popup() {
 // Keyboard:  ↑/↓ navigate,  Enter select/descend,  Backspace go up,
 //            's' select current dir for indexing,   Esc cancel.
 // Returns the chosen path or "" on cancel.
-std::string TuiState::rag_folder_picker(const std::string &start_dir) {
+// ─── TuiState::file_picker ────────────────────────────────────────────────
+// Unified interactive directory/file browser used by /rag, /model, /embed.
+// title_hint appears in the popup header (e.g. "RAG Folder", "Model File").
+//
+// Keyboard:
+//   ↑/↓        navigate list
+//   Enter      descend into directory, or select a file
+//   Backspace  go up one directory
+//   s          select the current directory itself (useful for /rag)
+//   Esc        cancel → returns ""
+//
+// Returns the chosen path, or "" on cancel.
+std::string TuiState::file_picker(const std::string &start_dir,
+                                  const std::string &title_hint) {
   std::string current_dir = start_dir;
   {
     std::error_code ec;
     auto canon = fs::canonical(start_dir, ec);
     if (!ec) current_dir = canon.string();
   }
-
-  // Build an entry list for the current directory.
   auto load_entries = [](const std::string &dir,
                          std::vector<std::string> &entries) {
     entries.clear();
     std::error_code ec;
-    // Add ".." for going up (except at fs root).
-    if (fs::path(dir).has_parent_path() && fs::path(dir) != fs::path(dir).root_path())
+    if (fs::path(dir).has_parent_path() &&
+        fs::path(dir) != fs::path(dir).root_path())
       entries.push_back("..");
-    // Dirs first, then files.
     std::vector<std::string> dirs, files;
     for (const auto &e : fs::directory_iterator(dir, ec)) {
       if (ec) break;
@@ -837,12 +857,17 @@ std::string TuiState::rag_folder_picker(const std::string &start_dir) {
   static constexpr uint32_t PBG_R = 18, PBG_G = 24, PBG_B = 40;
   ncplane_set_base(picker, " ", 0,
                    NCCHANNELS_INITIALIZER(PBG_R, PBG_G, PBG_B, PBG_R, PBG_G, PBG_B));
-
+  // Build a compact hint line appropriate to the operation.
+  // /rag adds 's=select dir'; /model and /embed only need file selection.
+  std::string hint_line = "↑↓ navigate  Enter open/select  Esc cancel";
+  if (title_hint.find("RAG") != std::string::npos ||
+      title_hint.find("Folder") != std::string::npos) {
+    hint_line = "↑↓ navigate  Enter open  s=select dir  Esc cancel";
+  }
   auto draw_picker = [&]() {
     ncplane_erase(picker);
     uint64_t border_ch = NCCHANNELS_INITIALIZER(100, 180, 255, PBG_R, PBG_G, PBG_B);
     ncplane_set_channels(picker, border_ch);
-    // Border
     ncplane_putstr_yx(picker, 0, 0, "╔");
     for (int c = 1; c < PW - 1; ++c) ncplane_putstr_yx(picker, 0, c, "═");
     ncplane_putstr_yx(picker, 0, PW - 1, "╗");
@@ -857,43 +882,40 @@ std::string TuiState::rag_folder_picker(const std::string &start_dir) {
     // Title
     ncplane_set_channels(picker,
                          NCCHANNELS_INITIALIZER(255, 220, 80, PBG_R, PBG_G, PBG_B));
-    ncplane_putstr_yx(picker, 0, 2, " 📂 RAG Folder Picker ");
-
-    // Current path (truncated to fit)
+    std::string title_str = " 📂 " + title_hint + " Picker ";
+    if ((int)title_str.size() > PW - 4) title_str = title_str.substr(0, PW - 4);
+    ncplane_putstr_yx(picker, 0, 2, title_str.c_str());
+    // Current path (truncated).
     std::string path_display = current_dir;
     if ((int)path_display.size() > PW - 4)
       path_display = "…" + path_display.substr(path_display.size() - (PW - 5));
     ncplane_set_channels(picker,
                          NCCHANNELS_INITIALIZER(160, 200, 240, PBG_R, PBG_G, PBG_B));
     ncplane_putstr_yx(picker, 1, 2, path_display.c_str());
-
-    // Hint line
+    // Hint line (bottom interior row).
     ncplane_set_channels(picker,
                          NCCHANNELS_INITIALIZER(120, 120, 160, PBG_R, PBG_G, PBG_B));
-    ncplane_putstr_yx(picker, PH - 2, 2,
-                      "↑↓ navigate  Enter open  s=select dir  Esc cancel");
-
-    // Entry list
-    int list_rows = PH - 5;   // rows 2 … PH-4 available
-    // Clamp scroll so selected stays visible
+    std::string hint_trunc = hint_line;
+    if ((int)hint_trunc.size() > PW - 4) hint_trunc = hint_trunc.substr(0, PW - 4);
+    ncplane_putstr_yx(picker, PH - 2, 2, hint_trunc.c_str());
+    // Entry list.
+    int list_rows = PH - 5;
     if (selected < scroll) scroll = selected;
     if (selected >= scroll + list_rows) scroll = selected - list_rows + 1;
-
     for (int i = 0; i < list_rows; ++i) {
       int idx = scroll + i;
       if (idx >= (int)entries.size()) break;
       bool is_selected = (idx == selected);
       bool is_dir = !entries[idx].empty() && entries[idx].back() == '/';
       uint32_t fr, fg, fb;
-      if (is_selected)            { fr = 20;  fg = 20;  fb = 20;  }
-      else if (is_dir)             { fr = 120; fg = 200; fb = 255; }
-      else                         { fr = 200; fg = 200; fb = 200; }
+      if (is_selected)  { fr = 20;  fg = 20;  fb = 20;  }
+      else if (is_dir)   { fr = 120; fg = 200; fb = 255; }
+      else               { fr = 200; fg = 200; fb = 200; }
       uint32_t br = is_selected ? 100 : PBG_R;
       uint32_t bg = is_selected ? 180 : PBG_G;
       uint32_t bb = is_selected ? 255 : PBG_B;
       ncplane_set_channels(picker,
                            NCCHANNELS_INITIALIZER(fr, fg, fb, br, bg, bb));
-      // Pad entry to fill width
       std::string label = (is_selected ? " ▶ " : "   ") + entries[idx];
       if ((int)label.size() > PW - 2) label = label.substr(0, PW - 2);
       while ((int)label.size() < PW - 2) label += ' ';
@@ -922,6 +944,7 @@ std::string TuiState::rag_folder_picker(const std::string &start_dir) {
       draw_picker();
       continue;
     }
+    // 's' — select the current directory (meant for /rag; the key is not filtered, so file pickers accept it too).
     if (ni.id == 's' || ni.id == 'S') {
       // Select current directory for RAG indexing.
       result = current_dir;
@@ -962,13 +985,13 @@ std::string TuiState::rag_folder_picker(const std::string &start_dir) {
         draw_picker();
       } else {
         // Select a specific file.
+        // The result is the highlighted entry's path under current_dir.
         result = current_dir + "/" + entry;
         break;
       }
       continue;
     }
   }
-
   ncplane_destroy(picker);
   notcurses_render(nc);
   return result;
@@ -1029,6 +1052,7 @@ std::string TuiState::readline_blocking() {
       return result;
     }
 
+
     if (ni.id == NCKEY_UP) {
       // Entering history from a fresh prompt: save current text as draft.
       std::string hist_entry;
@@ -1077,7 +1101,18 @@ std::string TuiState::readline_blocking() {
       notcurses_render(nc);
       continue;
     }
-
+    if (ni.id == NCKEY_SCROLL_UP && scroll_offset < term_rows + 10) {
+      scroll_offset += 1;
+      redraw_chat();
+      notcurses_render(nc);
+      continue;
+    }
+    if (ni.id == NCKEY_SCROLL_DOWN && scroll_offset > 0) {
+      scroll_offset -= 1;
+      redraw_chat();
+      notcurses_render(nc);
+      continue;
+    }
     if (ni.id == NCKEY_BACKSPACE || ni.id == 127) {
       if (cursor_pos > 0) { input_buf.erase(cursor_pos - 1, 1); --cursor_pos; }
     } else if (ni.id == NCKEY_LEFT) {
@@ -1108,7 +1143,7 @@ std::string TuiState::readline_blocking() {
 // AgentState
 // ═══════════════════════════════════════════════════════════════════════════
 struct AgentState {
-  Llama llama;
+  std::unique_ptr<Llama> llama;
   std::unique_ptr<LlamaIter> iter;
   std::unique_ptr<Llama> embed_llama;
   std::unique_ptr<RagDB>      rag_db;
@@ -1127,18 +1162,17 @@ struct AgentState {
   std::string memory_info_text();
   float tokens_per_sec() const;
 };
-
 void AgentState::apply_generation_params(const NitroConfig &cfg) {
-  llama.add_stop("<|turn|>");
-  llama.add_stop("<|im_end|>");
-  llama.set_max_tokens(cfg.n_max_tokens);
-  llama.set_temperature(cfg.temperature);
-  llama.set_top_k(cfg.top_k);
-  llama.set_top_p(cfg.top_p);
-  llama.set_min_p(cfg.min_p);
-  llama.set_penalty_repeat(cfg.penalty_repeat);
-  llama.set_penalty_last_n(cfg.penalty_last_n);
-  llama.set_log_level(cfg.log_level);
+  llama->add_stop("<|turn|>");
+  llama->add_stop("<|im_end|>");
+  llama->set_max_tokens(cfg.n_max_tokens);
+  llama->set_temperature(cfg.temperature);
+  llama->set_top_k(cfg.top_k);
+  llama->set_top_p(cfg.top_p);
+  llama->set_min_p(cfg.min_p);
+  llama->set_penalty_repeat(cfg.penalty_repeat);
+  llama->set_penalty_last_n(cfg.penalty_last_n);
+  llama->set_log_level(cfg.log_level);
 }
 
 // ─── AgentState::setup_model ──────────────────────────────────────────────
@@ -1152,22 +1186,25 @@ bool AgentState::setup_model(const NitroConfig &cfg, TuiState &tui) {
   // Show a modal popup so the user knows loading is in progress.
   std::string model_name = fs::path(cfg.model_path).filename().string();
   tui.show_modal_popup("Loading " + model_name);
+  // Destroy the iterator first — it holds references into the llama context.
+  // Freeing llama while iter is still alive causes use-after-free / load failure.
+  iter.reset();
+  model_loaded = false;
+  llama = std::make_unique<Llama>();
 
-  llama.reset();
   apply_generation_params(cfg);
-  if (!llama.load_model(cfg.model_path, cfg.n_ctx, cfg.n_batch,
-                        cfg.n_gpu_layers, cfg.log_level)) {
+  if (!llama->load_model(cfg.model_path, cfg.n_ctx, cfg.n_batch,
+                         cfg.n_gpu_layers, cfg.log_level)) {
     tui.dismiss_modal_popup();
-    tui.append_line(std::string("[err] ") + llama.last_error());
+    tui.append_line(std::string("[err] ") + llama->last_error());
     tui.redraw_all();
     return false;
   }
   tui.dismiss_modal_popup();
-
   model_loaded = true;
   tui.current_model = model_name;
   tui.append_line("[sys] Model ready: " + tui.current_model);
-  LlamaMemoryInfo mem = llama.memory_info();
+  LlamaMemoryInfo mem = llama->memory_info();
   tui.append_line("[sys] " + mem.advice);
   tui.kv_used  = mem.kv_used;
   tui.kv_total = mem.kv_total;
@@ -1198,11 +1235,11 @@ bool AgentState::setup_embed(const std::string &path, TuiState &tui) {
 
 void AgentState::reset_conversation(const std::string &sysprompt, TuiState &tui) {
   system_prompt = sysprompt;
-  llama.reset();
+  llama->reset();
   apply_generation_params(NitroConfig{});
   iter = std::make_unique<LlamaIter>();
-  if (!llama.add_message(*iter, "system", system_prompt)) {
-    tui.append_line(std::string("[err] System prompt injection: ") + llama.last_error());
+  if (!llama->add_message(*iter, "system", system_prompt)) {
+    tui.append_line(std::string("[err] System prompt injection: ") + llama->last_error());
     tui.redraw_all();
   }
 }
@@ -1217,7 +1254,7 @@ float AgentState::tokens_per_sec() const {
 
 std::string AgentState::memory_info_text() {
   if (!model_loaded) return "No model loaded.";
-  LlamaMemoryInfo m = llama.memory_info();
+  LlamaMemoryInfo m = llama->memory_info();
   std::ostringstream oss;
   oss << "KV cache  : " << m.kv_used << " / " << m.kv_total
       << "  (" << m.kv_percent << "%)\n";
@@ -1270,8 +1307,8 @@ bool AgentState::run_turn(const std::string &user_message,
   }
   std::string effective_message = user_message;
   if (embed_llama && rag_db && rag_session) {
-    std::string context = llama.rag_retrieve(*rag_db, user_message,
-                                             cfg.rag_top_k, *rag_session);
+    std::string context = llama->rag_retrieve(*rag_db, user_message,
+                                              cfg.rag_top_k, *rag_session);
     if (!context.empty()) {
       effective_message = "Context:\n" + context + "\n\nUser: " + user_message;
     }
@@ -1281,24 +1318,21 @@ bool AgentState::run_turn(const std::string &user_message,
     tui.redraw_all();
     return false;
   }
-  if (!llama.add_message(*iter, "user", effective_message)) {
-    tui.append_line(std::string("[err] add_message: ") + llama.last_error());
+  if (!llama->add_message(*iter, "user", effective_message)) {
+    tui.append_line(std::string("[err] add_message: ") + llama->last_error());
     tui.redraw_all();
     return false;
   }
   tui.append_line("Nitro: ");
   tui.set_thinking(true);
-
   bool in_think = true;
   std::string buffer;
-
   auto update_think_state = [&](const std::string &text) {
     if (text.find("<think>")    != std::string::npos ||
         text.find("<|think|>")  != std::string::npos)  in_think = true;
     if (text.find("</think>")   != std::string::npos ||
         text.find("</|think|>") != std::string::npos)  in_think = false;
   };
-
   auto remove_substr = [](std::string str, const std::string& toRemove) {
     size_t pos = str.find(toRemove);
     while (pos != std::string::npos) {
@@ -1309,7 +1343,7 @@ bool AgentState::run_turn(const std::string &user_message,
   };
 
   while (iter->_has_next) {
-    std::string tok = llama.next(*iter);
+    std::string tok = llama->next(*iter);
     tui.tick_spinner();
     update_think_state(tok);
     buffer += tok;
@@ -1320,7 +1354,7 @@ bool AgentState::run_turn(const std::string &user_message,
       std::string trimmed = text_line;
       trimmed.erase(0, trimmed.find_first_not_of(" \t"));
       if (trimmed.substr(0, 5) == "TOOL:") {
-        std::string tail = buffer + llama.all(*iter);
+        std::string tail = buffer + llama->all(*iter);
         std::string op, arg1, payload;
         {
           auto s1 = trimmed.find(' ');
@@ -1361,8 +1395,14 @@ bool AgentState::run_turn(const std::string &user_message,
         tui.append_line("[tool] → " +
                         result.substr(0, 200) + (result.size() > 200 ? "…" : ""));
         tui.redraw_all();
-        if (!llama.add_message(*iter, "tool", result)) {
-          tui.append_line(std::string("[err] tool result inject: ") + llama.last_error());
+        // Inject the tool result back into the conversation as a user-role
+        // message using the TOOL_RESULT: prefix that the system prompt
+        // describes.  Using "user" role ensures the injected text is correctly
+        // formatted regardless of whether the underlying llama-sb layer knows
+        // a dedicated "tool" role.
+        std::string tool_result_msg = "TOOL_RESULT: " + result;
+        if (!llama->add_message(*iter, "user", tool_result_msg)) {
+          tui.append_line(std::string("[err] tool result inject: ") + llama->last_error());
           tui.redraw_all();
           break;
         }
@@ -1382,22 +1422,20 @@ bool AgentState::run_turn(const std::string &user_message,
       std::string result = process_tool(trimmed, cfg.sandbox, cfg.run_allowed, tui);
       tui.append_line("[tool] → " + result.substr(0, 200));
       tui.redraw_all();
-      llama.add_message(*iter, "tool", result);
+      std::string tool_result_msg = "TOOL_RESULT: " + result;
+      llama->add_message(*iter, "user", tool_result_msg);
     } else if (!in_think) {
       tui.append_token(buffer);
     }
   }
-
   tui.flush_token_acc();
   tui.set_thinking(false);
-
   tui.tokens_per_sec = tokens_per_sec();
-  LlamaMemoryInfo mem = llama.memory_info();
+  LlamaMemoryInfo mem = llama->memory_info();
   tui.kv_used    = mem.kv_used;
   tui.kv_total   = mem.kv_total;
   tui.vram_used  = mem.vram_used;
   tui.vram_total = mem.vram_total;
-
   char stat[128];
   std::snprintf(stat, sizeof(stat), "[sys] %.1f tok/s  (%d tokens)  KV %.1f%%",
                 (double)tui.tokens_per_sec,
@@ -1613,35 +1651,30 @@ static std::string html_to_text(const std::string &html) {
 }
 
 // ═══════════════════════════════════════════════════════════════════════════
-// TOOL:CURL — HTTP GET with libcurl, returns body text (capped at 32 KB).
+// TOOL:CURL
 // ═══════════════════════════════════════════════════════════════════════════
 static size_t curl_write_cb(void *contents, size_t size, size_t nmemb, void *userp) {
   std::string *buf = static_cast<std::string *>(userp);
   size_t total = size * nmemb;
-  // Enforce a 32 KB cap to prevent flooding the context window.
   static constexpr size_t MAX_BODY = 32 * 1024;
   if (buf->size() < MAX_BODY) {
     size_t room = MAX_BODY - buf->size();
     buf->append(static_cast<char *>(contents), std::min(total, room));
   }
-  return total;  // Return full amount so curl doesn't abort.
+  return total;
 }
-
 static std::string tool_curl(const std::string &url) {
   if (url.empty()) return "ERROR: TOOL:CURL requires a URL argument";
-
   CURL *curl = curl_easy_init();
   if (!curl) return "ERROR: curl_easy_init failed";
-
   std::string body;
   body.reserve(4096);
-
   curl_easy_setopt(curl, CURLOPT_URL,            url.c_str());
   curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,  curl_write_cb);
   curl_easy_setopt(curl, CURLOPT_WRITEDATA,      &body);
   curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
   curl_easy_setopt(curl, CURLOPT_MAXREDIRS,      5L);
-  curl_easy_setopt(curl, CURLOPT_TIMEOUT,        15L);        // 15-second timeout
+  curl_easy_setopt(curl, CURLOPT_TIMEOUT,        15L);
   curl_easy_setopt(curl, CURLOPT_USERAGENT,      "nitro/1.0");
   // Accept compressed responses; curl will decompress automatically.
   curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
@@ -1656,9 +1689,7 @@ static std::string tool_curl(const std::string &url) {
   std::string content_type = ct_raw ? ct_raw : "";
   std::transform(content_type.begin(), content_type.end(),
                  content_type.begin(), ::tolower);
-
   curl_easy_cleanup(curl);
-
   if (res != CURLE_OK) {
     return std::string("ERROR: curl: ") + curl_easy_strerror(res);
   }
@@ -1748,13 +1779,11 @@ static std::string process_tool(const std::string &cmd,
     return result;
   }
   if (op == "TOOL:CURL") {
-    // arg1 holds the URL (no sandbox restriction — network, not filesystem).
     return tool_curl(arg1);
   }
   if (op == "TOOL:RUN") {
     std::string prog = resolve(arg1);
     if (!path_in_sandbox(sandbox, prog)) return "ERROR: path outside sandbox";
-    // Enforce allowlist if one is configured.
     if (!run_allowed.empty()) {
       std::string basename = fs::path(prog).filename().string();
       bool permitted = std::any_of(run_allowed.begin(), run_allowed.end(),
@@ -1788,7 +1817,7 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
     "## Tool protocol\n"
     "Emit tool calls on their own line. The host executes them and returns\n"
     "TOOL_RESULT: <value> on the next line.\n\n"
-    "Available tools:\n"
+    "## Available tools:\n"
     "  TOOL:LIST   [dir]          list files (default: sandbox root)\n"
     "  TOOL:READ   <file>         read file contents\n"
     "  TOOL:WRITE  <file> <text>  write text to file\n"
@@ -1799,12 +1828,25 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
     "  TOOL:RND                   random float\n"
     "  TOOL:PERMISSION            ask user for explicit permission\n"
     "  TOOL:CURL   <url>          HTTP GET; returns response body (max 32 KB)\n\n"
-    "Rules:\n"
+    "## Rules:\n"
     "- Never access files outside the sandbox.\n"
-    "- Use TOOL:PERMISSION before destructive or irreversible operations.\n"
+    "- Use TOOL:PERMISSION when you're about to modify files, delete data, or run external programs.\n"
     "- Use TOOL:CURL to fetch documentation, APIs, or web content you need.\n"
     "- Reason step-by-step inside <|think|>…</|think|> (hidden from user).\n"
-    "- After each tool call, explain what you did in plain English.\n\n";
+    "- After each tool call, explain what you did in plain English.\n"
+    "Ask the user for explicit permission before proceeding.\n\n"
+    "## File Reading\n"
+    "When you read a file with TOOL:READ, you MUST:\n"
+    "1. Acknowledge what you found\n"
+    "2. Use that information in your response\n"
+    "3. If the file contains code, explain it or show relevant parts\n"
+    "4. If the file contains documentation, summarize key points\n"
+    "## Tool Result Integration\n"
+    "When you see TOOL_RESULT in the conversation, it contains tool output.\n"
+    "Use it to:\n"
+    "- Answer questions based on file contents\n"
+    "- Explain code or configuration\n"
+    "- Provide accurate information from the file\n";
   for (const auto &kf : knowledge_files) {
     std::ifstream f(kf);
     if (!f) continue;
@@ -1831,8 +1873,8 @@ static void handle_slash(const std::string &input,
 
   if (verb == "/help") {
     tui.append_line("[sys] Commands:");
-    tui.append_line("[sys]   /model  <path>           load a GGUF model");
-    tui.append_line("[sys]   /embed  <path>           load an embedding model for RAG");
+    tui.append_line("[sys]   /model  [path]           load a GGUF model (picker if no path)");
+    tui.append_line("[sys]   /embed  [path]           load an embedding model (picker if no path)");
     tui.append_line("[sys]   /rag    [path]           index file or directory (picker if no path)");
     tui.append_line("[sys]   /memory                  KV / VRAM / layer stats");
     tui.append_line("[sys]   /clear                   reset conversation");
@@ -1847,11 +1889,19 @@ static void handle_slash(const std::string &input,
     tui.redraw_all();
     return;
   }
-
+  // ── /model ──────────────────────────────────────────────────────────────
+  // If no path is given, open the file picker so the user can browse to a
+  // GGUF.  The picker starts in the current sandbox directory.
   if (verb == "/model") {
     if (rest.empty()) {
-      tui.append_line("[err] Usage: /model <path-to-gguf>");
-      tui.redraw_all(); return;
+      tui.append_line("[sys] Opening model picker…");
+      tui.redraw_all();
+      rest = tui.file_picker(cfg.sandbox, "Model File");
+      if (rest.empty()) {
+        tui.append_line("[sys] /model cancelled.");
+        tui.redraw_all();
+        return;
+      }
     }
     cfg.model_path = rest;
     if (agent.setup_model(cfg, tui)) {
@@ -1863,10 +1913,19 @@ static void handle_slash(const std::string &input,
     return;
   }
 
+  // ── /embed ──────────────────────────────────────────────────────────────
+  // If no path is given, open the file picker so the user can browse to an
+  // embedding GGUF.
   if (verb == "/embed") {
     if (rest.empty()) {
-      tui.append_line("[err] Usage: /embed <path-to-gguf>");
-      tui.redraw_all(); return;
+      tui.append_line("[sys] Opening embedding model picker…");
+      tui.redraw_all();
+      rest = tui.file_picker(cfg.sandbox, "Embedding Model");
+      if (rest.empty()) {
+        tui.append_line("[sys] /embed cancelled.");
+        tui.redraw_all();
+        return;
+      }
     }
     cfg.embed_path = rest;
     if (agent.setup_embed(rest, tui)) {
@@ -1875,6 +1934,7 @@ static void handle_slash(const std::string &input,
     return;
   }
 
+  // ── /rag ────────────────────────────────────────────────────────────────
   if (verb == "/rag") {
     std::string path = rest;
     if (path.empty()) {
@@ -1943,8 +2003,7 @@ static void handle_slash(const std::string &input,
     }
 
     bool ok = true;
-    bool needs_reparam = false; // whether to re-apply generation params
-
+    bool needs_reparam = false;
     try {
       if (key == "temperature")    { cfg.temperature    = std::stof(val); needs_reparam = true; }
       else if (key == "top_p")     { cfg.top_p          = std::stof(val); needs_reparam = true; }
@@ -2002,8 +2061,6 @@ static void handle_slash(const std::string &input,
 // Welcome banner  — colourful multi-line ASCII logo
 // ═══════════════════════════════════════════════════════════════════════════
 static void welcome(TuiState &tui, const std::string &sandbox) {
-  // Logo lines use the "[logo_N]" prefix so redraw_chat applies a
-  // per-row cyan→magenta gradient (N = 0-6 maps to the gradient table).
   tui.append_line("");
   tui.append_line("[logo_0]  ███╗   ██╗██╗████████╗██████╗  ██████╗ ");
   tui.append_line("[logo_1]  ████╗  ██║██║╚══██╔══╝██╔══██╗██╔═══██╗");
@@ -2025,9 +2082,8 @@ static void welcome(TuiState &tui, const std::string &sandbox) {
 int main(int argc, char **argv) {
   // ── Load persisted settings first (provides defaults) ────────────
   NitroConfig cfg;
-  load_settings(cfg);   // silently no-ops if ~/.config/nitro.settings.json absent
-
   // ── Parse arguments (command-line overrides saved settings) ──────
+  load_settings(cfg);
   auto resolve_path = [](const std::string &arg) -> std::string {
     std::error_code ec;
     if (arg.substr(0, 2) == "~/") {
@@ -2068,9 +2124,9 @@ int main(int argc, char **argv) {
                 "Settings are persisted to ~/.config/nitro.settings.json.\n"
                 "\n"
                 "Slash commands inside nitro:\n"
-                "  /model  <path>           load / hot-reload a GGUF\n"
-                "  /embed  <path>           load an embedding model\n"
-                "  /rag    [path]           index file or directory (picker if no path)\n"
+                "  /model  [path]           load / hot-reload a GGUF (picker if no path)\n"
+                "  /embed  [path]           load an embedding model  (picker if no path)\n"
+                "  /rag    [path]           index file or directory  (picker if no path)\n"
                 "  /memory                  KV / VRAM / layer stats\n"
                 "  /settings                show current settings\n"
                 "  /clear                   reset conversation\n"
@@ -2117,7 +2173,8 @@ int main(int argc, char **argv) {
     if (!cfg.embed_path.empty())
       agent.setup_embed(cfg.embed_path, tui);
   } else {
-    tui.append_line("[sys] No model specified.  Use /model <path> to load one.");
+    tui.append_line("[sys] No model specified.  Use /model to open the file picker,");
+    tui.append_line("[sys] or /model <path> to load directly.");
     tui.append_line("[sys] Example: /model ~/models/qwen2.5-7b-q4_k_m.gguf");
     tui.redraw_all();
   }

From 2a46e0955de12067fecdbcd2c53b505b2f32a48f Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Sat, 23 May 2026 19:04:58 +0930
Subject: [PATCH 37/54] LLAMA: nitro agent - wip

---
 llama/nitro.cpp | 169 +++++++++++++++++++++++++++++-------------------
 1 file changed, 104 insertions(+), 65 deletions(-)

diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index b35e10e..ece71cb 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -38,7 +38,8 @@
 //   TOOL:CURL   <url>
 //
 // Copyright (C) 2026 Chris Warren-Smith  —  GPLv2 or later
-// ─── Standard library ────────────────────────────────────────────────────────
+//
+
 #include <algorithm>
 #include <chrono>
 #include <ctime>
@@ -50,14 +51,13 @@
 #include <sstream>
 #include <string>
 #include <vector>
-// ─── curl ─────────────────────────────────────────────────────────────────────
 #include <curl/curl.h>
-// ─── Integration layer (sole llama.cpp dependency for nitro) ─────────────────
 #include "llama-sb.h"
 #include "llama-sb-rag.h"
-// ─── TUI ─────────────────────────────────────────────────────────────────────
 #include <notcurses/notcurses.h>
+
 namespace fs = std::filesystem;
+
 // ═══════════════════════════════════════════════════════════════════════════
 // Forward declarations
 // ═══════════════════════════════════════════════════════════════════════════
@@ -75,6 +75,7 @@ static std::string  process_tool(const std::string &line, const std::string &san
                                  TuiState &tui);
 static std::string  build_system_prompt(const std::vector<std::string> &knowledge_files,
                                         const std::string &sandbox);
+
 // ─── RAG indexing ─────────────────────────────────────────────────────────────
 static constexpr int BATCH_SIZE = 512;
 
@@ -448,6 +449,7 @@ struct TuiState {
   // ── input ─────────────────────────────────────────────────────────
   std::string input_buf;
   size_t      cursor_pos = 0;
+  bool        mouse_mode = true;
   // ── status bar values ─────────────────────────────────────────────
   std::string current_model  = "none";
   float       tokens_per_sec = 0.0f;
@@ -489,6 +491,7 @@ struct TuiState {
   // handle — or just use the paired helpers below.
   struct ncplane *modal_plane = nullptr;
   void show_modal_popup(const std::string &message);
+  void show_help();
   void dismiss_modal_popup();
   // ── RAG folder picker popup ───────────────────────────────────────
   // Presents an interactive directory browser to let the user choose a
@@ -555,7 +558,7 @@ void TuiState::init() {
   ncplane_set_base(inputpl, " ", 0,
                    NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B,
                                           BG_INP_R, BG_INP_G, BG_INP_B));
-  notcurses_mice_enable(nc, NCMICE_ALL_EVENTS);
+  notcurses_mice_enable(nc, NCMICE_BUTTON_EVENT);
   redraw_all();
 }
 void TuiState::destroy() {
@@ -784,6 +787,24 @@ void TuiState::show_modal_popup(const std::string &message) {
   notcurses_render(nc);
 }
 
+void TuiState::show_help() {
+  append_line("[sys] Commands:");
+  append_line("[sys]   /model  [path]           load a GGUF model (picker if no path)");
+  append_line("[sys]   /embed  [path]           load an embedding model (picker if no path)");
+  append_line("[sys]   /rag    [path]           index file or directory (picker if no path)");
+  append_line("[sys]   /memory                  KV / VRAM / layer stats");
+  append_line("[sys]   /clear                   reset conversation");
+  append_line("[sys]   /settings                show current settings");
+  append_line("[sys]   /set    <key> <value>    change a setting live");
+  append_line("[sys]   /help                    this message");
+  append_line("[sys]   exit / quit              exit Nitro");
+  append_line("[sys] Settable keys (via /set):");
+  append_line("[sys]   temperature  top_p  top_k  min_p  penalty_repeat");
+  append_line("[sys]   n_max_tokens  penalty_last_n  rag_top_k  n_gpu_layers");
+  append_line("[sys]   run_allowed  (comma-separated list, e.g. python3,make)");
+  redraw_all();
+}
+
 void TuiState::dismiss_modal_popup() {
   if (modal_plane) {
     ncplane_destroy(modal_plane);
@@ -1052,7 +1073,6 @@ std::string TuiState::readline_blocking() {
       return result;
     }
 
-
     if (ni.id == NCKEY_UP) {
       // Entering history from a fresh prompt: save current text as draft.
       std::string hist_entry;
@@ -1107,12 +1127,25 @@ std::string TuiState::readline_blocking() {
       notcurses_render(nc);
       continue;
     }
-    if (ni.id == NCKEY_SCROLL_DOWN && scroll_offset > 0) {
+    if (ni.id == NCKEY_SCROLL_DOWN && scroll_offset > 1) {
       scroll_offset -= 1;
       redraw_chat();
       notcurses_render(nc);
       continue;
     }
+    if (ni.id == NCKEY_F01) {
+      show_help();
+      continue;
+    }
+    if (ni.id == NCKEY_F02) {
+      mouse_mode = !mouse_mode;
+      if (mouse_mode) {
+        notcurses_mice_enable(nc, NCMICE_BUTTON_EVENT);
+      } else {
+        notcurses_mice_disable(nc);
+      }
+      continue;
+    }
     if (ni.id == NCKEY_BACKSPACE || ni.id == 127) {
       if (cursor_pos > 0) { input_buf.erase(cursor_pos - 1, 1); --cursor_pos; }
     } else if (ni.id == NCKEY_LEFT) {
@@ -1294,6 +1327,40 @@ bool AgentState::rag_index(const std::string &path, TuiState &tui) {
   return true;
 }
 
+// ═══════════════════════════════════════════════════════════════════════════
+// Think-tag filtering
+// ═══════════════════════════════════════════════════════════════════════════
+// Strips everything between (and including) <think>…</think> or the
+// <|think|>…</|think|> variant from a completed line/buffer.
+// Also strips any bare close-tag that appears without a matching open-tag
+// (can happen when the open was in a previous chunk already consumed).
+// Returns the visible text that should be shown to the user.
+std::string filter_think_tags(const std::string &text) {
+  static const struct { const char *open; const char *close; } PAIRS[] = {
+    { "<think>",   "</think>"   },
+    { "<|think|>", "</|think|>" },
+  };
+  std::string out = text;
+  for (auto &p : PAIRS) {
+    std::string open(p.open), close(p.close);
+    for (;;) {
+      auto ob = out.find(open);
+      if (ob == std::string::npos) break;
+      auto ce = out.find(close, ob);
+      if (ce == std::string::npos) {
+        out.erase(ob);   // no closing tag — strip to end
+        break;
+      }
+      out.erase(ob, ce + close.size() - ob);
+    }
+    // Strip orphan close-tags (open tag was in an earlier chunk).
+    size_t pos = 0;
+    while ((pos = out.find(close, pos)) != std::string::npos)
+      out.erase(pos, close.size());
+  }
+  return out;
+}
+
 // ═══════════════════════════════════════════════════════════════════════════
 // Agent turn
 // ═══════════════════════════════════════════════════════════════════════════
@@ -1324,29 +1391,32 @@ bool AgentState::run_turn(const std::string &user_message,
     return false;
   }
   tui.append_line("Nitro: ");
-  tui.set_thinking(true);
-  bool in_think = true;
+  // in_think starts false — models that don't use <think> blocks emit
+  // visible text immediately.  The spinner activates only while thinking.
+  bool in_think = false;
+  tui.set_thinking(false);
   std::string buffer;
-  auto update_think_state = [&](const std::string &text) {
-    if (text.find("<think>")    != std::string::npos ||
-        text.find("<|think|>")  != std::string::npos)  in_think = true;
-    if (text.find("</think>")   != std::string::npos ||
-        text.find("</|think|>") != std::string::npos)  in_think = false;
-  };
-  auto remove_substr = [](std::string str, const std::string& toRemove) {
-    size_t pos = str.find(toRemove);
-    while (pos != std::string::npos) {
-      str.erase(pos, toRemove.length());
-      pos = str.find(toRemove, pos);
-    }
-    return str;
-  };
-
   while (iter->_has_next) {
     std::string tok = llama->next(*iter);
-    tui.tick_spinner();
-    update_think_state(tok);
     buffer += tok;
+    // Detect think-tag transitions on the accumulated buffer so we never
+    // miss a tag that was split across two tokens.
+    bool was_thinking = in_think;
+    if (!in_think) {
+      if (buffer.find("<think>")   != std::string::npos ||
+          buffer.find("<|think|>") != std::string::npos)
+        in_think = true;
+    }
+    if (in_think) {
+      if (buffer.find("</think>")   != std::string::npos ||
+          buffer.find("</|think|>") != std::string::npos)
+        in_think = false;
+    }
+    // Update spinner: animate only while in a think block.
+    if (in_think || was_thinking) {
+      tui.set_thinking(in_think);
+      if (in_think) tui.tick_spinner();
+    }
     auto nl = buffer.find('\n');
     if (nl != std::string::npos) {
       std::string text_line = buffer.substr(0, nl);
@@ -1408,13 +1478,10 @@ bool AgentState::run_turn(const std::string &user_message,
         }
         buffer.clear();
       } else if (!in_think) {
-        text_line = remove_substr(text_line, "</think>");
-        text_line = remove_substr(text_line, "</|think|>");
-        tui.append_token(text_line + "\n");
+        tui.append_token(filter_think_tags(text_line) + "\n");
       }
     }
   }
-
   if (!buffer.empty()) {
     std::string trimmed = buffer;
     trimmed.erase(0, trimmed.find_first_not_of(" \t"));
@@ -1425,7 +1492,7 @@ bool AgentState::run_turn(const std::string &user_message,
       std::string tool_result_msg = "TOOL_RESULT: " + result;
       llama->add_message(*iter, "user", tool_result_msg);
     } else if (!in_think) {
-      tui.append_token(buffer);
+      tui.append_token(filter_think_tags(buffer));
     }
   }
   tui.flush_token_acc();
@@ -1817,7 +1884,7 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
     "## Tool protocol\n"
     "Emit tool calls on their own line. The host executes them and returns\n"
     "TOOL_RESULT: <value> on the next line.\n\n"
-    "## Available tools:\n"
+    "Available tools:\n"
     "  TOOL:LIST   [dir]          list files (default: sandbox root)\n"
     "  TOOL:READ   <file>         read file contents\n"
     "  TOOL:WRITE  <file> <text>  write text to file\n"
@@ -1828,25 +1895,12 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
     "  TOOL:RND                   random float\n"
     "  TOOL:PERMISSION            ask user for explicit permission\n"
     "  TOOL:CURL   <url>          HTTP GET; returns response body (max 32 KB)\n\n"
-    "## Rules:\n"
+    "Rules:\n"
     "- Never access files outside the sandbox.\n"
-    "- Use TOOL:PERMISSION when you're about to modify files, delete data, or run external programs.\n"
+    "- Use TOOL:PERMISSION before destructive or irreversible operations.\n"
     "- Use TOOL:CURL to fetch documentation, APIs, or web content you need.\n"
     "- Reason step-by-step inside <|think|>…</|think|> (hidden from user).\n"
-    "- After each tool call, explain what you did in plain English.\n"
-    "Ask the user for explicit permission before proceeding.\n\n"
-    "## File Reading\n"
-    "When you read a file with TOOL:READ, you MUST:\n"
-    "1. Acknowledge what you found\n"
-    "2. Use that information in your response\n"
-    "3. If the file contains code, explain it or show relevant parts\n"
-    "4. If the file contains documentation, summarize key points\n"
-    "## Tool Result Integration\n"
-    "When you see TOOL_RESULT in the conversation, it contains tool output.\n"
-    "Use it to:\n"
-    "- Answer questions based on file contents\n"
-    "- Explain code or configuration\n"
-    "- Provide accurate information from the file\n";
+    "- After each tool call, explain what you did in plain English.\n\n";
   for (const auto &kf : knowledge_files) {
     std::ifstream f(kf);
     if (!f) continue;
@@ -1872,21 +1926,7 @@ static void handle_slash(const std::string &input,
   }
 
   if (verb == "/help") {
-    tui.append_line("[sys] Commands:");
-    tui.append_line("[sys]   /model  [path]           load a GGUF model (picker if no path)");
-    tui.append_line("[sys]   /embed  [path]           load an embedding model (picker if no path)");
-    tui.append_line("[sys]   /rag    [path]           index file or directory (picker if no path)");
-    tui.append_line("[sys]   /memory                  KV / VRAM / layer stats");
-    tui.append_line("[sys]   /clear                   reset conversation");
-    tui.append_line("[sys]   /settings                show current settings");
-    tui.append_line("[sys]   /set    <key> <value>    change a setting live");
-    tui.append_line("[sys]   /help                    this message");
-    tui.append_line("[sys]   exit / quit              exit Nitro");
-    tui.append_line("[sys] Settable keys (via /set):");
-    tui.append_line("[sys]   temperature  top_p  top_k  min_p  penalty_repeat");
-    tui.append_line("[sys]   n_max_tokens  penalty_last_n  rag_top_k  n_gpu_layers");
-    tui.append_line("[sys]   run_allowed  (comma-separated list, e.g. python3,make)");
-    tui.redraw_all();
+    tui.show_help();
     return;
   }
   // ── /model ──────────────────────────────────────────────────────────────
@@ -2111,8 +2151,7 @@ int main(int argc, char **argv) {
     } else if (a == "-g" || a == "--gpu-layers") {
       cfg.n_gpu_layers = std::stoi(take_next(a.c_str()));
     } else if (a == "-h" || a == "--help") {
-      std::puts(
-                "Usage: nitro [options] [project_dir]\n"
+      std::puts("Usage: nitro [options] [project_dir]\n"
                 "\n"
                 "Options:\n"
                 "  -m, --model  <path>      GGUF model to load on startup\n"

From 17a6e5fa728bc36baa55acdab22052c014e2b351 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Sun, 24 May 2026 09:15:22 +0930
Subject: [PATCH 38/54] LLAMA: added nitro introspection tool

---
 llama/nitro.cpp | 88 ++++++++++++++++++++++++++++++++-----------------
 1 file changed, 58 insertions(+), 30 deletions(-)

diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index ece71cb..2fed672 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -70,11 +70,8 @@ static bool         write_file(const std::string &path, const std::string &data)
 static std::string  list_dir(const std::string &path);
 static bool         path_in_sandbox(const std::string &sandbox, const std::string &path);
 static std::string  strip_code_fences(const std::string &filename, const std::string &src);
-static std::string  process_tool(const std::string &line, const std::string &sandbox,
-                                 const std::vector<std::string> &run_allowed,
-                                 TuiState &tui);
-static std::string  build_system_prompt(const std::vector<std::string> &knowledge_files,
-                                        const std::string &sandbox);
+static std::string  process_tool(const std::string &line, const NitroConfig &cfg, TuiState &tui);
+static std::string  build_system_prompt(const std::vector<std::string> &knowledge_files, const std::string &sandbox);
 
 // ─── RAG indexing ─────────────────────────────────────────────────────────────
 static constexpr int BATCH_SIZE = 512;
@@ -388,6 +385,41 @@ static std::string json_escape(const std::string &s) {
   return out;
 }
 
+// Renders cfg as a JSON object: the three path/string fields followed by
+// the numeric settings.  save_settings() writes this text to the settings
+// file and TOOL:INTROSPECT returns it to the model.
+static std::string introspect(const NitroConfig &cfg) {
+  // "{{" / "}}" are std::format escapes for literal braces.
+  static constexpr std::string_view tmpl =
+    "{{\n"
+    "  \"model_path\":     \"{}\",\n"
+    "  \"embed_path\":     \"{}\",\n"
+    "  \"sandbox\":        \"{}\",\n"
+    "  \"n_ctx\":          {},\n"
+    "  \"n_batch\":        {},\n"
+    "  \"n_gpu_layers\":   {},\n"
+    "  \"n_max_tokens\":   {},\n"
+    "  \"temperature\":    {},\n"
+    "  \"top_p\":          {},\n"
+    "  \"min_p\":          {},\n"
+    "  \"top_k\":          {},\n"
+    "  \"penalty_repeat\": {},\n"
+    "  \"penalty_last_n\": {},\n"
+    "  \"rag_top_k\":      {}\n"
+    "}}\n";
+  // String values are escaped with json_escape() so that special characters
+  // in a path (quotes, backslashes) cannot break the emitted JSON.
+  return std::format(tmpl,
+                     json_escape(cfg.model_path),
+                     json_escape(cfg.embed_path),
+                     json_escape(cfg.sandbox),
+                     cfg.n_ctx, cfg.n_batch,
+                     cfg.n_gpu_layers, cfg.n_max_tokens,
+                     cfg.temperature, cfg.top_p, cfg.min_p,
+                     cfg.top_k, cfg.penalty_repeat,
+                     cfg.penalty_last_n, cfg.rag_top_k);
+}
+
 // Persist the current cfg to ~/.config/nitro.settings.json.
 static bool save_settings(const NitroConfig &cfg) {
   std::string path = settings_path();
@@ -397,24 +429,11 @@ static bool save_settings(const NitroConfig &cfg) {
   fs::create_directories(dir, ec);
 
   std::ofstream f(path, std::ios::trunc);
-  if (!f) return false;
+  if (!f) {
+    return false;
+  }
 
-  f << "{\n";
-  f << "  \"model_path\":    \"" << json_escape(cfg.model_path)  << "\",\n";
-  f << "  \"embed_path\":    \"" << json_escape(cfg.embed_path)   << "\",\n";
-  f << "  \"sandbox\":       \"" << json_escape(cfg.sandbox)      << "\",\n";
-  f << "  \"n_ctx\":          " << cfg.n_ctx          << ",\n";
-  f << "  \"n_batch\":        " << cfg.n_batch         << ",\n";
-  f << "  \"n_gpu_layers\":   " << cfg.n_gpu_layers    << ",\n";
-  f << "  \"n_max_tokens\":   " << cfg.n_max_tokens    << ",\n";
-  f << "  \"temperature\":    " << cfg.temperature     << ",\n";
-  f << "  \"top_p\":          " << cfg.top_p           << ",\n";
-  f << "  \"min_p\":          " << cfg.min_p           << ",\n";
-  f << "  \"top_k\":          " << cfg.top_k           << ",\n";
-  f << "  \"penalty_repeat\": " << cfg.penalty_repeat  << ",\n";
-  f << "  \"penalty_last_n\": " << cfg.penalty_last_n  << ",\n";
-  f << "  \"rag_top_k\":      " << cfg.rag_top_k       << "\n";
-  f << "}\n";
+  f << introspect(cfg);
 
   return f.good();
 }
@@ -1195,6 +1214,7 @@ struct AgentState {
   std::string memory_info_text();
   float tokens_per_sec() const;
 };
+
 void AgentState::apply_generation_params(const NitroConfig &cfg) {
   llama->add_stop("<|turn|>");
   llama->add_stop("<|im_end|>");
@@ -1461,7 +1481,7 @@ bool AgentState::run_turn(const std::string &user_message,
         tui.append_line("[tool] " + op + " " + arg1 +
                         (op == "TOOL:WRITE" ? " <content>" : ""));
         tui.redraw_all();
-        std::string result = process_tool(tool_line, cfg.sandbox, cfg.run_allowed, tui);
+        std::string result = process_tool(tool_line, cfg, tui);
         tui.append_line("[tool] → " +
                         result.substr(0, 200) + (result.size() > 200 ? "…" : ""));
         tui.redraw_all();
@@ -1486,7 +1506,7 @@ bool AgentState::run_turn(const std::string &user_message,
     std::string trimmed = buffer;
     trimmed.erase(0, trimmed.find_first_not_of(" \t"));
     if (trimmed.substr(0, 5) == "TOOL:") {
-      std::string result = process_tool(trimmed, cfg.sandbox, cfg.run_allowed, tui);
+      std::string result = process_tool(trimmed, cfg, tui);
       tui.append_line("[tool] → " + result.substr(0, 200));
       tui.redraw_all();
       std::string tool_result_msg = "TOOL_RESULT: " + result;
@@ -1777,10 +1797,10 @@ static std::string tool_curl(const std::string &url) {
 // ═══════════════════════════════════════════════════════════════════════════
 // Tool dispatch
 // ═══════════════════════════════════════════════════════════════════════════
-static std::string process_tool(const std::string &cmd,
-                                const std::string &sandbox,
-                                const std::vector<std::string> &run_allowed,
-                                TuiState &tui) {
+static std::string process_tool(const std::string &cmd, const NitroConfig &cfg, TuiState &tui) {
+  const std::string &sandbox = cfg.sandbox;
+  const std::vector<std::string> &run_allowed = cfg.run_allowed;
+
   std::string op, arg1, arg2;
   auto sp1 = cmd.find(' ');
   if (sp1 == std::string::npos) {
@@ -1848,6 +1868,9 @@ static std::string process_tool(const std::string &cmd,
   if (op == "TOOL:CURL") {
     return tool_curl(arg1);
   }
+  if (op == "TOOL:INTROSPECT") {
+    return introspect(cfg);
+  }
   if (op == "TOOL:RUN") {
     std::string prog = resolve(arg1);
     if (!path_in_sandbox(sandbox, prog)) return "ERROR: path outside sandbox";
@@ -1894,6 +1917,7 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
     "  TOOL:TIME                  current time\n"
     "  TOOL:RND                   random float\n"
     "  TOOL:PERMISSION            ask user for explicit permission\n"
+    "  TOOL:INTROSPECT            introspect your settings, top_k etc\n"
     "  TOOL:CURL   <url>          HTTP GET; returns response body (max 32 KB)\n\n"
     "Rules:\n"
     "- Never access files outside the sandbox.\n"
@@ -2084,8 +2108,9 @@ static void handle_slash(const std::string &input,
     }
 
     if (ok) {
-      if (needs_reparam && agent.model_loaded)
+      if (needs_reparam && agent.model_loaded) {
         agent.apply_generation_params(cfg);
+      }
       save_settings(cfg);
       tui.append_line("[sys] " + key + " = " + val);
     }
@@ -2185,7 +2210,10 @@ int main(int argc, char **argv) {
     std::error_code ec;
     cfg.sandbox = fs::current_path(ec).string();
   }
-  { std::error_code ec; fs::create_directories(cfg.sandbox, ec); }
+  {
+    std::error_code ec;
+    fs::create_directories(cfg.sandbox, ec);
+  }
 
   // ── Auto-discover knowledge files ─────────────────────────────────
   for (const char *kf : {"nitro.md", "AGENTS.md", "README.md"}) {

From 4737d150624dcf18c21877a829da078a0c3e28e5 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Mon, 25 May 2026 20:02:02 +0930
Subject: [PATCH 39/54] LLAMA: fix nitro tool handling

---
 llama/llama-sb.cpp |  54 ++++---
 llama/llama-sb.h   |  26 ++--
 llama/nitro.cpp    | 367 +++++++++++++++++++++++++--------------------
 3 files changed, 250 insertions(+), 197 deletions(-)

diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index 6a74c69..a4ad885 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -61,8 +61,9 @@ Llama::Llama() :
   _max_tokens(0),
   _log_level(GGML_LOG_LEVEL_CONT),
   _n_gpu_layers(0),
-  _n_past(0),
+  _n_system_tokens(0),
   _is_gemma4(false),
+  _sampler_dirty(false),
   _seed(LLAMA_DEFAULT_SEED) {
   llama_log_set([](enum ggml_log_level level, const char *text, void *user_data) {
     Llama *llama = (Llama *)user_data;
@@ -99,8 +100,9 @@ Llama::Llama(Llama &&other) noexcept
   , _max_tokens(other._max_tokens)
   , _log_level(other._log_level)
   , _n_gpu_layers(other._n_gpu_layers)
-  , _n_past(other._n_past)
+  , _n_system_tokens(other._n_system_tokens)
   , _is_gemma4(other._is_gemma4)
+  , _sampler_dirty(other._sampler_dirty)
   , _seed(other._seed) {
 }
 
@@ -129,8 +131,9 @@ void Llama::reset() {
   _top_p = 1.0f;
   _min_p = 0.0f;
   _max_tokens = 150;
-  _n_past = 0;
+  _n_system_tokens = 0;
   _seed = LLAMA_DEFAULT_SEED;
+  _sampler_dirty = true;
   if (_ctx) {
     llama_memory_clear(llama_get_memory(_ctx), true);
   }
@@ -210,13 +213,13 @@ bool Llama::load_embedding_model(string model_path) {
 void Llama::set_grammar(const string &src, const string &root) {
   _grammar_src = src;
   _grammar_root = root;
+  dirty();
 }
 
 bool Llama::add_message(LlamaIter &iter, const string &role, const string &content) {
   llama_chat_message message = {role.c_str(), content.c_str()};
   int buf_size = 2 * (int)(role.size() + content.size() + 64);
   vector<char> buf(buf_size);
-  bool add_ass = (role == "user" || role == "tool");
   int32_t n = 0;
 
   if (_template.empty()) {
@@ -225,14 +228,18 @@ bool Llama::add_message(LlamaIter &iter, const string &role, const string &conte
   }
 
   if (_is_gemma4) {
-    string str = "<|turn>" + role + "\n" + content + "<turn|>\n";
-    if (add_ass) {
-      str += "<|turn>model\n";
+    // see: https://ai.google.dev/gemma/docs/core/prompt-formatting-gemma4
+    string str;
+    if (role == "system") {
+      str = "<|turn>system\n<|think|>" + content + "<turn|>\n";
+    } else {
+      str = "<|turn>" + role + "\n" + content + "<turn|>\n";
     }
     n = str.size();
     buf.assign(str.begin(), str.end());
     buf.push_back('\0');
   } else {
+    bool add_ass = (role == "user" || role == "tool" || role == "tool_result");
     n = llama_chat_apply_template(_template.c_str(), &message, 1, add_ass, buf.data(), buf_size);
     if (n < 0) {
       _last_error = "No chat template no supported";
@@ -244,8 +251,12 @@ bool Llama::add_message(LlamaIter &iter, const string &role, const string &conte
   }
   string prompt(buf.data(), n);
 
-  if (!configure_sampler()) {
-    return false;
+  if (_sampler_dirty) {
+    // avoid wasteful rebuild
+    if (!configure_sampler()) {
+      return false;
+    }
+    _sampler_dirty = false;
   }
 
   vector<llama_token> prompt_tokens = tokenize(prompt);
@@ -253,7 +264,12 @@ bool Llama::add_message(LlamaIter &iter, const string &role, const string &conte
     return false;
   }
 
-  if (!make_space_for_tokens(prompt_tokens.size(), _n_past)) {
+  if (role == "system") {
+    // always retain system tokens
+    _n_system_tokens = prompt_tokens.size();
+  }
+
+  if (!make_space_for_tokens(prompt_tokens.size())) {
     return false;
   }
 
@@ -278,7 +294,6 @@ bool Llama::add_message(LlamaIter &iter, const string &role, const string &conte
     }
   }
 
-  _n_past += prompt_tokens.size();
   iter._t_start = std::chrono::high_resolution_clock::now();
   iter._llama = this;
   iter._has_next = true;
@@ -326,7 +341,6 @@ string Llama::all(LlamaIter &iter) {
 
     // end-of-generation check
     if (llama_vocab_is_eog(_vocab, tok)) {
-      iter._has_next = false;
       break;
     }
 
@@ -342,6 +356,9 @@ string Llama::all(LlamaIter &iter) {
     }
   }
 
+  // tokens exhausted - call add_message to continue
+  iter._has_next = false;
+
   // detokenize sequentially
   if (!decoded.empty()) {
     for (llama_token tok : decoded) {
@@ -407,8 +424,8 @@ bool Llama::batch_decode_tokens(vector<llama_token> &tokens) {
     llama_batch batch = llama_batch_get_one(tokens.data() + i, batch_size);
     int result = llama_decode(_ctx, batch);
     if (result != 0) {
-      _last_error = std::format("Failed to decode batch. position:{} error:{} [size:{}, past:{}]",
-                                i, result, tokens.size(), _n_past);
+      _last_error = std::format("Failed to decode batch. position:{} error:{} [size:{}]",
+                                i, result, tokens.size());
       return false;
     }
   }
@@ -507,9 +524,8 @@ bool Llama::ends_with_sentence_boundary(const string &text) {
 //
 // Parameters:
 //   n_tokens  - Number of tokens we need space for
-//   keep_min  - Minimum tokens to keep (e.g., system prompt), default 0
 //
-bool Llama::make_space_for_tokens(int n_tokens, int keep_min) {
+bool Llama::make_space_for_tokens(int n_tokens) {
   int n_ctx = llama_n_ctx(_ctx);
   if (n_tokens > n_ctx) {
     _last_error = "Too many tokens, increase context size (n_ctx)";
@@ -539,10 +555,10 @@ bool Llama::make_space_for_tokens(int n_tokens, int keep_min) {
   // Calculate how many tokens to remove
   int tokens_to_remove = space_needed - space_available;
 
-  // Can't remove more than we have (minus keep_min)
-  int removable = current_used - keep_min;
+  // Can't remove more than we have (minus _n_system_tokens)
+  int removable = current_used - _n_system_tokens;
   if (tokens_to_remove > removable) {
-    _last_error = "Can't make enough space while keeping keep_min tokens";
+    _last_error = "Can't make enough space while keeping num_system_tokens tokens";
     return false;
   }
 
diff --git a/llama/llama-sb.h b/llama/llama-sb.h
index 0ca3998..43b7c7c 100644
--- a/llama/llama-sb.h
+++ b/llama/llama-sb.h
@@ -81,17 +81,17 @@ struct Llama {
   // generation parameters
   void add_stop(const char *stop) { _stop_sequences.push_back(stop); }
   void clear_stops() { _stop_sequences.clear(); }
-  void set_penalty_last_n(int32_t penalty_last_n) { _penalty_last_n = penalty_last_n; }
-  void set_penalty_repeat(float penalty_repeat) { _penalty_repeat = penalty_repeat; }
-  void set_penalty_freq(float penalty_freq) { _penalty_freq = penalty_freq; }
-  void set_penalty_present(float penalty_present) { _penalty_present = penalty_present; }
-  void set_max_tokens(int max_tokens) { _max_tokens = max_tokens; }
-  void set_min_p(float min_p) { _min_p = min_p; }
-  void set_temperature(float temperature) { _temperature = temperature; }
-  void set_top_k(int top_k) { _top_k = top_k; }
-  void set_top_p(float top_p) { _top_p = top_p; }
+  void set_penalty_last_n(int32_t penalty_last_n) { _penalty_last_n = penalty_last_n; dirty(); }
+  void set_penalty_repeat(float penalty_repeat) { _penalty_repeat = penalty_repeat; dirty(); }
+  void set_penalty_freq(float penalty_freq) { _penalty_freq = penalty_freq; dirty(); }
+  void set_penalty_present(float penalty_present) { _penalty_present = penalty_present; dirty(); }
+  void set_max_tokens(int max_tokens) { _max_tokens = max_tokens; dirty(); }
+  void set_min_p(float min_p) { _min_p = min_p; dirty(); }
+  void set_temperature(float temperature) { _temperature = temperature; dirty(); }
+  void set_top_k(int top_k) { _top_k = top_k; dirty(); }
+  void set_top_p(float top_p) { _top_p = top_p; dirty(); }
   void set_grammar(const string &src, const string &root);
-  void set_seed(unsigned int seed) { _seed = seed; }
+  void set_seed(unsigned int seed) { _seed = seed; dirty(); }
 
   // error handling
   const char *last_error() { return _last_error.c_str(); }
@@ -110,8 +110,9 @@ struct Llama {
   private:
   bool batch_decode_tokens(vector<llama_token> &tokens);
   bool configure_sampler();
+  void dirty() {_sampler_dirty = true; }
   bool ends_with_sentence_boundary(const string &out);
-  bool make_space_for_tokens(int n_tokens, int keep_min);
+  bool make_space_for_tokens(int n_tokens);
   vector<llama_token> tokenize(const string &prompt);
   string token_to_string(LlamaIter &iter, llama_token tok);
   void set_last_error(const char *message);
@@ -136,7 +137,8 @@ struct Llama {
   int _max_tokens;
   int _log_level;
   int _n_gpu_layers;
-  int _n_past;
+  int _n_system_tokens;
   bool _is_gemma4;
+  bool _sampler_dirty;
   unsigned int _seed;
 };
diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index 2fed672..bdff9ea 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -252,6 +252,38 @@ class InputHistory {
   int current_index = 0;
 };
 
+// ═══════════════════════════════════════════════════════════════════════════
+// Logging
+// ═══════════════════════════════════════════════════════════════════════════
+// ─── Debug logging (file-backed, safe to call while notcurses is active) ──
+static FILE *g_logfile = nullptr;
+
+static void log_open() {
+  const char *home = getenv("HOME");
+  std::string path = std::string(home ? home : ".") + "/.config/nitro.log";
+  g_logfile = fopen(path.c_str(), "a");
+}
+
+static void log_close() {
+  if (g_logfile) { fclose(g_logfile); g_logfile = nullptr; }
+}
+
+static void log_write(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+static void log_write(const char *fmt, ...) {
+  if (!g_logfile) return;
+  // timestamp
+  time_t t = time(nullptr);
+  char ts[32];
+  strftime(ts, sizeof(ts), "%H:%M:%S", localtime(&t));
+  fprintf(g_logfile, "[%s] ", ts);
+  va_list ap;
+  va_start(ap, fmt);
+  vfprintf(g_logfile, fmt, ap);
+  va_end(ap);
+  fputc('\n', g_logfile);
+  fflush(g_logfile);  // flush immediately so tail -f works
+}
+
 // ═══════════════════════════════════════════════════════════════════════════
 // Settings persistence  (~/.config/nitro.settings.json)
 // ═══════════════════════════════════════════════════════════════════════════
@@ -326,14 +358,26 @@ static bool settings_get_float(const std::string &json,
                                float &out) {
   std::string search = "\"" + key + "\":";
   size_t pos = json.find(search);
-  if (pos == std::string::npos) return false;
+  if (pos == std::string::npos) {
+    return false;
+  }
   pos += search.size();
-  while (pos < json.size() && (json[pos] == ' ' || json[pos] == '\t')) ++pos;
-  if (pos >= json.size()) return false;
+  while (pos < json.size() && (json[pos] == ' ' || json[pos] == '\t')) {
+    ++pos;
+  }
+  if (pos >= json.size()) {
+    return false;
+  }
   size_t start = pos;
-  if (json[pos] == '-') ++pos;
-  while (pos < json.size() && (std::isdigit((unsigned char)json[pos]) || json[pos] == '.')) ++pos;
-  if (pos == start) return false;
+  if (json[pos] == '-') {
+    ++pos;
+  }
+  while (pos < json.size() && (std::isdigit((unsigned char)json[pos]) || json[pos] == '.')) {
+    ++pos;
+  }
+  if (pos == start) {
+    return false;
+  }
   out = std::stof(json.substr(start, pos - start));
   return true;
 }
@@ -438,6 +482,23 @@ static bool save_settings(const NitroConfig &cfg) {
   return f.good();
 }
 
+// Trims whitespace from both ends of a string
+std::string trim(std::string_view str) {
+  const std::string_view whitespace = " \t\n\r\f\v";
+
+  // Find the first non-whitespace character
+  const auto start = str.find_first_not_of(whitespace);
+  if (start == std::string_view::npos) {
+    return ""; // The string is entirely whitespace
+  }
+
+  // Find the last non-whitespace character
+  const auto end = str.find_last_not_of(whitespace);
+
+  // Return the substring between start and end
+  return std::string(str.substr(start, end - start + 1));
+}
+
 // ═══════════════════════════════════════════════════════════════════════════
 // Notcurses TUI
 // ═══════════════════════════════════════════════════════════════════════════
@@ -580,9 +641,11 @@ void TuiState::init() {
   notcurses_mice_enable(nc, NCMICE_BUTTON_EVENT);
   redraw_all();
 }
+
 void TuiState::destroy() {
   if (nc) { notcurses_stop(nc); nc = nullptr; }
 }
+
 void TuiState::resize() {
   notcurses_term_dim_yx(nc, (unsigned *)&term_rows, (unsigned *)&term_cols);
   ncplane_resize_simple(header,  1,                       (unsigned)term_cols);
@@ -678,13 +741,13 @@ void TuiState::redraw_input() {
   ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
   ncplane_putstr_yx(inputpl, 1, prompt_cols, before.c_str());
   int cx = prompt_cols + cur_in_view;
-  ncplane_set_channels(inputpl,
-                       NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B, 180, 230, 255));
+  ncplane_set_channels(inputpl, NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B, 180, 230, 255));
   char cbuf[2] = { cursor_ch_val, '\0' };
   ncplane_putstr_yx(inputpl, 1, cx, cbuf);
   ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
-  if (!after.empty())
+  if (!after.empty()) {
     ncplane_putstr_yx(inputpl, 1, cx + 1, after.c_str());
+  }
 }
 
 void TuiState::redraw_all() {
@@ -714,8 +777,9 @@ void TuiState::append_line(const std::string &line) {
   if ((int)line.size() <= w) {
     chat_lines.push_back(line);
   } else {
-    for (int off = 0; off < (int)line.size(); off += w)
+    for (int off = 0; off < (int)line.size(); off += w) {
       chat_lines.push_back(line.substr(off, w));
+    }
   }
 }
 
@@ -723,7 +787,9 @@ void TuiState::append_token(const std::string &token) {
   token_acc += token;
   for (;;) {
     auto pos = token_acc.find('\n');
-    if (pos == std::string::npos) break;
+    if (pos == std::string::npos) {
+      break;
+    }
     append_line(token_acc.substr(0, pos));
     token_acc = token_acc.substr(pos + 1);
   }
@@ -895,8 +961,7 @@ std::string TuiState::file_picker(const std::string &start_dir,
   if (!picker) return "";
 
   static constexpr uint32_t PBG_R = 18, PBG_G = 24, PBG_B = 40;
-  ncplane_set_base(picker, " ", 0,
-                   NCCHANNELS_INITIALIZER(PBG_R, PBG_G, PBG_B, PBG_R, PBG_G, PBG_B));
+  ncplane_set_base(picker, " ", 0, NCCHANNELS_INITIALIZER(PBG_R, PBG_G, PBG_B, PBG_R, PBG_G, PBG_B));
   // Build a compact hint line appropriate to the operation.
   // /rag adds 's=select dir'; /model and /embed only need file selection.
   std::string hint_line = "↑↓ navigate  Enter open/select  Esc cancel";
@@ -920,8 +985,7 @@ std::string TuiState::file_picker(const std::string &start_dir,
     ncplane_putstr_yx(picker, PH - 1, PW - 1, "╝");
 
     // Title
-    ncplane_set_channels(picker,
-                         NCCHANNELS_INITIALIZER(255, 220, 80, PBG_R, PBG_G, PBG_B));
+    ncplane_set_channels(picker, NCCHANNELS_INITIALIZER(255, 220, 80, PBG_R, PBG_G, PBG_B));
     std::string title_str = " 📂 " + title_hint + " Picker ";
     if ((int)title_str.size() > PW - 4) title_str = title_str.substr(0, PW - 4);
     ncplane_putstr_yx(picker, 0, 2, title_str.c_str());
@@ -929,12 +993,10 @@ std::string TuiState::file_picker(const std::string &start_dir,
     std::string path_display = current_dir;
     if ((int)path_display.size() > PW - 4)
       path_display = "…" + path_display.substr(path_display.size() - (PW - 5));
-    ncplane_set_channels(picker,
-                         NCCHANNELS_INITIALIZER(160, 200, 240, PBG_R, PBG_G, PBG_B));
+    ncplane_set_channels(picker, NCCHANNELS_INITIALIZER(160, 200, 240, PBG_R, PBG_G, PBG_B));
     ncplane_putstr_yx(picker, 1, 2, path_display.c_str());
     // Hint line (bottom interior row).
-    ncplane_set_channels(picker,
-                         NCCHANNELS_INITIALIZER(120, 120, 160, PBG_R, PBG_G, PBG_B));
+    ncplane_set_channels(picker, NCCHANNELS_INITIALIZER(120, 120, 160, PBG_R, PBG_G, PBG_B));
     std::string hint_trunc = hint_line;
     if ((int)hint_trunc.size() > PW - 4) hint_trunc = hint_trunc.substr(0, PW - 4);
     ncplane_putstr_yx(picker, PH - 2, 2, hint_trunc.c_str());
@@ -954,8 +1016,7 @@ std::string TuiState::file_picker(const std::string &start_dir,
       uint32_t br = is_selected ? 100 : PBG_R;
       uint32_t bg = is_selected ? 180 : PBG_G;
       uint32_t bb = is_selected ? 255 : PBG_B;
-      ncplane_set_channels(picker,
-                           NCCHANNELS_INITIALIZER(fr, fg, fb, br, bg, bb));
+      ncplane_set_channels(picker, NCCHANNELS_INITIALIZER(fr, fg, fb, br, bg, bb));
       std::string label = (is_selected ? " ▶ " : "   ") + entries[idx];
       if ((int)label.size() > PW - 2) label = label.substr(0, PW - 2);
       while ((int)label.size() < PW - 2) label += ' ';
@@ -1216,6 +1277,7 @@ struct AgentState {
 };
 
 void AgentState::apply_generation_params(const NitroConfig &cfg) {
+  //  llama->add_stop(MARKER_END_TOOL);
   llama->add_stop("<|turn|>");
   llama->add_stop("<|im_end|>");
   llama->set_max_tokens(cfg.n_max_tokens);
@@ -1347,40 +1409,6 @@ bool AgentState::rag_index(const std::string &path, TuiState &tui) {
   return true;
 }
 
-// ═══════════════════════════════════════════════════════════════════════════
-// Think-tag filtering
-// ═══════════════════════════════════════════════════════════════════════════
-// Strips everything between (and including) <think>…</think> or the
-// <|think|>…</|think|> variant from a completed line/buffer.
-// Also strips any bare close-tag that appears without a matching open-tag
-// (can happen when the open was in a previous chunk already consumed).
-// Returns the visible text that should be shown to the user.
-std::string filter_think_tags(const std::string &text) {
-  static const struct { const char *open; const char *close; } PAIRS[] = {
-    { "<think>",   "</think>"   },
-    { "<|think|>", "</|think|>" },
-  };
-  std::string out = text;
-  for (auto &p : PAIRS) {
-    std::string open(p.open), close(p.close);
-    for (;;) {
-      auto ob = out.find(open);
-      if (ob == std::string::npos) break;
-      auto ce = out.find(close, ob);
-      if (ce == std::string::npos) {
-        out.erase(ob);   // no closing tag — strip to end
-        break;
-      }
-      out.erase(ob, ce + close.size() - ob);
-    }
-    // Strip orphan close-tags (open tag was in an earlier chunk).
-    size_t pos = 0;
-    while ((pos = out.find(close, pos)) != std::string::npos)
-      out.erase(pos, close.size());
-  }
-  return out;
-}
-
 // ═══════════════════════════════════════════════════════════════════════════
 // Agent turn
 // ═══════════════════════════════════════════════════════════════════════════
@@ -1411,110 +1439,88 @@ bool AgentState::run_turn(const std::string &user_message,
     return false;
   }
   tui.append_line("Nitro: ");
+
   // in_think starts false — models that don't use <think> blocks emit
   // visible text immediately.  The spinner activates only while thinking.
-  bool in_think = false;
+  enum {t_init, t_think, t_thunk} think_mode = t_init;
   tui.set_thinking(false);
   std::string buffer;
+
+  auto invoke_tool = [&](const std::string &buffer, const std::string_view template_str) -> void {
+    std::string result = process_tool(buffer, cfg, tui);
+    std::string content = std::vformat(template_str, std::make_format_args(result));
+    if (!llama->add_message(*iter, "tool_result", content)) {
+      tui.append_line(std::string("[err] tool result inject: ") + llama->last_error());
+      tui.redraw_all();
+    }
+    if (!iter->_has_next) {
+      tui.append_line(std::string("[err] failed to evoke tool response: ") + llama->last_error());
+      tui.redraw_all();
+    }
+  };
+
+  auto start_think = [&](const std::string &tag) {
+    if (think_mode == t_init) {
+      auto pos = buffer.find(tag);
+      if (pos != std::string::npos) {
+        think_mode = t_think;
+        tui.set_thinking(true);
+        // display prededing text
+        buffer = buffer.substr(0, pos);
+      }
+    }
+  };
+
+  auto end_think = [&](const std::string &tag) {
+    if (think_mode == t_think) {
+      auto pos = buffer.find(tag);
+      if (pos != std::string::npos) {
+        think_mode = t_thunk;
+        tui.set_thinking(false);
+        // display remaining text
+        buffer = buffer.substr(pos + tag.length());
+      }
+    }
+  };
+
   while (iter->_has_next) {
     std::string tok = llama->next(*iter);
     buffer += tok;
-    // Detect think-tag transitions on the accumulated buffer so we never
-    // miss a tag that was split across two tokens.
-    bool was_thinking = in_think;
-    if (!in_think) {
-      if (buffer.find("<think>")   != std::string::npos ||
-          buffer.find("<|think|>") != std::string::npos)
-        in_think = true;
-    }
-    if (in_think) {
-      if (buffer.find("</think>")   != std::string::npos ||
-          buffer.find("</|think|>") != std::string::npos)
-        in_think = false;
-    }
-    // Update spinner: animate only while in a think block.
-    if (in_think || was_thinking) {
-      tui.set_thinking(in_think);
-      if (in_think) tui.tick_spinner();
-    }
-    auto nl = buffer.find('\n');
-    if (nl != std::string::npos) {
-      std::string text_line = buffer.substr(0, nl);
-      buffer = buffer.substr(nl + 1);
-      std::string trimmed = text_line;
-      trimmed.erase(0, trimmed.find_first_not_of(" \t"));
-      if (trimmed.substr(0, 5) == "TOOL:") {
-        std::string tail = buffer + llama->all(*iter);
-        std::string op, arg1, payload;
-        {
-          auto s1 = trimmed.find(' ');
-          if (s1 != std::string::npos) {
-            op = trimmed.substr(0, s1);
-            std::string rest = trimmed.substr(s1 + 1);
-            rest.erase(0, rest.find_first_not_of(" \t"));
-            auto s2 = rest.find(' ');
-            if (s2 != std::string::npos) {
-              arg1    = rest.substr(0, s2);
-              payload = rest.substr(s2 + 1) + tail;
-            } else {
-              arg1    = rest;
-              payload = tail;
-            }
-          } else {
-            op = trimmed;
-          }
-        }
-        std::string tool_line;
-        if (op == "TOOL:WRITE") {
-          tool_line = op + " " + arg1 + " " + payload;
-          while (!tool_line.empty() && tool_line.back() == '\n') tool_line.pop_back();
-        } else {
-          tool_line = op;
-          if (!arg1.empty()) tool_line += " " + arg1;
-          if (!payload.empty()) {
-            std::string flat = payload;
-            flat.erase(std::remove(flat.begin(), flat.end(), '\n'), flat.end());
-            while (!flat.empty() && std::isspace((unsigned char)flat.back())) flat.pop_back();
-            if (!flat.empty()) tool_line += " " + flat;
-          }
-        }
-        tui.append_line("[tool] " + op + " " + arg1 +
-                        (op == "TOOL:WRITE" ? " <content>" : ""));
-        tui.redraw_all();
-        std::string result = process_tool(tool_line, cfg, tui);
-        tui.append_line("[tool] → " +
-                        result.substr(0, 200) + (result.size() > 200 ? "…" : ""));
-        tui.redraw_all();
-        // Inject the tool result back into the conversation as a user-role
-        // message using the TOOL_RESULT: prefix that the system prompt
-        // describes.  Using "user" role ensures the injected text is correctly
-        // formatted regardless of whether the underlying llama-sb layer knows
-        // a dedicated "tool" role.
-        std::string tool_result_msg = "TOOL_RESULT: " + result;
-        if (!llama->add_message(*iter, "user", tool_result_msg)) {
-          tui.append_line(std::string("[err] tool result inject: ") + llama->last_error());
-          tui.redraw_all();
-          break;
-        }
+
+    if (think_mode == t_init) {
+      start_think("<think>");
+      start_think("<|think|>");
+      start_think("<think|>");
+      start_think("<|channel>thought");
+    }
+    if (think_mode == t_think) {
+      tui.tick_spinner();
+      end_think("</think>");
+      end_think("</|think|>");
+      end_think("<think|>");
+      end_think("<channel|>");
+    }
+    if (think_mode == t_thunk) {
+      auto tool_start = buffer.find("TOOL:");
+      if (tool_start != std::string::npos) {
+        // fetch all remaining tokens
+        invoke_tool(buffer + llama->all(*iter), "TOOL_RESULT: {}");
         buffer.clear();
-      } else if (!in_think) {
-        tui.append_token(filter_think_tags(text_line) + "\n");
+        think_mode = t_init;
+        continue;
+      }
+      auto pos = buffer.find('\n');
+      if (pos != std::string::npos && pos > 0) {
+        tui.append_token(buffer.substr(0, pos) + "\n");
+        buffer = buffer.substr(pos + 1);
       }
     }
   }
+
   if (!buffer.empty()) {
-    std::string trimmed = buffer;
-    trimmed.erase(0, trimmed.find_first_not_of(" \t"));
-    if (trimmed.substr(0, 5) == "TOOL:") {
-      std::string result = process_tool(trimmed, cfg, tui);
-      tui.append_line("[tool] → " + result.substr(0, 200));
-      tui.redraw_all();
-      std::string tool_result_msg = "TOOL_RESULT: " + result;
-      llama->add_message(*iter, "user", tool_result_msg);
-    } else if (!in_think) {
-      tui.append_token(filter_think_tags(buffer));
-    }
+    tui.append_token(buffer + "\n");
   }
+
   tui.flush_token_acc();
   tui.set_thinking(false);
   tui.tokens_per_sec = tokens_per_sec();
@@ -1556,7 +1562,9 @@ static bool path_in_sandbox(const std::string &sandbox, const std::string &path)
 
 static std::string read_file(const std::string &path) {
   std::ifstream f(path, std::ios::binary);
-  if (!f) return "ERROR: cannot open " + path;
+  if (!f) {
+    return "ERROR: cannot open [" + path + "]";
+  }
   std::ostringstream oss; oss << f.rdbuf();
   return oss.str();
 }
@@ -1661,8 +1669,11 @@ static std::string html_to_text(const std::string &html) {
       size_t sp = inner.find_first_of(" \t/\r\n");
       std::string name = (sp != std::string::npos) ? inner.substr(0, sp) : inner;
       std::transform(name.begin(), name.end(), name.begin(), ::tolower);
-      for (int k = 0; BLOCK[k]; ++k)
-        if (name == BLOCK[k]) { out += '\n'; break; }
+      for (int k = 0; BLOCK[k]; ++k) {
+        if (name == BLOCK[k]) {
+          out += '\n'; break;
+        }
+      }
       i = ce + 1;
     }
     s = out;
@@ -1750,6 +1761,7 @@ static size_t curl_write_cb(void *contents, size_t size, size_t nmemb, void *use
   }
   return total;
 }
+
 static std::string tool_curl(const std::string &url) {
   if (url.empty()) return "ERROR: TOOL:CURL requires a URL argument";
   CURL *curl = curl_easy_init();
@@ -1804,9 +1816,9 @@ static std::string process_tool(const std::string &cmd, const NitroConfig &cfg,
   std::string op, arg1, arg2;
   auto sp1 = cmd.find(' ');
   if (sp1 == std::string::npos) {
-    op = cmd;
+    op = trim(cmd);
   } else {
-    op = cmd.substr(0, sp1);
+    op = trim(cmd.substr(0, sp1));
     std::string rest = cmd.substr(sp1 + 1);
     rest.erase(0, rest.find_first_not_of(" \t"));
     auto sp2 = rest.find(' ');
@@ -1825,6 +1837,9 @@ static std::string process_tool(const std::string &cmd, const NitroConfig &cfg,
     return join_path(sandbox, p);
   };
 
+  tui.append_line("[tool] → " + op);
+  tui.redraw_all();
+
   if (op == "TOOL:DATE") {
     char buf[32]; time_t t = time(nullptr);
     strftime(buf, sizeof(buf), "%Y-%m-%d", localtime(&t));
@@ -1873,7 +1888,9 @@ static std::string process_tool(const std::string &cmd, const NitroConfig &cfg,
   }
   if (op == "TOOL:RUN") {
     std::string prog = resolve(arg1);
-    if (!path_in_sandbox(sandbox, prog)) return "ERROR: path outside sandbox";
+    if (!path_in_sandbox(sandbox, prog)) {
+      return "ERROR: path outside sandbox";
+    }
     if (!run_allowed.empty()) {
       std::string basename = fs::path(prog).filename().string();
       bool permitted = std::any_of(run_allowed.begin(), run_allowed.end(),
@@ -1885,15 +1902,21 @@ static std::string process_tool(const std::string &cmd, const NitroConfig &cfg,
     }
     std::string command = prog + " " + arg2 + " 2>&1";
     FILE *fp = popen(command.c_str(), "r");
-    if (!fp) return "ERROR: popen failed";
+    if (!fp) {
+      return "ERROR: popen failed";
+    }
     std::string out;
     char buf[256];
-    while (fgets(buf, sizeof(buf), fp)) out += buf;
+    while (fgets(buf, sizeof(buf), fp)) {
+      out += buf;
+    }
     pclose(fp);
-    if (out.size() > 4096) out = out.substr(0, 4096) + "\n…(truncated)";
+    if (out.size() > 4096) {
+      out = out.substr(0, 4096) + "\n…(truncated)";
+    }
     return out;
   }
-  return "ERROR: unknown tool: " + op;
+  return "ERROR: unknown tool: [" + op + "]";
 }
 
 // ═══════════════════════════════════════════════════════════════════════════
@@ -1905,8 +1928,9 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
   p += "You are Nitro, an agentic AI assistant for software development.\n"
     "Your sandbox (project directory) is: " + sandbox + "\n\n"
     "## Tool protocol\n"
-    "Emit tool calls on their own line. The host executes them and returns\n"
-    "TOOL_RESULT: <value> on the next line.\n\n"
+    " - Emit tool calls on their own new line. for example:\n\n"
+    "TOOL:LIST\n"
+    " - The host executes the tool and returns TOOL_RESULT: <value> on the next line.\n\n"
     "Available tools:\n"
     "  TOOL:LIST   [dir]          list files (default: sandbox root)\n"
     "  TOOL:READ   <file>         read file contents\n"
@@ -1923,7 +1947,7 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
     "- Never access files outside the sandbox.\n"
     "- Use TOOL:PERMISSION before destructive or irreversible operations.\n"
     "- Use TOOL:CURL to fetch documentation, APIs, or web content you need.\n"
-    "- Reason step-by-step inside <|think|>…</|think|> (hidden from user).\n"
+    "- Reason step-by-step inside <|think|> </|think|> (hidden from user).\n"
     "- After each tool call, explain what you did in plain English.\n\n";
   for (const auto &kf : knowledge_files) {
     std::ifstream f(kf);
@@ -2080,8 +2104,7 @@ static void handle_slash(const std::string &input,
       else if (key == "n_gpu_layers")   {
         cfg.n_gpu_layers = std::stoi(val);
         tui.append_line("[sys] n_gpu_layers will take effect on next /model load.");
-      }
-      else if (key == "run_allowed") {
+      } else if (key == "run_allowed") {
         // Accept a comma-separated list of basenames, or "none" to clear.
         cfg.run_allowed.clear();
         if (val != "none") {
@@ -2100,8 +2123,10 @@ static void handle_slash(const std::string &input,
           for (const auto &e : cfg.run_allowed) list += e + " ";
           tui.append_line("[sys] run_allowed: " + list);
         }
+      } else {
+        tui.append_line("[err] Unknown key '" + key + "'.  Try /help for list.");
+        ok = false;
       }
-      else { tui.append_line("[err] Unknown key '" + key + "'.  Try /help for list."); ok = false; }
     } catch (const std::exception &ex) {
       tui.append_line(std::string("[err] /set: ") + ex.what());
       ok = false;
@@ -2246,22 +2271,30 @@ int main(int argc, char **argv) {
     tui.redraw_all();
   }
 
+  // log_open();
+
   // ── Main loop ─────────────────────────────────────────────────────
   for (;;) {
     {
       unsigned rows = 0, cols = 0;
       notcurses_stddim_yx(tui.nc, &rows, &cols);
-      if ((int)rows != tui.term_rows || (int)cols != tui.term_cols)
+      if ((int)rows != tui.term_rows || (int)cols != tui.term_cols) {
         tui.resize();
+      }
     }
     std::string input = tui.readline_blocking();
     input.erase(0, input.find_first_not_of(" \t"));
-    if (!input.empty())
+    if (!input.empty()) {
       input.erase(input.find_last_not_of(" \t\r\n") + 1);
-    if (input.empty()) continue;
+    }
+    if (input.empty()) {
+      continue;
+    }
     tui.append_line("You: " + input);
     tui.redraw_all();
-    if (input == "exit" || input == "quit") break;
+    if (input == "exit" || input == "quit") {
+      break;
+    }
     if (input[0] == '/') {
       handle_slash(input, cfg, agent, tui);
     } else {
@@ -2269,6 +2302,8 @@ int main(int argc, char **argv) {
     }
   }
 
+  // log_close();
+
   tui.destroy();
   // Persist input history for the next session.
   tui.history.save(history_path());

From 7ecac603036610e981fda5ce18f0a535d833051f Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Mon, 25 May 2026 21:16:08 +0930
Subject: [PATCH 40/54] LLAMA: fix nitro tool handling

---
 llama/nitro.cpp | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index bdff9ea..2496a02 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -1446,8 +1446,8 @@ bool AgentState::run_turn(const std::string &user_message,
   tui.set_thinking(false);
   std::string buffer;
 
-  auto invoke_tool = [&](const std::string &buffer, const std::string_view template_str) -> void {
-    std::string result = process_tool(buffer, cfg, tui);
+  auto invoke_tool = [&](const std::string &tool, const std::string_view template_str) -> void {
+    std::string result = process_tool(tool, cfg, tui);
     std::string content = std::vformat(template_str, std::make_format_args(result));
     if (!llama->add_message(*iter, "tool_result", content)) {
       tui.append_line(std::string("[err] tool result inject: ") + llama->last_error());
@@ -1486,7 +1486,6 @@ bool AgentState::run_turn(const std::string &user_message,
   while (iter->_has_next) {
     std::string tok = llama->next(*iter);
     buffer += tok;
-
     if (think_mode == t_init) {
       start_think("<think>");
       start_think("<|think|>");
@@ -1502,7 +1501,7 @@ bool AgentState::run_turn(const std::string &user_message,
     }
     if (think_mode == t_thunk) {
       auto tool_start = buffer.find("TOOL:");
-      if (tool_start != std::string::npos) {
+      if (tool_start == 0) {
         // fetch all remaining tokens
         invoke_tool(buffer + llama->all(*iter), "TOOL_RESULT: {}");
         buffer.clear();
@@ -1510,8 +1509,8 @@ bool AgentState::run_turn(const std::string &user_message,
         continue;
       }
       auto pos = buffer.find('\n');
-      if (pos != std::string::npos && pos > 0) {
-        tui.append_token(buffer.substr(0, pos) + "\n");
+      if (pos != std::string::npos) {
+        tui.append_token(buffer.substr(0, pos + 1));
         buffer = buffer.substr(pos + 1);
       }
     }
@@ -1945,6 +1944,7 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
     "  TOOL:CURL   <url>          HTTP GET; returns response body (max 32 KB)\n\n"
     "Rules:\n"
     "- Never access files outside the sandbox.\n"
+    "- Only use one TOOL at a time. Never combine, always use each tool step by step\n"
     "- Use TOOL:PERMISSION before destructive or irreversible operations.\n"
     "- Use TOOL:CURL to fetch documentation, APIs, or web content you need.\n"
     "- Reason step-by-step inside <|think|> </|think|> (hidden from user).\n"
@@ -2200,6 +2200,8 @@ int main(int argc, char **argv) {
       cfg.embed_path = resolve_path(take_next(a.c_str()));
     } else if (a == "-g" || a == "--gpu-layers") {
       cfg.n_gpu_layers = std::stoi(take_next(a.c_str()));
+    } else if (a == "-l" || a == "--log") {
+      log_open();
     } else if (a == "-h" || a == "--help") {
       std::puts("Usage: nitro [options] [project_dir]\n"
                 "\n"
@@ -2207,6 +2209,7 @@ int main(int argc, char **argv) {
                 "  -m, --model  <path>      GGUF model to load on startup\n"
                 "  -e, --embed  <path>      embedding model for RAG\n"
                 "  -g, --gpu-layers <n>     GPU layers to offload (default: 32)\n"
+                "  -l, --log                enabled logging\n"
                 "  -h, --help               show this help\n"
                 "\n"
                 "project_dir defaults to the current working directory.\n"
@@ -2271,8 +2274,6 @@ int main(int argc, char **argv) {
     tui.redraw_all();
   }
 
-  // log_open();
-
   // ── Main loop ─────────────────────────────────────────────────────
   for (;;) {
     {
@@ -2302,8 +2303,7 @@ int main(int argc, char **argv) {
     }
   }
 
-  // log_close();
-
+  log_close();
   tui.destroy();
   // Persist input history for the next session.
   tui.history.save(history_path());

From a1fc6260ff06b0516a585628ff1384211bd8a856 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Wed, 27 May 2026 12:53:05 +0930
Subject: [PATCH 41/54] LLAMA: nitro - partial fix for RAG handling

---
 llama/CMakeLists.txt   |  13 +-
 llama/llama-sb-rag.cpp | 418 ++++++++++++++++++++++++++++++++---------
 llama/llama-sb-rag.h   |  20 ++
 llama/llama-sb.cpp     |  48 +++++
 llama/llama-sb.h       |  14 +-
 llama/llama.cpp        |   2 +-
 llama/nitro.cpp        |  82 +++++---
 7 files changed, 458 insertions(+), 139 deletions(-)

diff --git a/llama/CMakeLists.txt b/llama/CMakeLists.txt
index ef68b0f..562ba3f 100644
--- a/llama/CMakeLists.txt
+++ b/llama/CMakeLists.txt
@@ -113,7 +113,6 @@ add_subdirectory(${LLAMA_DIR})
 set(PLUGIN_SOURCES
   main.cpp
   llama-sb.cpp
-  llama-sb-rag.cpp
   ../include/param.cpp
   ../include/hashmap.cpp
   ../include/apiexec.cpp
@@ -237,6 +236,7 @@ if(NC_FOUND)
   message(STATUS "notcurses found — building nitro")
   add_executable(nitro
     nitro.cpp
+    llama-sb-rag.cpp
   )
   target_include_directories(nitro PRIVATE
     ${LLAMA_DIR}/include
@@ -262,17 +262,6 @@ else()
   message(STATUS "notcurses not found — skipping nitro (set -DNOTCURSES_DIR=... to enable)")
 endif()
 
-# -----------------------------
-# Header preparation for RAG indexer
-# -----------------------------
-add_executable(chunk_headers
-  chunk_headers.cpp
-)
-
-set_target_properties(chunk_headers PROPERTIES
-  RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin
-)
-
 # ------------------------------------------------------------------
 # Android native library
 # ------------------------------------------------------------------
diff --git a/llama/llama-sb-rag.cpp b/llama/llama-sb-rag.cpp
index a64f31f..4db4a9d 100644
--- a/llama/llama-sb-rag.cpp
+++ b/llama/llama-sb-rag.cpp
@@ -11,6 +11,7 @@
 #include <algorithm>
 #include <cmath>
 #include <cstdint>
+#include <filesystem>
 #include <fstream>
 #include <iostream>
 #include <numeric>
@@ -18,117 +19,235 @@
 #include <string>
 #include <vector>
 
-bool Llama::embed_text(const std::string &text, std::vector<float> &out, int embed_dim) {
-  vector<llama_token> tokens = tokenize(text);
-  if (tokens.size() == 0) {
-    return false;
-  }
-
-  // truncate to context window
-  int n_ctx = llama_n_ctx(_ctx);
-  int n = tokens.size();
-  if (n > n_ctx) {
-    _last_error = std::format("warning: chunk truncated {} -> {} tokens ", n, n_ctx);
-    n = n_ctx;
-    tokens.resize(n);
+namespace fs = std::filesystem;
+
+static constexpr uint32_t MAGIC = 0x52414744;
+static constexpr size_t MIN_CHUNK = 40;
+static constexpr const char *INSTRUCT_EMBED = "Instruct: Represent this API documentation for code retrieval\nQuery: ";
+static constexpr const char *INSTRUCT_QUERY = "Instruct: Given a programming question, retrieve relevant API documentation\nQuery: ";
+
+enum class ChunkType {
+  Function, Struct, Enum, Typedef, Defines, Other
+};
+
+static std::string type_name(ChunkType t) {
+  switch (t) {
+  case ChunkType::Function: return "function";
+  case ChunkType::Struct:   return "struct";
+  case ChunkType::Enum:     return "enum";
+  case ChunkType::Typedef:  return "typedef";
+  case ChunkType::Defines:  return "defines";
+  default:                  return "other";
   }
+}
 
-  llama_memory_clear(llama_get_memory(_ctx), true);
+/* ── helpers ───────────────────────────────────────────────── */
 
-  if (!batch_decode_tokens(tokens)) {
-    return false;
-  }
-
-  float *emb = llama_get_embeddings_seq(_ctx, 0);
-  if (!emb) {
-    emb = llama_get_embeddings_ith(_ctx, n - 1);
-  }
+static bool starts_with(const std::string &s, const std::string &prefix) {
+  return s.size() >= prefix.size() &&
+    s.compare(0, prefix.size(), prefix) == 0;
+}
 
-  if (!emb) {
-    _last_error = "no embedding returned\n";
-    return false;
-  }
+static bool is_blank(const std::string &s) {
+  for (char c : s) if (!isspace((unsigned char)c)) return false;
+  return true;
+}
 
-  out.assign(emb, emb + embed_dim);
+/* ── state machine ─────────────────────────────────────────── */
 
-  /* L2 normalize */
-  float norm = 0.0f;
-  for (float v : out) {
-    norm += v * v;
-  }
-  norm = std::sqrt(norm);
-  if (norm > 1e-9f) {
-    for (float &v : out) {
-      v /= norm;
-    }
-  }
+enum class State {
+  Idle, BlockComment, LineComment, Declaration, Struct, Defines
+};
 
-  return true;
-}
+template<typename EmitChunk>
 
-bool Llama::rag_load(RagDB &db, const std::string &path) {
-  std::ifstream f(path, std::ios::binary);
+static bool chunk_file(const fs::path &path, EmitChunk emit_chunk) {
+  std::ifstream f(path);
   if (!f) {
-    _last_error = std::format("rag_load: cannot open {}", path);
     return false;
   }
 
-  auto read32 = [&]() -> uint32_t {
-    uint32_t v = 0; f.read((char*)&v, 4); return v;
-  };
-  auto read16 = [&]() -> uint16_t {
-    uint16_t v = 0; f.read((char*)&v, 2); return v;
-  };
-  auto read8 = [&]() -> uint8_t {
-    uint8_t v = 0; f.read((char*)&v, 1); return v;
-  };
-  auto readstr = [&](size_t len) -> std::string {
-    std::string s(len, '\0');
-    f.read(&s[0], (std::streamsize)len);
-    return s;
+  const std::string source = path.filename().string();
+
+  State     state       = State::Idle;
+  std::string chunk;
+  ChunkType chunk_type  = ChunkType::Other;
+  int       brace_depth = 0;
+  int       paren_depth = 0;
+  int       define_count = 0;
+
+  auto flush = [&](ChunkType t) {
+    emit_chunk(source, t, chunk);
+    chunk.clear();
+    state       = State::Idle;
+    brace_depth = 0;
+    paren_depth = 0;
   };
 
-  uint32_t magic   = read32();
-  uint32_t version = read32();
-  uint32_t n       = read32();
-  uint32_t edim    = read32();
+  std::string line;
+  while (std::getline(f, line)) {
+    /* trim trailing CR */
+    if (!line.empty() && line.back() == '\r') line.pop_back();
+
+    /* find first non-whitespace for prefix checks */
+    size_t trim_pos = 0;
+    while (trim_pos < line.size() &&
+           (line[trim_pos] == ' ' || line[trim_pos] == '\t')) ++trim_pos;
+    const std::string trimmed = line.substr(trim_pos);
+
+    /* ── #define handling ─────────────────────────────────── */
+    if (starts_with(trimmed, "#define ")) {
+      if (state == State::BlockComment || state == State::LineComment) {
+        chunk += line + "\n";
+        state = State::Defines;
+        define_count = 1;
+      } else if (state == State::Defines) {
+        chunk += line + "\n";
+        define_count++;
+      } else {
+        if (chunk.size() >= MIN_CHUNK) emit_chunk(source, chunk_type, chunk);
+        chunk.clear();
+        chunk += line + "\n";
+        state = State::Defines;
+        define_count = 1;
+      }
+      continue;
+    }
 
-  if (magic != 0x52414744) {
-    _last_error = "rag_load: bad magic";
-    return false;
-  }
-  if (version != 2) {
-    _last_error = std::format("rag_load: unsupported version {} (expected 2)", version);
-    return false;
-  }
+    /* non-define while in define group */
+    if (state == State::Defines) {
+      flush(ChunkType::Defines);
+      define_count = 0;
+      /* fall through to process this line normally */
+    }
 
-  db.embed_dim = (int)edim;
-  db.chunks.resize(n);
+    /* ── block comment start ──────────────────────────────── */
+    if ((starts_with(trimmed, "/*") || starts_with(trimmed, "/**")) &&
+        state == State::Idle) {
+      if (chunk.size() >= MIN_CHUNK) emit_chunk(source, chunk_type, chunk);
+      chunk.clear();
+      chunk_type = ChunkType::Other;
+      chunk += line + "\n";
+      state = (trimmed.find("*/", 2) != std::string::npos)
+        ? State::LineComment
+        : State::BlockComment;
+      continue;
+    }
 
-  for (uint32_t i = 0; i < n; i++) {
-    RagChunk &c = db.chunks[i];
+    /* ── inside block comment ─────────────────────────────── */
+    if (state == State::BlockComment) {
+      chunk += line + "\n";
+      if (trimmed.find("*/") != std::string::npos)
+        state = State::LineComment;
+      continue;
+    }
 
-    uint32_t text_len = read32();
-    c.text = readstr(text_len);
+    /* ── // line comment ──────────────────────────────────── */
+    if (starts_with(trimmed, "//")) {
+      if (state == State::Idle) {
+        if (chunk.size() >= MIN_CHUNK) emit_chunk(source, chunk_type, chunk);
+        chunk.clear();
+        chunk += line + "\n";
+        state = State::LineComment;
+      } else if (state == State::LineComment) {
+        chunk += line + "\n";
+      }
+      continue;
+    }
 
-    uint16_t src_len = read16();
-    c.source = readstr(src_len);
+    /* ── blank line ───────────────────────────────────────── */
+    if (is_blank(trimmed)) {
+      if (state == State::LineComment)
+        flush(ChunkType::Other);
+      else if (state == State::Idle && chunk.size() >= MIN_CHUNK)
+        flush(chunk_type);
+      continue;
+    }
 
-    uint8_t type_len = read8();
-    c.type = readstr(type_len);
+    /* ── skip preprocessor noise ──────────────────────────── */
+    if (starts_with(trimmed, "#ifndef") || starts_with(trimmed, "#ifdef")  ||
+        starts_with(trimmed, "#endif")  || starts_with(trimmed, "#pragma") ||
+        starts_with(trimmed, "#include")) {
+      if (state == State::LineComment || state == State::BlockComment) {
+        chunk.clear();
+        state = State::Idle;
+      }
+      continue;
+    }
 
-    c.embedding.resize(edim);
-    f.read((char*)c.embedding.data(), (std::streamsize)(edim * sizeof(float)));
-  }
+    /* ── typedef struct / enum start ─────────────────────── */
+    if ((starts_with(trimmed, "typedef struct") ||
+         starts_with(trimmed, "typedef enum")   ||
+         starts_with(trimmed, "struct ")         ||
+         starts_with(trimmed, "enum "))          &&
+        (state == State::Idle || state == State::LineComment)) {
+
+      if (state == State::Idle && chunk.size() >= MIN_CHUNK)
+        emit_chunk(source, chunk_type, chunk);
+
+      /* preserve any comment already in chunk */
+      if (state == State::Idle) chunk.clear();
+
+      chunk += line + "\n";
+      chunk_type = starts_with(trimmed, "typedef") ? ChunkType::Typedef
+        : starts_with(trimmed, "enum ")   ? ChunkType::Enum
+        : ChunkType::Struct;
+      state = State::Struct;
+      for (char c : line) {
+        if (c == '{') ++brace_depth;
+        if (c == '}') --brace_depth;
+      }
+      if (brace_depth <= 0 && line.find(';') != std::string::npos)
+        flush(chunk_type);
+      continue;
+    }
 
-  if (!f) {
-    _last_error = "rag_load: read error";
-    return false;
+    /* ── inside struct/enum body ──────────────────────────── */
+    if (state == State::Struct) {
+      chunk += line + "\n";
+      for (char c : line) {
+        if (c == '{') ++brace_depth;
+        if (c == '}') --brace_depth;
+      }
+      if (brace_depth <= 0 && line.find(';') != std::string::npos)
+        flush(chunk_type);
+      continue;
+    }
+
+    /* ── function / other declaration ────────────────────── */
+    if (state == State::LineComment || state == State::Idle) {
+      if (state == State::Idle && chunk.size() >= MIN_CHUNK) {
+        emit_chunk(source, chunk_type, chunk);
+        chunk.clear();
+      }
+      chunk += line + "\n";
+      chunk_type = ChunkType::Function;
+      state = State::Declaration;
+      for (char c : line) {
+        if (c == '(') ++paren_depth;
+        if (c == ')') --paren_depth;
+      }
+      if (paren_depth <= 0 && line.find(';') != std::string::npos)
+        flush(ChunkType::Function);
+      continue;
+    }
+
+    /* ── multi-line declaration ───────────────────────────── */
+    if (state == State::Declaration) {
+      chunk += line + "\n";
+      for (char c : line) {
+        if (c == '(') ++paren_depth;
+        if (c == ')') --paren_depth;
+      }
+      if (paren_depth <= 0 && line.find(';') != std::string::npos)
+        flush(ChunkType::Function);
+      continue;
+    }
   }
 
-  std::cerr << "rag: loaded " << db.chunks.size()
-            << " chunks (dim=" << db.embed_dim
-            << ") from " << path << "\n";
+  /* flush remainder */
+  if (chunk.size() >= MIN_CHUNK) emit_chunk(source, chunk_type, chunk);
+
   return true;
 }
 
@@ -162,6 +281,29 @@ static std::string rag_build_context(const RagDB &db,
   return out.str();
 }
 
+// Indexes one source file: chunk_file() splits it into chunks, each chunk is
+// embedded with the INSTRUCT_EMBED prefix and appended to db.chunks.
+// Returns false if the file cannot be opened or any chunk fails to embed.
+bool Llama::rag_index(RagDB &db, const std::string &filepath) {
+  bool embed_fail = false;
+  auto emit_chunk = [&](const std::string &source, ChunkType type,
+                        const std::string &text) {
+    if (text.size() > MIN_CHUNK) {
+      RagChunk chunk;
+      chunk.text = text;
+      chunk.source = source;
+      chunk.type = type_name(type);
+      if (!embed_text(INSTRUCT_EMBED + text, chunk.embedding, db.embed_dim)) {
+        embed_fail = true;
+      } else {
+        db.chunks.push_back(std::move(chunk));
+      }
+    }
+  };
+  // chunk_file() must run before embed_fail is read: && evaluates left to
+  // right, so testing the flag first would always see its initial value.
+  return chunk_file(filepath, emit_chunk) && !embed_fail;
+}
+
 //
 // retrieve with session
 //
@@ -174,7 +316,7 @@ std::string Llama::rag_retrieve(const RagDB &db,
   }
 
   std::vector<float> qvec;
-  std::string text = "Instruct: Given a programming question, retrieve relevant API documentation\nQuery: " + query;
+  std::string text = INSTRUCT_QUERY + query;
   if (!embed_text(text, qvec, db.embed_dim)) {
     return {};
   }
@@ -183,11 +325,10 @@ std::string Llama::rag_retrieve(const RagDB &db,
   std::vector<int>   order(db.size());
   std::iota(order.begin(), order.end(), 0);
   std::vector<float> scores(db.size());
-  for (int i = 0; i < db.size(); i++)
+  for (int i = 0; i < db.size(); i++) {
     scores[i] = rag_cosine(qvec, db.chunks[i].embedding);
-
-  std::sort(order.begin(), order.end(),
-            [&](int a, int b){ return scores[a] > scores[b]; });
+  }
+  std::sort(order.begin(), order.end(), [&](int a, int b){ return scores[a] > scores[b]; });
 
   // collect top_k unseen, within budget, above threshold
   std::vector<int>   result_idx;
@@ -207,3 +348,96 @@ std::string Llama::rag_retrieve(const RagDB &db,
 
   return rag_build_context(db, result_idx, result_scores);
 }
+
+// Writes the database in the version 2 "RAGD" layout. Returns false if the
+// file cannot be written or a chunk's embedding is not embed_dim floats long.
+bool RagDB::save(const std::string &path) {
+  // validate first: writing embed_dim floats from a shorter vector would
+  // read out of bounds, and a longer one would break the record layout
+  for (const RagChunk &c : chunks) {
+    if (embed_dim < 0 || c.embedding.size() != (size_t)embed_dim) {
+      return false;
+    }
+  }
+
+  std::ofstream f(path, std::ios::binary);
+  if (!f) {
+    return false;
+  }
+  auto write32 = [&](uint32_t v) { f.write((char*)&v, 4); };
+  auto write16 = [&](uint16_t v) { f.write((char*)&v, 2); };
+  auto write8  = [&](uint8_t  v) { f.write((char*)&v, 1); };
+
+  write32(MAGIC);                   /* magic "RAGD" */
+  write32(2);                       /* version      */
+  write32((uint32_t)chunks.size()); /* n_chunks     */
+  write32((uint32_t)embed_dim);     /* embed_dim    */
+  for (const RagChunk &c : chunks) {
+    write32((uint32_t)c.text.size());
+    f.write(c.text.data(), (std::streamsize)c.text.size());
+    // source and type are cut to what their 16-bit / 8-bit length can express
+    uint16_t src_len = (uint16_t)std::min(c.source.size(), (size_t)65535);
+    write16(src_len);
+    f.write(c.source.data(), src_len);
+    uint8_t type_len = (uint8_t)std::min(c.type.size(), (size_t)255);
+    write8(type_len);
+    f.write(c.type.data(), type_len);
+    f.write((char*)c.embedding.data(), (std::streamsize)(embed_dim * sizeof(float)));
+  }
+  return f.good();
+}
+
+// Loads a database previously written by RagDB::save().
+// Returns false if the file cannot be opened, the header is not "RAGD"
+// version 2, or the file ends before all n_chunks records were read.
+bool RagDB::load(const std::string &path) {
+  std::ifstream f(path, std::ios::binary);
+  if (!f) {
+    return false;
+  }
+
+  auto read32 = [&]() -> uint32_t {
+    uint32_t v = 0; f.read((char*)&v, 4); return v;
+  };
+  auto read16 = [&]() -> uint16_t {
+    uint16_t v = 0; f.read((char*)&v, 2); return v;
+  };
+  auto read8 = [&]() -> uint8_t {
+    uint8_t v = 0; f.read((char*)&v, 1); return v;
+  };
+  auto readstr = [&](size_t len) -> std::string {
+    std::string s(len, '\0');
+    f.read(&s[0], (std::streamsize)len);
+    return s;
+  };
+
+  uint32_t magic   = read32();
+  uint32_t version = read32();
+  uint32_t n       = read32();
+  uint32_t edim    = read32();
+
+  // a short header leaves the stream in a failed state
+  if (!f || magic != MAGIC || version != 2) {
+    return false;
+  }
+
+  // stage into a local vector so a corrupt file cannot leave *this half-loaded
+  std::vector<RagChunk> loaded;
+  for (uint32_t i = 0; i < n && f; i++) {
+    RagChunk c;
+    c.text = readstr(read32());
+    c.source = readstr(read16());
+    c.type = readstr(read8());
+    c.embedding.resize(edim);
+    f.read((char*)c.embedding.data(), (std::streamsize)(edim * sizeof(float)));
+    loaded.push_back(std::move(c));
+  }
+  // any short read above means the file was truncated or unreadable
+  if (!f) {
+    return false;
+  }
+
+  embed_dim = (int)edim;
+  chunks = std::move(loaded);
+  return true;
+}
diff --git a/llama/llama-sb-rag.h b/llama/llama-sb-rag.h
index d31706c..0296f26 100644
--- a/llama/llama-sb-rag.h
+++ b/llama/llama-sb-rag.h
@@ -14,10 +14,30 @@ struct RagChunk {
   std::vector<float> embedding;
 };
 
+/* ── on-disk chunk (variable-length text) ──────────────────── */
+/*
+ * db header  (16 bytes):
+ *   uint32  magic      = 0x52414744  "RAGD"
+ *   uint32  version    = 2
+ *   uint32  n_chunks
+ *   uint32  embed_dim
+ *
+ * per chunk:
+ *   uint32  text_len
+ *   char[]  text          (text_len bytes, no null)
+ *   uint16  source_len
+ *   char[]  source        (source_len bytes, no null)
+ *   uint8   type_len
+ *   char[]  type          (type_len bytes, no null)
+ *   float[] embedding     (embed_dim floats)
+ */
 struct RagDB {
   std::vector<RagChunk> chunks;
   int embed_dim = 0;
 
+  bool load(const std::string &path);
+  bool save(const std::string &path);
+
   int  size()  const { return (int)chunks.size(); }
   bool empty() const { return chunks.empty(); }
 };
diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index a4ad885..83e881f 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -417,6 +417,54 @@ LlamaMemoryInfo Llama::memory_info() {
   return info;
 }
 
+bool Llama::embed_text(const std::string &text, std::vector<float> &out, int embed_dim) {
+  vector<llama_token> tokens = tokenize(text);
+  if (tokens.size() == 0) {
+    return false;
+  }
+
+  // truncate to context window
+  int n_ctx = llama_n_ctx(_ctx);
+  int n = tokens.size();
+  if (n > n_ctx) {
+    _last_error = std::format("warning: chunk truncated {} -> {} tokens ", n, n_ctx);
+    n = n_ctx;
+    tokens.resize(n);
+  }
+
+  llama_memory_clear(llama_get_memory(_ctx), true);
+
+  if (!batch_decode_tokens(tokens)) {
+    return false;
+  }
+
+  float *emb = llama_get_embeddings_seq(_ctx, 0);
+  if (!emb) {
+    emb = llama_get_embeddings_ith(_ctx, n - 1);
+  }
+
+  if (!emb) {
+    _last_error = "no embedding returned\n";
+    return false;
+  }
+
+  out.assign(emb, emb + embed_dim);
+
+  /* L2 normalize */
+  float norm = 0.0f;
+  for (float v : out) {
+    norm += v * v;
+  }
+  norm = std::sqrt(norm);
+  if (norm > 1e-9f) {
+    for (float &v : out) {
+      v /= norm;
+    }
+  }
+
+  return true;
+}
+
 bool Llama::batch_decode_tokens(vector<llama_token> &tokens) {
   uint32_t n_batch = llama_n_batch(_ctx);
   for (size_t i = 0; i < tokens.size(); i += n_batch) {
diff --git a/llama/llama-sb.h b/llama/llama-sb.h
index 43b7c7c..02e2359 100644
--- a/llama/llama-sb.h
+++ b/llama/llama-sb.h
@@ -101,12 +101,18 @@ struct Llama {
   // memory info
   LlamaMemoryInfo memory_info();
 
-  // rag support
-  bool embed_text(const std::string &text, std::vector<float> &out, int embed);
-  int get_embed_dim() const { return _model != nullptr ? llama_model_n_embd(_model) : 0; }
-  bool rag_load(RagDB &db, const std::string &path);
+  // creates an embedding vector of the given dimension for the given text
+  bool embed_text(const std::string &text, std::vector<float> &out, int embed_dim);
+
+  // retrieves rag query context information from the rag database
   std::string rag_retrieve(const RagDB &db, const std::string &query, int top_k, RagSession &session);
 
+  // indexes the details from the given file
+  bool rag_index(RagDB &db, const std::string &filepath);
+
+  // returns the embedding dimension for the loaded model
+  int get_embed_dim() const { return _model != nullptr ? llama_model_n_embd(_model) : 0; }
+
   private:
   bool batch_decode_tokens(vector<llama_token> &tokens);
   bool configure_sampler();
diff --git a/llama/llama.cpp b/llama/llama.cpp
index 6db1304..dbe9c0c 160000
--- a/llama/llama.cpp
+++ b/llama/llama.cpp
@@ -1 +1 @@
-Subproject commit 6db130445d29b243ee2171efb8cd61b84a1c5322
+Subproject commit dbe9c0c8ce65354c372f5d4ab507e5424a755e9f
diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index 2496a02..79b2ebe 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -34,7 +34,6 @@
 //   TOOL:DATE
 //   TOOL:TIME
 //   TOOL:RND
-//   TOOL:PERMISSION
 //   TOOL:CURL   <url>
 //
 // Copyright (C) 2026 Chris Warren-Smith  —  GPLv2 or later
@@ -209,7 +208,7 @@ class InputHistory {
   }
 
   /**
-   * @brief Load history from ~/.config/nitro.history (one entry per line).
+   * @brief Load history from ~/.config/nitro/history.txt (one entry per line).
    * Silently succeeds if the file doesn't exist.
    */
   void load(const std::string &path) {
@@ -260,7 +259,7 @@ static FILE *g_logfile = nullptr;
 
 static void log_open() {
   const char *home = getenv("HOME");
-  std::string path = std::string(home ? home : ".") + "/.config/nitro.log";
+  std::string path = std::string(home ? home : ".") + "/.config/nitro/nitro.log";
   g_logfile = fopen(path.c_str(), "a");
 }
 
@@ -285,7 +284,7 @@ static void log_write(const char *fmt, ...) {
 }
 
 // ═══════════════════════════════════════════════════════════════════════════
-// Settings persistence  (~/.config/nitro.settings.json)
+// Settings persistence  (~/.config/nitro/settings.json)
 // ═══════════════════════════════════════════════════════════════════════════
 // A minimal hand-rolled JSON reader/writer for the flat key-value settings
 // we care about.  We deliberately avoid a full JSON library dependency.
@@ -312,18 +311,18 @@ struct NitroConfig {
   std::vector<std::string> run_allowed;
 };
 
-// Returns the canonical settings path: ~/.config/nitro.settings.json
+// Returns the canonical settings path: ~/.config/nitro/settings.json
 static std::string settings_path() {
   const char *home = getenv("HOME");
   std::string base = home ? std::string(home) : ".";
-  return base + "/.config/nitro.settings.json";
+  return base + "/.config/nitro/settings.json";
 }
 
-// Returns the history file path: ~/.config/nitro.history
+// Returns the history file path: ~/.config/nitro/history.txt
 static std::string history_path() {
   const char *home = getenv("HOME");
   std::string base = home ? std::string(home) : ".";
-  return base + "/.config/nitro.history";
+  return base + "/.config/nitro/history.txt";
 }
 
 // Tiny helper: extract a quoted string value from flat JSON for a known key.
@@ -464,10 +463,9 @@ static std::string introspect(const NitroConfig &cfg) {
                      cfg.rag_top_k);
 }
 
-// Persist the current cfg to ~/.config/nitro.settings.json.
+// Persist the current cfg to ~/.config/nitro/settings.json.
 static bool save_settings(const NitroConfig &cfg) {
   std::string path = settings_path();
-  // Ensure ~/.config/ exists
   fs::path dir = fs::path(path).parent_path();
   std::error_code ec;
   fs::create_directories(dir, ec);
@@ -562,7 +560,7 @@ struct TuiState {
   void append_token(const std::string &token);
   void flush_token_acc();
   // ── interaction ───────────────────────────────────────────────────
-  void confirm_dialog(const std::string &prompt, std::string &result);
+  bool confirm_dialog(const std::string &prompt);
   // Blocking readline with history navigation, cursor, arrow-key scrolling.
   std::string readline_blocking();
   // Modal popup overlay while a long operation runs.
@@ -573,11 +571,11 @@ struct TuiState {
   void show_modal_popup(const std::string &message);
   void show_help();
   void dismiss_modal_popup();
-  // ── RAG folder picker popup ───────────────────────────────────────
+  // ── folder picker popup ───────────────────────────────────────
   // Presents an interactive directory browser to let the user choose a
   // folder (or file) to index.  Returns the selected path, or empty string
   // if the user cancelled.
-  // ── RAG / file browser popup ─────────────────────────────────────
+  // ── file browser popup ─────────────────────────────────────
   // Used by /rag, /model, and /embed to pick a path interactively.
   // Pass a hint string shown in the title bar (e.g. "RAG Folder",
   // "Model File", "Embedding Model").
@@ -898,7 +896,7 @@ void TuiState::dismiss_modal_popup() {
   }
 }
 
-// ─── TuiState::rag_folder_picker ──────────────────────────────────────────
+// ─── TuiState::file_picker ────────────────────────────────────────────────
 // Interactive directory/file browser popup.
 // Keyboard:  ↑/↓ navigate,  Enter select/descend,  Backspace go up,
 //            's' select current dir for indexing,   Esc cancel.
@@ -1099,7 +1097,7 @@ std::string TuiState::file_picker(const std::string &start_dir,
 }
 
 // ─── TuiState::confirm_dialog ─────────────────────────────────────────────
-void TuiState::confirm_dialog(const std::string &prompt, std::string &result) {
+bool TuiState::confirm_dialog(const std::string &prompt) {
   ncplane_erase(inputpl);
   ncplane_set_channels(inputpl, inp_ch(255, 200, 80));
   std::string msg = " " + prompt + " [y/n] ❯ ";
@@ -1119,9 +1117,9 @@ void TuiState::confirm_dialog(const std::string &prompt, std::string &result) {
   }
   std::string lo = answer;
   std::transform(lo.begin(), lo.end(), lo.begin(), ::tolower);
-  result = (lo == "y" || lo == "yes" || lo == "sure" || lo == "k") ? "YES" : "NO";
   redraw_input();
   notcurses_render(nc);
+  return (lo == "y" || lo == "yes" || lo == "sure" || lo == "k");
 }
 
 // ─── TuiState::readline_blocking ──────────────────────────────────────────
@@ -1389,23 +1387,31 @@ bool AgentState::rag_index(const std::string &path, TuiState &tui) {
     tui.redraw_all();
     return false;
   }
+
   auto index_one = [&](const std::string &filepath) {
     tui.append_line("[sys]   indexing: " + filepath);
     tui.redraw_all();
-    if (!embed_llama->rag_load(*rag_db, filepath)) {
+    if (!embed_llama->rag_index(*rag_db, filepath)) {
       tui.append_line(std::string("[err] rag_load: ") + embed_llama->last_error());
       tui.redraw_all();
     }
   };
+
+  // must be set before indexing
+  rag_db->embed_dim = embed_llama->get_embed_dim();
+
   fs::path rp(path);
   std::error_code ec;
   if (fs::is_directory(rp, ec)) {
     for (const auto &entry : fs::recursive_directory_iterator(rp, ec)) {
-      if (entry.is_regular_file()) index_one(entry.path().string());
+      if (entry.is_regular_file()) {
+        index_one(entry.path().string());
+      }
     }
   } else {
     index_one(path);
   }
+
   return true;
 }
 
@@ -1503,7 +1509,24 @@ bool AgentState::run_turn(const std::string &user_message,
       auto tool_start = buffer.find("TOOL:");
       if (tool_start == 0) {
         // fetch all remaining tokens
-        invoke_tool(buffer + llama->all(*iter), "TOOL_RESULT: {}");
+        invoke_tool(trim(buffer + llama->all(*iter)), "TOOL_RESULT: {}");
+        buffer.clear();
+        think_mode = t_init;
+        continue;
+      }
+      // see https://ai.google.dev/gemma/docs/core/prompt-formatting-gemma4
+      tool_start = buffer.find("<|tool_call>call:");
+      if (tool_start != std::string::npos) {
+        buffer += llama->all(*iter);
+        auto pos = buffer.find_last_not_of("}<tool_call|>");
+        if (pos != std::string::npos) {
+          buffer = buffer.substr(0, pos);
+         }
+        pos = buffer.find_first_not_of("{");
+        if (pos != std::string::npos) {
+          buffer = buffer.substr(0, pos) + buffer.substr(pos + 1);
+        }
+        invoke_tool(trim(buffer), "<|tool_response>{}<tool_response|>");
         buffer.clear();
         think_mode = t_init;
         continue;
@@ -1869,15 +1892,14 @@ static std::string process_tool(const std::string &cmd, const NitroConfig &cfg,
   }
   if (op == "TOOL:WRITE") {
     std::string p = resolve(arg1);
-    if (!path_in_sandbox(sandbox, p)) return "ERROR: path outside sandbox";
+    if (!path_in_sandbox(sandbox, p)) {
+      return "ERROR: path outside sandbox";
+    }
+    if (!tui.confirm_dialog(std::format("Allow model to write {}?", p))) {
+      return "ERROR: action prevented by user";
+    }
     std::string content = strip_code_fences(arg1, arg2);
-    return write_file(p, content) ? "OK: written to " + arg1
-      : "ERROR: write failed for " + arg1;
-  }
-  if (op == "TOOL:PERMISSION") {
-    std::string result;
-    tui.confirm_dialog("Allow model to proceed?", result);
-    return result;
+    return write_file(p, content) ? "OK: written to " + arg1 : "ERROR: write failed for " + arg1;
   }
   if (op == "TOOL:CURL") {
     return tool_curl(arg1);
@@ -1898,6 +1920,8 @@ static std::string process_tool(const std::string &cmd, const NitroConfig &cfg,
         return "ERROR: '" + basename + "' is not in the TOOL:RUN allowlist. "
           "Use /set run_allowed <name> to permit it.";
       }
+    } else if (!tui.confirm_dialog(std::format("Allow {} to run?", prog))) {
+      return "ERROR: prevented by user";
     }
     std::string command = prog + " " + arg2 + " 2>&1";
     FILE *fp = popen(command.c_str(), "r");
@@ -1939,13 +1963,11 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
     "  TOOL:DATE                  current date\n"
     "  TOOL:TIME                  current time\n"
     "  TOOL:RND                   random float\n"
-    "  TOOL:PERMISSION            ask user for explicit permission\n"
     "  TOOL:INTROSPECT            introspect your settings, top_k etc\n"
     "  TOOL:CURL   <url>          HTTP GET; returns response body (max 32 KB)\n\n"
     "Rules:\n"
     "- Never access files outside the sandbox.\n"
     "- Only use one TOOL at a time. Never combine, always use each tool step by step\n"
-    "- Use TOOL:PERMISSION before destructive or irreversible operations.\n"
     "- Use TOOL:CURL to fetch documentation, APIs, or web content you need.\n"
     "- Reason step-by-step inside <|think|> </|think|> (hidden from user).\n"
     "- After each tool call, explain what you did in plain English.\n\n";
@@ -2213,7 +2235,7 @@ int main(int argc, char **argv) {
                 "  -h, --help               show this help\n"
                 "\n"
                 "project_dir defaults to the current working directory.\n"
-                "Settings are persisted to ~/.config/nitro.settings.json.\n"
+                "Settings are persisted to ~/.config/nitro/settings.json.\n"
                 "\n"
                 "Slash commands inside nitro:\n"
                 "  /model  [path]           load / hot-reload a GGUF (picker if no path)\n"

From 4fd29cc426a25b8a003f2f1df67ab8f458df4339 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Wed, 27 May 2026 16:56:38 +0930
Subject: [PATCH 42/54] LLAMA: nitro - refactoring, added logging, escape
 handling

---
 llama/nitro.cpp | 400 ++++++++++++++++++++++--------------------------
 1 file changed, 186 insertions(+), 214 deletions(-)

diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index 79b2ebe..088ceaf 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -72,81 +72,31 @@ static std::string  strip_code_fences(const std::string &filename, const std::st
 static std::string  process_tool(const std::string &line, const NitroConfig &cfg, TuiState &tui);
 static std::string  build_system_prompt(const std::vector<std::string> &knowledge_files, const std::string &sandbox);
 
-// ─── RAG indexing ─────────────────────────────────────────────────────────────
-static constexpr int BATCH_SIZE = 512;
-
-struct Chunk {
-  std::string         text;
-  std::string         source;
-  std::string         type;
-  std::vector<float>  embedding;
+// ═══════════════════════════════════════════════════════════════════════════
+// NitroConfig
+// ═══════════════════════════════════════════════════════════════════════════
+struct NitroConfig {
+  std::string model_path;
+  std::string embed_path;
+  std::string sandbox;
+  int   n_ctx          = 65536;
+  int   n_batch        = 512;
+  int   n_gpu_layers   = 32;
+  int   n_max_tokens   = 4096;
+  int   log_level      = GGML_LOG_LEVEL_CONT;
+  float temperature    = 0.6f;
+  float top_p          = 0.95f;
+  float min_p          = 0.0f;
+  int   top_k          = 20;
+  float penalty_repeat = 1.0f;
+  int   penalty_last_n = 256;
+  std::vector<std::string> knowledge_files;
+  int   rag_top_k      = 5;
+  // TOOL:RUN allowlist — if non-empty, only these program basenames may run.
+  // Empty means "allow anything inside the sandbox" (original behaviour).
+  std::vector<std::string> run_allowed;
 };
 
-static bool json_get_string(const std::string &json,
-                            const std::string &key,
-                            std::string       &out) {
-  std::string search = "\"" + key + "\":";
-  size_t pos = json.find(search);
-  if (pos == std::string::npos) return false;
-  pos += search.size();
-  while (pos < json.size() && json[pos] == ' ') ++pos;
-  if (pos >= json.size() || json[pos] != '"') return false;
-  ++pos;
-  out.clear();
-  while (pos < json.size()) {
-    char c = json[pos++];
-    if (c == '\\' && pos < json.size()) {
-      char e = json[pos++];
-      switch (e) {
-      case 'n':  out += '\n'; break;
-      case 't':  out += '\t'; break;
-      case '"':  out += '"';  break;
-      case '\\': out += '\\'; break;
-      default:   out += e;    break;
-      }
-    } else if (c == '"') {
-      break;
-    } else {
-      out += c;
-    }
-  }
-  return true;
-}
-
-static bool save_db(const std::string        &path,
-                    const std::vector<Chunk> &chunks,
-                    int                       embed_dim) {
-  std::ofstream f(path, std::ios::binary);
-  if (!f) {
-    std::fprintf(stderr, "cannot open for write: %s\n\n", path);
-    return false;
-  }
-  auto write32 = [&](uint32_t v) { f.write((char*)&v, 4); };
-  auto write16 = [&](uint16_t v) { f.write((char*)&v, 2); };
-  auto write8  = [&](uint8_t  v) { f.write((char*)&v, 1); };
-  auto writestr = [&](const std::string &s, size_t max_len) {
-    size_t len = std::min(s.size(), max_len);
-    f.write(s.c_str(), (std::streamsize)len);
-  };
-  write32(0x52414744);
-  write32(2);
-  write32((uint32_t)chunks.size());
-  write32((uint32_t)embed_dim);
-  for (const Chunk &c : chunks) {
-    write32((uint32_t)c.text.size());
-    f.write(c.text.c_str(), (std::streamsize)c.text.size());
-    uint16_t src_len = (uint16_t)std::min(c.source.size(), (size_t)65535);
-    write16(src_len);
-    writestr(c.source, src_len);
-    uint8_t type_len = (uint8_t)std::min(c.type.size(), (size_t)255);
-    write8(type_len);
-    writestr(c.type, type_len);
-    f.write((char*)c.embedding.data(),
-            (std::streamsize)(embed_dim * sizeof(float)));
-  }
-  return f.good();
-}
-
 // ═══════════════════════════════════════════════════════════════════════════
 // InputHistory — up/down arrow navigation through submitted inputs
 // ═══════════════════════════════════════════════════════════════════════════
@@ -251,6 +201,121 @@ class InputHistory {
   int current_index = 0;
 };
 
+// ═══════════════════════════════════════════════════════════════════════════
+// Notcurses TUI
+// ═══════════════════════════════════════════════════════════════════════════
+//
+//  ┌──────────────────── header (1 row) ─────────────────────────────────┐
+//  │ ✦ NITRO  model: …  tok/s: …  KV: …%  VRAM: …%                       │
+//  ├─────────────────────────────────────────────────────────────────────┤
+//  │                                                                     │
+//  │  chat pane  (rows 1 … term_rows-3)                                  │
+//  │                                                                     │
+//  ├─────────────────────────────────────────────────────────────────────┤
+//  │ ─────────────────────────────────────  (separator)                  │
+//  │ ❯ input                                                             │
+//  └─────────────────────────────────────────────────────────────────────┘
+struct TuiState {
+  // ── notcurses handles ──────────────────────────────────────────────
+  struct notcurses *nc      = nullptr;
+  struct ncplane   *stdpl   = nullptr;
+  struct ncplane   *header  = nullptr;
+  struct ncplane   *chatpl  = nullptr;
+  struct ncplane   *inputpl = nullptr;
+  // ── chat buffer ───────────────────────────────────────────────────
+  std::vector<std::string> chat_lines;
+  int scroll_offset = 0;
+  std::mutex lines_mutex;
+  // ── streaming accumulator ─────────────────────────────────────────
+  std::string token_acc;
+  // ── input ─────────────────────────────────────────────────────────
+  std::string input_buf;
+  size_t      cursor_pos = 0;
+  bool        mouse_mode = true;
+  // ── status bar values ─────────────────────────────────────────────
+  std::string current_model  = "none";
+  float       tokens_per_sec = 0.0f;
+  int         kv_used        = 0;
+  int         kv_total       = 1;
+  size_t      vram_used      = 0;
+  size_t      vram_total     = 1;
+  int term_rows = 0;
+  int term_cols = 0;
+  // ── thinking spinner ──────────────────────────────────────────────
+  bool    thinking      = false;
+  int     spinner_frame = 0;
+  // ── input history ─────────────────────────────────────────────────
+  InputHistory history;
+  // Advance spinner by one frame and redraw the header.
+  void tick_spinner();
+  // Toggle thinking mode; redraws header immediately.
+  void set_thinking(bool on);
+  // ── lifecycle ─────────────────────────────────────────────────────
+  void init();
+  void destroy();
+  void resize();
+  // ── draw ──────────────────────────────────────────────────────────
+  void redraw_header();
+  void redraw_chat();
+  void redraw_input();
+  void redraw_all();
+  // ── content helpers ───────────────────────────────────────────────
+  void append_line(const std::string &line);
+  void append_token(const std::string &token);
+  void flush_token_acc();
+  // ── interaction ───────────────────────────────────────────────────
+  bool confirm_dialog(const std::string &prompt);
+  // Blocking readline with history navigation, cursor, arrow-key scrolling.
+  std::string readline_blocking();
+  // Modal popup overlay while a long operation runs.
+  // Call show_modal_popup to display; dismiss_modal_popup to remove.
+  // The popup plane is stored in modal_plane; callers hold it as an opaque
+  // handle — or just use the paired helpers below.
+  struct ncplane *modal_plane = nullptr;
+  void show_modal_popup(const std::string &message);
+  void show_help();
+  void dismiss_modal_popup();
+  // ── folder picker popup ───────────────────────────────────────
+  // Presents an interactive directory browser to let the user choose a
+  // folder (or file) to index.  Returns the selected path, or empty string
+  // if the user cancelled.
+  // ── file browser popup ─────────────────────────────────────
+  // Used by /rag, /model, and /embed to pick a path interactively.
+  // Pass a hint string shown in the title bar (e.g. "RAG Folder",
+  // "Model File", "Embedding Model").
+  // Returns the selected path, or empty string if the user cancelled.
+  std::string file_picker(const std::string &start_dir,
+                          const std::string &title_hint = "File");
+  // Legacy alias kept for callers that used the old name.
+  std::string rag_folder_picker(const std::string &start_dir) {
+    return file_picker(start_dir, "RAG Folder");
+  }
+};
+
+// ═══════════════════════════════════════════════════════════════════════════
+// AgentState
+// ═══════════════════════════════════════════════════════════════════════════
+struct AgentState {
+  std::unique_ptr<Llama> llama;
+  std::unique_ptr<LlamaIter> iter;
+  std::unique_ptr<Llama> embed_llama;
+  std::unique_ptr<RagDB>      rag_db;
+  std::unique_ptr<RagSession> rag_session;
+  bool model_loaded = false;
+  std::string system_prompt;
+
+  bool setup_model(const NitroConfig &cfg, TuiState &tui);
+  bool setup_embed(const std::string &path, TuiState &tui);
+  void apply_generation_params(const NitroConfig &cfg);
+  void reset_conversation(const std::string &sysprompt, TuiState &tui);
+  bool run_turn(const std::string &user_message,
+                const NitroConfig &cfg,
+                TuiState          &tui);
+  bool rag_index(const std::string &path, TuiState &tui);
+  std::string memory_info_text();
+  float tokens_per_sec() const;
+};
+
 // ═══════════════════════════════════════════════════════════════════════════
 // Logging
 // ═══════════════════════════════════════════════════════════════════════════
@@ -289,28 +354,6 @@ static void log_write(const char *fmt, ...) {
 // A minimal hand-rolled JSON reader/writer for the flat key-value settings
 // we care about.  We deliberately avoid a full JSON library dependency.
 
-struct NitroConfig {
-  std::string model_path;
-  std::string embed_path;
-  std::string sandbox;
-  int   n_ctx          = 65536;
-  int   n_batch        = 512;
-  int   n_gpu_layers   = 32;
-  int   n_max_tokens   = 4096;
-  int   log_level      = GGML_LOG_LEVEL_CONT;
-  float temperature    = 0.6f;
-  float top_p          = 0.95f;
-  float min_p          = 0.0f;
-  int   top_k          = 20;
-  float penalty_repeat = 1.0f;
-  int   penalty_last_n = 256;
-  std::vector<std::string> knowledge_files;
-  int   rag_top_k      = 5;
-  // TOOL:RUN allowlist — if non-empty, only these program basenames may run.
-  // Empty means "allow anything inside the sandbox" (original behaviour).
-  std::vector<std::string> run_allowed;
-};
-
 // Returns the canonical settings path: ~/.config/nitro/settings.json
 static std::string settings_path() {
   const char *home = getenv("HOME");
@@ -325,6 +368,37 @@ static std::string history_path() {
   return base + "/.config/nitro/history.txt";
 }
 
+static bool json_get_string(const std::string &json,
+                            const std::string &key,
+                            std::string       &out) {
+  std::string search = "\"" + key + "\":";
+  size_t pos = json.find(search);
+  if (pos == std::string::npos) return false;
+  pos += search.size();
+  while (pos < json.size() && json[pos] == ' ') ++pos;
+  if (pos >= json.size() || json[pos] != '"') return false;
+  ++pos;
+  out.clear();
+  while (pos < json.size()) {
+    char c = json[pos++];
+    if (c == '\\' && pos < json.size()) {
+      char e = json[pos++];
+      switch (e) {
+      case 'n':  out += '\n'; break;
+      case 't':  out += '\t'; break;
+      case '"':  out += '"';  break;
+      case '\\': out += '\\'; break;
+      default:   out += e;    break;
+      }
+    } else if (c == '"') {
+      break;
+    } else {
+      out += c;
+    }
+  }
+  return true;
+}
+
 // Tiny helper: extract a quoted string value from flat JSON for a known key.
 static bool settings_get_str(const std::string &json,
                              const std::string &key,
@@ -497,97 +571,6 @@ std::string trim(std::string_view str) {
   return std::string(str.substr(start, end - start + 1));
 }
 
-// ═══════════════════════════════════════════════════════════════════════════
-// Notcurses TUI
-// ═══════════════════════════════════════════════════════════════════════════
-//
-//  ┌──────────────────── header (1 row) ─────────────────────────────────┐
-//  │ ✦ NITRO  model: …  tok/s: …  KV: …%  VRAM: …%                       │
-//  ├─────────────────────────────────────────────────────────────────────┤
-//  │                                                                     │
-//  │  chat pane  (rows 1 … term_rows-3)                                  │
-//  │                                                                     │
-//  ├─────────────────────────────────────────────────────────────────────┤
-//  │ ─────────────────────────────────────  (separator)                  │
-//  │ ❯ input                                                             │
-//  └─────────────────────────────────────────────────────────────────────┘
-struct TuiState {
-  // ── notcurses handles ──────────────────────────────────────────────
-  struct notcurses *nc      = nullptr;
-  struct ncplane   *stdpl   = nullptr;
-  struct ncplane   *header  = nullptr;
-  struct ncplane   *chatpl  = nullptr;
-  struct ncplane   *inputpl = nullptr;
-  // ── chat buffer ───────────────────────────────────────────────────
-  std::vector<std::string> chat_lines;
-  int scroll_offset = 0;
-  std::mutex lines_mutex;
-  // ── streaming accumulator ─────────────────────────────────────────
-  std::string token_acc;
-  // ── input ─────────────────────────────────────────────────────────
-  std::string input_buf;
-  size_t      cursor_pos = 0;
-  bool        mouse_mode = true;
-  // ── status bar values ─────────────────────────────────────────────
-  std::string current_model  = "none";
-  float       tokens_per_sec = 0.0f;
-  int         kv_used        = 0;
-  int         kv_total       = 1;
-  size_t      vram_used      = 0;
-  size_t      vram_total     = 1;
-  int term_rows = 0;
-  int term_cols = 0;
-  // ── thinking spinner ──────────────────────────────────────────────
-  bool    thinking      = false;
-  int     spinner_frame = 0;
-  // ── input history ─────────────────────────────────────────────────
-  InputHistory history;
-  // Advance spinner by one frame and redraw the header.
-  void tick_spinner();
-  // Toggle thinking mode; redraws header immediately.
-  void set_thinking(bool on);
-  // ── lifecycle ─────────────────────────────────────────────────────
-  void init();
-  void destroy();
-  void resize();
-  // ── draw ──────────────────────────────────────────────────────────
-  void redraw_header();
-  void redraw_chat();
-  void redraw_input();
-  void redraw_all();
-  // ── content helpers ───────────────────────────────────────────────
-  void append_line(const std::string &line);
-  void append_token(const std::string &token);
-  void flush_token_acc();
-  // ── interaction ───────────────────────────────────────────────────
-  bool confirm_dialog(const std::string &prompt);
-  // Blocking readline with history navigation, cursor, arrow-key scrolling.
-  std::string readline_blocking();
-  // Modal popup overlay while a long operation runs.
-  // Call show_modal_popup to display; dismiss_modal_popup to remove.
-  // The popup plane is stored in modal_plane; callers hold it as an opaque
-  // handle — or just use the paired helpers below.
-  struct ncplane *modal_plane = nullptr;
-  void show_modal_popup(const std::string &message);
-  void show_help();
-  void dismiss_modal_popup();
-  // ── folder picker popup ───────────────────────────────────────
-  // Presents an interactive directory browser to let the user choose a
-  // folder (or file) to index.  Returns the selected path, or empty string
-  // if the user cancelled.
-  // ── file browser popup ─────────────────────────────────────
-  // Used by /rag, /model, and /embed to pick a path interactively.
-  // Pass a hint string shown in the title bar (e.g. "RAG Folder",
-  // "Model File", "Embedding Model").
-  // Returns the selected path, or empty string if the user cancelled.
-  std::string file_picker(const std::string &start_dir,
-                          const std::string &title_hint = "File");
-  // Legacy alias kept for callers that used the old name.
-  std::string rag_folder_picker(const std::string &start_dir) {
-    return file_picker(start_dir, "RAG Folder");
-  }
-};
-
 // ─── colour helpers ──────────────────────────────────────────────────────
 static constexpr uint32_t BG_CHAT_R = 18,  BG_CHAT_G = 22,  BG_CHAT_B = 30;
 static constexpr uint32_t BG_INP_R  = 22,  BG_INP_G  = 28,  BG_INP_B  = 38;
@@ -1205,7 +1188,7 @@ std::string TuiState::readline_blocking() {
       notcurses_render(nc);
       continue;
     }
-    if (ni.id == NCKEY_SCROLL_DOWN && scroll_offset > 1) {
+    if (ni.id == NCKEY_SCROLL_DOWN && scroll_offset > 0) {
       scroll_offset -= 1;
       redraw_chat();
       notcurses_render(nc);
@@ -1250,30 +1233,6 @@ std::string TuiState::readline_blocking() {
   }
 }
 
-// ═══════════════════════════════════════════════════════════════════════════
-// AgentState
-// ═══════════════════════════════════════════════════════════════════════════
-struct AgentState {
-  std::unique_ptr<Llama> llama;
-  std::unique_ptr<LlamaIter> iter;
-  std::unique_ptr<Llama> embed_llama;
-  std::unique_ptr<RagDB>      rag_db;
-  std::unique_ptr<RagSession> rag_session;
-  bool model_loaded = false;
-  std::string system_prompt;
-
-  bool setup_model(const NitroConfig &cfg, TuiState &tui);
-  bool setup_embed(const std::string &path, TuiState &tui);
-  void apply_generation_params(const NitroConfig &cfg);
-  void reset_conversation(const std::string &sysprompt, TuiState &tui);
-  bool run_turn(const std::string &user_message,
-                const NitroConfig &cfg,
-                TuiState          &tui);
-  bool rag_index(const std::string &path, TuiState &tui);
-  std::string memory_info_text();
-  float tokens_per_sec() const;
-};
-
 void AgentState::apply_generation_params(const NitroConfig &cfg) {
   //  llama->add_stop(MARKER_END_TOOL);
   llama->add_stop("<|turn|>");
@@ -1428,10 +1387,12 @@ bool AgentState::run_turn(const std::string &user_message,
   }
   std::string effective_message = user_message;
   if (embed_llama && rag_db && rag_session) {
-    std::string context = llama->rag_retrieve(*rag_db, user_message,
-                                              cfg.rag_top_k, *rag_session);
+    std::string context = embed_llama->rag_retrieve(*rag_db, user_message, cfg.rag_top_k, *rag_session);
     if (!context.empty()) {
+      log_write("RAG: %s", context.c_str());
       effective_message = "Context:\n" + context + "\n\nUser: " + user_message;
+    } else {
+      log_write("RAG: no context found [%s]", embed_llama->last_error());
     }
   }
   if (!iter) {
@@ -1490,6 +1451,14 @@ bool AgentState::run_turn(const std::string &user_message,
   };
 
   while (iter->_has_next) {
+    ncinput ni{};
+    notcurses_get_nblock(tui.nc, &ni);
+    if (ni.id == NCKEY_ESC) {
+      tui.set_thinking(false);
+      tui.append_line("[err] Generation cancelled by user (Escape)");
+      tui.redraw_all();
+      return false;
+    }
     std::string tok = llama->next(*iter);
     buffer += tok;
     if (think_mode == t_init) {
@@ -1521,7 +1490,7 @@ bool AgentState::run_turn(const std::string &user_message,
         auto pos = buffer.find_last_not_of("}<tool_call|>");
         if (pos != std::string::npos) {
           buffer = buffer.substr(0, pos);
-         }
+        }
         pos = buffer.find_first_not_of("{");
         if (pos != std::string::npos) {
           buffer = buffer.substr(0, pos) + buffer.substr(pos + 1);
@@ -2280,6 +2249,8 @@ int main(int argc, char **argv) {
   tui.history.load(history_path());
   welcome(tui, cfg.sandbox);
 
+  log_write("nitro starting");
+
   // ── Init agent ────────────────────────────────────────────────────
   AgentState agent;
   if (!cfg.model_path.empty()) {
@@ -2325,6 +2296,7 @@ int main(int argc, char **argv) {
     }
   }
 
+  log_write("nitro exiting");
   log_close();
   tui.destroy();
   // Persist input history for the next session.

From dd53542d7db4a239c1368d09d9b0b5a01e8a7772 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Wed, 27 May 2026 19:30:37 +0930
Subject: [PATCH 43/54] LLAMA: nitro - code cleanup added RAG tool

---
 llama/llama-sb-rag.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/llama/llama-sb-rag.cpp b/llama/llama-sb-rag.cpp
index 4db4a9d..0b11d04 100644
--- a/llama/llama-sb-rag.cpp
+++ b/llama/llama-sb-rag.cpp
@@ -312,12 +312,14 @@ std::string Llama::rag_retrieve(const RagDB &db,
                                 int top_k,
                                 RagSession &session) {
   if (db.empty()) {
+    _last_error = "no input";
     return {};
   }
 
   std::vector<float> qvec;
   std::string text = INSTRUCT_QUERY + query;
   if (!embed_text(text, qvec, db.embed_dim)) {
+    _last_error = "failed to embed text";
     return {};
   }
 

From 4dab858533fd9422f71be544316ff4befa252b0e Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Wed, 27 May 2026 20:42:33 +0930
Subject: [PATCH 44/54] LLAMA: nitro - load and save rag index files

---
 llama/llama.cpp |   2 +-
 llama/nitro.cpp | 393 +++++++++++++++++++++++++++---------------------
 2 files changed, 225 insertions(+), 170 deletions(-)

diff --git a/llama/llama.cpp b/llama/llama.cpp
index dbe9c0c..4d8cc0c 160000
--- a/llama/llama.cpp
+++ b/llama/llama.cpp
@@ -1 +1 @@
-Subproject commit dbe9c0c8ce65354c372f5d4ab507e5424a755e9f
+Subproject commit 4d8cc0c56ffba3f8b7fdb0130627fed2a6f71958
diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index 088ceaf..bd97714 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -57,24 +57,23 @@
 
 namespace fs = std::filesystem;
 
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // Forward declarations
-// ═══════════════════════════════════════════════════════════════════════════
+//
 struct NitroConfig;
 struct TuiState;
 struct AgentState;
-static std::string  join_path(const std::string &a, const std::string &b);
-static std::string  read_file(const std::string &path);
+static bool         path_in_sandbox(const std::string &sandbox, const std::string &path);
 static bool         write_file(const std::string &path, const std::string &data);
+static std::string  build_system_prompt(const std::vector<std::string> &knowledge_files, const std::string &sandbox);
+static std::string  join_path(const std::string &a, const std::string &b);
 static std::string  list_dir(const std::string &path);
-static bool         path_in_sandbox(const std::string &sandbox, const std::string &path);
+static std::string  read_file(const std::string &path);
 static std::string  strip_code_fences(const std::string &filename, const std::string &src);
-static std::string  process_tool(const std::string &line, const NitroConfig &cfg, TuiState &tui);
-static std::string  build_system_prompt(const std::vector<std::string> &knowledge_files, const std::string &sandbox);
-
-// ═══════════════════════════════════════════════════════════════════════════
+static std::string  tool_curl(const std::string &url);
+//
 // NitroConfig
-// ═══════════════════════════════════════════════════════════════════════════
+//
 struct NitroConfig {
   std::string model_path;
   std::string embed_path;
@@ -97,9 +96,9 @@ struct NitroConfig {
   std::vector<std::string> run_allowed;
 };
 
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // InputHistory — up/down arrow navigation through submitted inputs
-// ═══════════════════════════════════════════════════════════════════════════
+//
 class InputHistory {
   public:
   explicit InputHistory() = default;
@@ -201,9 +200,9 @@ class InputHistory {
   int current_index = 0;
 };
 
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // Notcurses TUI
-// ═══════════════════════════════════════════════════════════════════════════
+//
 //
 //  ┌──────────────────── header (1 row) ─────────────────────────────────┐
 //  │ ✦ NITRO  model: …  tok/s: …  KV: …%  VRAM: …%                       │
@@ -292,9 +291,9 @@ struct TuiState {
   }
 };
 
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // AgentState
-// ═══════════════════════════════════════════════════════════════════════════
+//
 struct AgentState {
   std::unique_ptr<Llama> llama;
   std::unique_ptr<LlamaIter> iter;
@@ -304,21 +303,23 @@ struct AgentState {
   bool model_loaded = false;
   std::string system_prompt;
 
-  bool setup_model(const NitroConfig &cfg, TuiState &tui);
+  bool rag_index(const std::string &path, const NitroConfig &cfg, TuiState &tui);
+  bool rag_load_index(const std::string &path, TuiState &tui);
+  bool run_turn(const std::string &user_message, const NitroConfig &cfg, TuiState &tui);
   bool setup_embed(const std::string &path, TuiState &tui);
+  bool setup_model(const NitroConfig &cfg, TuiState &tui);
   void apply_generation_params(const NitroConfig &cfg);
   void reset_conversation(const std::string &sysprompt, TuiState &tui);
-  bool run_turn(const std::string &user_message,
-                const NitroConfig &cfg,
-                TuiState          &tui);
-  bool rag_index(const std::string &path, TuiState &tui);
   std::string memory_info_text();
+  std::string process_tool(const std::string &cmd, const NitroConfig &cfg, TuiState &tui);
+  std::string rag_tool(const NitroConfig &cfg, const std::string &agent_query);
   float tokens_per_sec() const;
 };
 
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // Logging
-// ═══════════════════════════════════════════════════════════════════════════
+//
+
 // ─── Debug logging (file-backed, safe to call while notcurses is active) ──
 static FILE *g_logfile = nullptr;
 
@@ -348,9 +349,9 @@ static void log_write(const char *fmt, ...) {
   fflush(g_logfile);  // flush immediately so tail -f works
 }
 
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // Settings persistence  (~/.config/nitro/nitro.settings.json)
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // A minimal hand-rolled JSON reader/writer for the flat key-value settings
 // we care about.  We deliberately avoid a full JSON library dependency.
 
@@ -588,7 +589,9 @@ static inline uint64_t hdr_ch(uint32_t r, uint32_t g, uint32_t b) {
   return NCCHANNELS_INITIALIZER(r, g, b, BG_HDR_R, BG_HDR_G, BG_HDR_B);
 }
 
-// ─── TuiState::init ──────────────────────────────────────────────────────
+//
+// TuiState::init
+//
 void TuiState::init() {
   notcurses_options opts{};
   opts.flags = NCOPTION_SUPPRESS_BANNERS;
@@ -637,7 +640,9 @@ void TuiState::resize() {
   redraw_all();
 }
 
-// ─── TuiState::redraw_* ──────────────────────────────────────────────────
+//
+// TuiState::redraw
+//
 void TuiState::redraw_header() {
   ncplane_erase(header);
   ncplane_set_base(header, " ", 0,
@@ -751,7 +756,9 @@ void TuiState::set_thinking(bool on) {
   notcurses_render(nc);
 }
 
-// ─── TuiState content helpers ─────────────────────────────────────────────
+//
+// TuiState content helpers
+//
 void TuiState::append_line(const std::string &line) {
   std::lock_guard<std::mutex> lk(lines_mutex);
   int w = std::max(1, term_cols - 1);
@@ -787,9 +794,10 @@ void TuiState::flush_token_acc() {
   }
 }
 
-// ─── TuiState::show_modal_popup / dismiss_modal_popup ─────────────────────
+//
 // Creates a centred floating plane with a border and a status message.
 // The popup sits above all other planes and blocks until explicitly dismissed.
+//
 void TuiState::show_modal_popup(const std::string &message) {
   // Dismiss any previous popup first.
   dismiss_modal_popup();
@@ -879,6 +887,7 @@ void TuiState::dismiss_modal_popup() {
   }
 }
 
+//
 // ─── TuiState::file_picker ────────────────────────────────────────────────
 // Interactive directory/file browser popup.
 // Keyboard:  ↑/↓ navigate,  Enter select/descend,  Backspace go up,
@@ -896,6 +905,7 @@ void TuiState::dismiss_modal_popup() {
 //   Esc        cancel → returns ""
 //
 // Returns the chosen path, or "" on cancel.
+//
 std::string TuiState::file_picker(const std::string &start_dir,
                                   const std::string &title_hint) {
   std::string current_dir = start_dir;
@@ -1079,7 +1089,9 @@ std::string TuiState::file_picker(const std::string &start_dir,
   return result;
 }
 
+//
 // ─── TuiState::confirm_dialog ─────────────────────────────────────────────
+//
 bool TuiState::confirm_dialog(const std::string &prompt) {
   ncplane_erase(inputpl);
   ncplane_set_channels(inputpl, inp_ch(255, 200, 80));
@@ -1105,9 +1117,10 @@ bool TuiState::confirm_dialog(const std::string &prompt) {
   return (lo == "y" || lo == "yes" || lo == "sure" || lo == "k");
 }
 
-// ─── TuiState::readline_blocking ──────────────────────────────────────────
+//
 // Integrates InputHistory:  Up/Down arrows navigate the history stack.
 // On submit the entry is pushed to history, and nav is reset.
+//
 std::string TuiState::readline_blocking() {
   input_buf.clear();
   cursor_pos = 0;
@@ -1234,7 +1247,6 @@ std::string TuiState::readline_blocking() {
 }
 
 void AgentState::apply_generation_params(const NitroConfig &cfg) {
-  //  llama->add_stop(MARKER_END_TOOL);
   llama->add_stop("<|turn|>");
   llama->add_stop("<|im_end|>");
   llama->set_max_tokens(cfg.n_max_tokens);
@@ -1247,8 +1259,9 @@ void AgentState::apply_generation_params(const NitroConfig &cfg) {
   llama->set_log_level(cfg.log_level);
 }
 
-// ─── AgentState::setup_model ──────────────────────────────────────────────
+//
 // Shows a modal loading popup while the model loads.
+//
 bool AgentState::setup_model(const NitroConfig &cfg, TuiState &tui) {
   if (cfg.model_path.empty()) {
     tui.append_line("[sys] No model loaded.  Use /model <path> to load a GGUF.");
@@ -1340,7 +1353,35 @@ std::string AgentState::memory_info_text() {
   return oss.str();
 }
 
-bool AgentState::rag_index(const std::string &path, TuiState &tui) {
+std::string AgentState::rag_tool(const NitroConfig &cfg, const std::string &agent_query) {
+  std::string result;
+  if (embed_llama && rag_db && rag_session) {
+    result = embed_llama->rag_retrieve(*rag_db, agent_query, cfg.rag_top_k, *rag_session);
+    if (result.empty()) {
+      result = "RAG: no context found";
+    }
+  } else {
+    result = "RAG: not enabled";
+  }
+  return result;
+}
+
+bool AgentState::rag_load_index(const std::string &path, TuiState &tui) {
+  if (!embed_llama || !rag_db) {
+    tui.append_line("[err] Load an embedding model first: /embed <path>");
+    tui.redraw_all();
+    return false;
+  }
+  // Report a failed load to the caller instead of claiming success.
+  if (!rag_db->load(path)) {
+    tui.append_line("[sys] failed to load");
+    tui.redraw_all();
+    return false;
+  }
+  return true;
+}
+
+bool AgentState::rag_index(const std::string &path, const NitroConfig &cfg, TuiState &tui) {
   if (!embed_llama || !rag_db) {
     tui.append_line("[err] Load an embedding model first: /embed <path>");
     tui.redraw_all();
@@ -1371,15 +1412,135 @@ bool AgentState::rag_index(const std::string &path, TuiState &tui) {
     index_one(path);
   }
 
+  std::string save_path = join_path(cfg.sandbox, "rag-index.bin");
+  tui.append_line("[sys] saving index: " + save_path);
+  tui.redraw_all();
+  rag_db->save(save_path);
+
   return true;
 }
 
-// ═══════════════════════════════════════════════════════════════════════════
+//
+// Tool dispatch
+//
+std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &cfg, TuiState &tui) {
+  const std::string &sandbox = cfg.sandbox;
+  const std::vector<std::string> &run_allowed = cfg.run_allowed;
+
+  std::string op, arg1, arg2;
+  auto sp1 = cmd.find(' ');
+  if (sp1 == std::string::npos) {
+    op = trim(cmd);
+  } else {
+    op = trim(cmd.substr(0, sp1));
+    std::string rest = cmd.substr(sp1 + 1);
+    rest.erase(0, rest.find_first_not_of(" \t"));
+    auto sp2 = rest.find(' ');
+    if (sp2 == std::string::npos) {
+      arg1 = rest;
+    } else {
+      arg1 = rest.substr(0, sp2);
+      arg2 = rest.substr(sp2 + 1);
+    }
+  }
+
+  auto resolve = [&](const std::string &p) -> std::string {
+    if (p.empty() || p == ".") return sandbox;
+    if (p.substr(0, 2) == "./") return join_path(sandbox, p.substr(2));
+    if (p[0] == '/') return p;
+    return join_path(sandbox, p);
+  };
+
+  tui.append_line("[tool] → " + op);
+  tui.redraw_all();
+
+  if (op == "TOOL:DATE") {
+    char buf[32]; time_t t = time(nullptr);
+    strftime(buf, sizeof(buf), "%Y-%m-%d", localtime(&t));
+    return buf;
+  }
+  if (op == "TOOL:TIME") {
+    char buf[32]; time_t t = time(nullptr);
+    strftime(buf, sizeof(buf), "%H:%M:%S", localtime(&t));
+    return buf;
+  }
+  if (op == "TOOL:RND") {
+    return std::to_string((double)rand() / RAND_MAX);
+  }
+  if (op == "TOOL:RAG") {
+    return rag_tool(cfg, arg2.empty() ? arg1 : arg1 + " " + arg2);  // whole query, not just its first word
+  }
+  if (op == "TOOL:LIST") {
+    std::string dir = resolve(arg1);
+    if (!path_in_sandbox(sandbox, dir)) return "ERROR: path outside sandbox";
+    return list_dir(dir);
+  }
+  if (op == "TOOL:EXISTS") {
+    std::string p = resolve(arg1);
+    if (!path_in_sandbox(sandbox, p)) return "NO";
+    return fs::exists(p) ? "YES" : "NO";
+  }
+  if (op == "TOOL:READ") {
+    std::string p = resolve(arg1);
+    if (!path_in_sandbox(sandbox, p)) return "ERROR: path outside sandbox";
+    return read_file(p);
+  }
+  if (op == "TOOL:WRITE") {
+    std::string p = resolve(arg1);
+    if (!path_in_sandbox(sandbox, p)) {
+      return "ERROR: path outside sandbox";
+    }
+    if (!tui.confirm_dialog(std::format("Allow model to write {}?", p))) {
+      return "ERROR: action prevented by user";
+    }
+    std::string content = strip_code_fences(arg1, arg2);
+    return write_file(p, content) ? "OK: written to " + arg1 : "ERROR: write failed for " + arg1;
+  }
+  if (op == "TOOL:CURL") {
+    return tool_curl(arg1);
+  }
+  if (op == "TOOL:INTROSPECT") {
+    return introspect(cfg);
+  }
+  if (op == "TOOL:RUN") {
+    std::string prog = resolve(arg1);
+    if (!path_in_sandbox(sandbox, prog)) {
+      return "ERROR: path outside sandbox";
+    }
+    if (!run_allowed.empty()) {
+      std::string basename = fs::path(prog).filename().string();
+      bool permitted = std::any_of(run_allowed.begin(), run_allowed.end(),
+                                   [&](const std::string &a){ return a == basename; });
+      if (!permitted) {
+        return "ERROR: '" + basename + "' is not in the TOOL:RUN allowlist. "
+          "Use /set run_allowed <name> to permit it.";
+      }
+    } else if (!tui.confirm_dialog(std::format("Allow {} to run?", prog))) {
+      return "ERROR: prevented by user";
+    }
+    std::string command = prog + " " + arg2 + " 2>&1";
+    FILE *fp = popen(command.c_str(), "r");
+    if (!fp) {
+      return "ERROR: popen failed";
+    }
+    std::string out;
+    char buf[256];
+    while (fgets(buf, sizeof(buf), fp)) {
+      out += buf;
+    }
+    pclose(fp);
+    if (out.size() > 4096) {
+      out = out.substr(0, 4096) + "\n…(truncated)";
+    }
+    return out;
+  }
+  return "ERROR: unknown tool: [" + op + "]";
+}
+
+//
 // Agent turn
-// ═══════════════════════════════════════════════════════════════════════════
-bool AgentState::run_turn(const std::string &user_message,
-                          const NitroConfig &cfg,
-                          TuiState          &tui) {
+//
+bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cfg, TuiState &tui) {
   if (!model_loaded) {
     tui.append_line("[err] No model loaded. Use /model <path>");
     tui.redraw_all();
@@ -1530,9 +1691,9 @@ bool AgentState::run_turn(const std::string &user_message,
   return true;
 }
 
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // File-system helpers
-// ═══════════════════════════════════════════════════════════════════════════
+//
 static std::string join_path(const std::string &a, const std::string &b) {
   if (b.empty()) return a;
   if (b[0] == '/') return b;
@@ -1605,15 +1766,16 @@ static std::string strip_code_fences(const std::string &filename,
   return inner;
 }
 
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // html_to_text — strip HTML for cleaner TOOL:CURL context
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // Lightweight HTML→plain-text conversion:
 //   • Drops <head>, <script>, <style> blocks entirely.
 //   • Inserts newlines at block-level tags (p, div, br, li, h1-h6 …).
 //   • Strips all remaining tags.
 //   • Decodes common named & numeric HTML entities.
 //   • Collapses whitespace runs; caps consecutive blank lines at 2.
+//
 static std::string html_to_text(const std::string &html) {
   std::string s = html;
 
@@ -1739,9 +1901,9 @@ static std::string html_to_text(const std::string &html) {
   return s;
 }
 
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // TOOL:CURL
-// ═══════════════════════════════════════════════════════════════════════════
+//
 static size_t curl_write_cb(void *contents, size_t size, size_t nmemb, void *userp) {
   std::string *buf = static_cast<std::string *>(userp);
   size_t total = size * nmemb;
@@ -1797,123 +1959,9 @@ static std::string tool_curl(const std::string &url) {
   return body;
 }
 
-// ═══════════════════════════════════════════════════════════════════════════
-// Tool dispatch
-// ═══════════════════════════════════════════════════════════════════════════
-static std::string process_tool(const std::string &cmd, const NitroConfig &cfg, TuiState &tui) {
-  const std::string &sandbox = cfg.sandbox;
-  const std::vector<std::string> &run_allowed = cfg.run_allowed;
-
-  std::string op, arg1, arg2;
-  auto sp1 = cmd.find(' ');
-  if (sp1 == std::string::npos) {
-    op = trim(cmd);
-  } else {
-    op = trim(cmd.substr(0, sp1));
-    std::string rest = cmd.substr(sp1 + 1);
-    rest.erase(0, rest.find_first_not_of(" \t"));
-    auto sp2 = rest.find(' ');
-    if (sp2 == std::string::npos) {
-      arg1 = rest;
-    } else {
-      arg1 = rest.substr(0, sp2);
-      arg2 = rest.substr(sp2 + 1);
-    }
-  }
-
-  auto resolve = [&](const std::string &p) -> std::string {
-    if (p.empty() || p == ".") return sandbox;
-    if (p.substr(0, 2) == "./") return join_path(sandbox, p.substr(2));
-    if (p[0] == '/') return p;
-    return join_path(sandbox, p);
-  };
-
-  tui.append_line("[tool] → " + op);
-  tui.redraw_all();
-
-  if (op == "TOOL:DATE") {
-    char buf[32]; time_t t = time(nullptr);
-    strftime(buf, sizeof(buf), "%Y-%m-%d", localtime(&t));
-    return buf;
-  }
-  if (op == "TOOL:TIME") {
-    char buf[32]; time_t t = time(nullptr);
-    strftime(buf, sizeof(buf), "%H:%M:%S", localtime(&t));
-    return buf;
-  }
-  if (op == "TOOL:RND") {
-    return std::to_string((double)rand() / RAND_MAX);
-  }
-  if (op == "TOOL:LIST") {
-    std::string dir = resolve(arg1);
-    if (!path_in_sandbox(sandbox, dir)) return "ERROR: path outside sandbox";
-    return list_dir(dir);
-  }
-  if (op == "TOOL:EXISTS") {
-    std::string p = resolve(arg1);
-    if (!path_in_sandbox(sandbox, p)) return "NO";
-    return fs::exists(p) ? "YES" : "NO";
-  }
-  if (op == "TOOL:READ") {
-    std::string p = resolve(arg1);
-    if (!path_in_sandbox(sandbox, p)) return "ERROR: path outside sandbox";
-    return read_file(p);
-  }
-  if (op == "TOOL:WRITE") {
-    std::string p = resolve(arg1);
-    if (!path_in_sandbox(sandbox, p)) {
-      return "ERROR: path outside sandbox";
-    }
-    if (!tui.confirm_dialog(std::format("Allow model to write {}?", p))) {
-      return "ERROR: action prevented by user";
-    }
-    std::string content = strip_code_fences(arg1, arg2);
-    return write_file(p, content) ? "OK: written to " + arg1 : "ERROR: write failed for " + arg1;
-  }
-  if (op == "TOOL:CURL") {
-    return tool_curl(arg1);
-  }
-  if (op == "TOOL:INTROSPECT") {
-    return introspect(cfg);
-  }
-  if (op == "TOOL:RUN") {
-    std::string prog = resolve(arg1);
-    if (!path_in_sandbox(sandbox, prog)) {
-      return "ERROR: path outside sandbox";
-    }
-    if (!run_allowed.empty()) {
-      std::string basename = fs::path(prog).filename().string();
-      bool permitted = std::any_of(run_allowed.begin(), run_allowed.end(),
-                                   [&](const std::string &a){ return a == basename; });
-      if (!permitted) {
-        return "ERROR: '" + basename + "' is not in the TOOL:RUN allowlist. "
-          "Use /set run_allowed <name> to permit it.";
-      }
-    } else if (!tui.confirm_dialog(std::format("Allow {} to run?", prog))) {
-      return "ERROR: prevented by user";
-    }
-    std::string command = prog + " " + arg2 + " 2>&1";
-    FILE *fp = popen(command.c_str(), "r");
-    if (!fp) {
-      return "ERROR: popen failed";
-    }
-    std::string out;
-    char buf[256];
-    while (fgets(buf, sizeof(buf), fp)) {
-      out += buf;
-    }
-    pclose(fp);
-    if (out.size() > 4096) {
-      out = out.substr(0, 4096) + "\n…(truncated)";
-    }
-    return out;
-  }
-  return "ERROR: unknown tool: [" + op + "]";
-}
-
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // System prompt
-// ═══════════════════════════════════════════════════════════════════════════
+//
 static std::string build_system_prompt(const std::vector<std::string> &knowledge_files,
                                        const std::string &sandbox) {
   std::string p;
@@ -1932,6 +1980,7 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
     "  TOOL:DATE                  current date\n"
     "  TOOL:TIME                  current time\n"
     "  TOOL:RND                   random float\n"
+    "  TOOL:RAG    <query>        query the RAG index for additional context\n"
     "  TOOL:INTROSPECT            introspect your settings, top_k etc\n"
     "  TOOL:CURL   <url>          HTTP GET; returns response body (max 32 KB)\n\n"
     "Rules:\n"
@@ -1949,9 +1998,9 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
   return p;
 }
 
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // Slash command handler
-// ═══════════════════════════════════════════════════════════════════════════
+//
 static void handle_slash(const std::string &input,
                          NitroConfig       &cfg,
                          AgentState        &agent,
@@ -2025,9 +2074,15 @@ static void handle_slash(const std::string &input,
         return;
       }
     }
-    tui.append_line("[sys] Indexing: " + path);
-    tui.redraw_all();
-    agent.rag_index(path, tui);
+    if (path.ends_with(".bin")) {  // a saved index file; any other path is indexed afresh
+      tui.append_line("[sys] Loading index: " + path);
+      tui.redraw_all();
+      agent.rag_load_index(path, tui);
+    } else {
+      tui.append_line("[sys] Indexing: " + path);
+      tui.redraw_all();
+      agent.rag_index(path, cfg, tui);
+    }
     return;
   }
 
@@ -2138,9 +2193,9 @@ static void handle_slash(const std::string &input,
   tui.redraw_all();
 }
 
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // Welcome banner  — colourful multi-line ASCII logo
-// ═══════════════════════════════════════════════════════════════════════════
+//
 static void welcome(TuiState &tui, const std::string &sandbox) {
   tui.append_line("");
   tui.append_line("[logo_0]  ███╗   ██╗██╗████████╗██████╗  ██████╗ ");
@@ -2157,9 +2212,9 @@ static void welcome(TuiState &tui, const std::string &sandbox) {
   tui.redraw_all();
 }
 
-// ═══════════════════════════════════════════════════════════════════════════
+//
 // main()
-// ═══════════════════════════════════════════════════════════════════════════
+//
 int main(int argc, char **argv) {
   // ── Load persisted settings first (provides defaults) ────────────
   NitroConfig cfg;

From f72832ed4d728ca39c109a6880eeaf5beacb2954 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Wed, 27 May 2026 22:53:14 +0930
Subject: [PATCH 45/54] LLAMA: nitro - handle model arg char wrappers

---
 llama/nitro.cpp | 69 ++++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 57 insertions(+), 12 deletions(-)

diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index bd97714..5c5b895 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -555,8 +555,10 @@ static bool save_settings(const NitroConfig &cfg) {
   return f.good();
 }
 
+//
 // Trims whitespace from both ends of a string
-std::string trim(std::string_view str) {
+//
+static std::string trim(std::string_view str) {
   const std::string_view whitespace = " \t\n\r\f\v";
 
   // Find the first non-whitespace character
@@ -572,19 +574,44 @@ std::string trim(std::string_view str) {
   return std::string(str.substr(start, end - start + 1));
 }
 
+//
+// Strips one leading c1 and one trailing c2 when both are present
+//
+static std::string disclose(const std::string &input, char c1, char c2) {
+  // Check if string has at least 2 characters
+  if (input.length() < 2) {
+    return input;
+  }
+
+  // Check if first and last characters match the specified delimiters
+  if (input[0] == c1 && input[input.length() - 1] == c2) {
+    // Drop the last character first: erasing the front shifts every index
+    std::string result = input;
+    result.pop_back();
+    result.erase(0, 1);
+    return result;
+  }
+
+  return input;
+}
+
 // ─── colour helpers ──────────────────────────────────────────────────────
 static constexpr uint32_t BG_CHAT_R = 18,  BG_CHAT_G = 22,  BG_CHAT_B = 30;
 static constexpr uint32_t BG_INP_R  = 22,  BG_INP_G  = 28,  BG_INP_B  = 38;
 static constexpr uint32_t BG_HDR_R  = 30,  BG_HDR_G  = 40,  BG_HDR_B  = 55;
+
 static inline uint64_t fg_rgb(uint32_t r, uint32_t g, uint32_t b) {
   return NCCHANNELS_INITIALIZER(r, g, b, 0, 0, 0);
 }
+
 static inline uint64_t chat_ch(uint32_t r, uint32_t g, uint32_t b) {
   return NCCHANNELS_INITIALIZER(r, g, b, BG_CHAT_R, BG_CHAT_G, BG_CHAT_B);
 }
+
 static inline uint64_t inp_ch(uint32_t r, uint32_t g, uint32_t b) {
   return NCCHANNELS_INITIALIZER(r, g, b, BG_INP_R, BG_INP_G, BG_INP_B);
 }
+
 static inline uint64_t hdr_ch(uint32_t r, uint32_t g, uint32_t b) {
   return NCCHANNELS_INITIALIZER(r, g, b, BG_HDR_R, BG_HDR_G, BG_HDR_B);
 }
@@ -627,7 +654,10 @@ void TuiState::init() {
 }
 
 void TuiState::destroy() {
-  if (nc) { notcurses_stop(nc); nc = nullptr; }
+  if (nc) {
+    notcurses_stop(nc);
+    nc = nullptr;
+  }
 }
 
 void TuiState::resize() {
@@ -1445,10 +1475,16 @@ std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &
   }
 
   auto resolve = [&](const std::string &p) -> std::string {
-    if (p.empty() || p == ".") return sandbox;
-    if (p.substr(0, 2) == "./") return join_path(sandbox, p.substr(2));
-    if (p[0] == '/') return p;
-    return join_path(sandbox, p);
+    if (p.empty() || p == ".") {
+      return sandbox;
+    }
+    if (p.substr(0, 2) == "./") {
+      return join_path(sandbox, p.substr(2));
+    }
+    if (p[0] == '/') {
+      return p;
+    }
+    return join_path(sandbox, disclose(disclose(p, '<', '>'), '[', ']'));
   };
 
   tui.append_line("[tool] → " + op);
@@ -1493,7 +1529,7 @@ std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &
     if (!tui.confirm_dialog(std::format("Allow model to write {}?", p))) {
       return "ERROR: action prevented by user";
     }
-    std::string content = strip_code_fences(arg1, arg2);
+    std::string content = disclose(strip_code_fences(arg1, arg2), '`', '`');
     return write_file(p, content) ? "OK: written to " + arg1 : "ERROR: write failed for " + arg1;
   }
   if (op == "TOOL:CURL") {
@@ -1577,6 +1613,7 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
   auto invoke_tool = [&](const std::string &tool, const std::string_view template_str) -> void {
     std::string result = process_tool(tool, cfg, tui);
     std::string content = std::vformat(template_str, std::make_format_args(result));
+    log_write("tool: [%s] result: [%s]", tool.c_str(), result.c_str());
     if (!llama->add_message(*iter, "tool_result", content)) {
       tui.append_line(std::string("[err] tool result inject: ") + llama->last_error());
       tui.redraw_all();
@@ -1948,7 +1985,9 @@ static std::string tool_curl(const std::string &url) {
   if (http_code >= 400) {
     return "ERROR: HTTP " + std::to_string(http_code) + " from " + url;
   }
-  if (body.empty()) return "(empty response)";
+  if (body.empty()) {
+    return "(empty response)";
+  }
 
   // Strip HTML tags so the model receives clean plain text.
   bool is_html = (content_type.find("text/html") != std::string::npos)
@@ -1990,9 +2029,14 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
     "- Reason step-by-step inside <|think|> </|think|> (hidden from user).\n"
     "- After each tool call, explain what you did in plain English.\n\n";
   for (const auto &kf : knowledge_files) {
-    std::ifstream f(kf);
-    if (!f) continue;
-    std::ostringstream oss; oss << f.rdbuf();
+    auto path = join_path(sandbox, kf);
+    std::ifstream f(path);
+    if (!f) {
+      continue;
+    }
+    log_write("loaded [%s]", path.c_str());
+    std::ostringstream oss;
+    oss << f.rdbuf();
     p += "## Knowledge: " + kf + "\n" + oss.str() + "\n\n";
   }
   return p;
@@ -2226,8 +2270,9 @@ int main(int argc, char **argv) {
       const char *home = getenv("HOME");
       return std::string(home ? home : ".") + "/" + arg.substr(2);
     }
-    if (arg.substr(0, 2) == "./")
+    if (arg.substr(0, 2) == "./") {
       return (fs::current_path(ec) / arg.substr(2)).string();
+    }
     return arg;
   };
 

From ddf39f6d26ff5e34df6c1d2f65b554ba8ef1caab Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Thu, 28 May 2026 06:54:08 +0930
Subject: [PATCH 46/54] LLAMA: nitro - reorganise code layout

---
 llama/nitro.cpp | 2522 +++++++++++++++++++++++------------------------
 1 file changed, 1258 insertions(+), 1264 deletions(-)

diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index 5c5b895..06d8293 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -57,20 +57,6 @@
 
 namespace fs = std::filesystem;
 
-//
-// Forward declarations
-//
-struct NitroConfig;
-struct TuiState;
-struct AgentState;
-static bool         path_in_sandbox(const std::string &sandbox, const std::string &path);
-static bool         write_file(const std::string &path, const std::string &data);
-static std::string  build_system_prompt(const std::vector<std::string> &knowledge_files, const std::string &sandbox);
-static std::string  join_path(const std::string &a, const std::string &b);
-static std::string  list_dir(const std::string &path);
-static std::string  read_file(const std::string &path);
-static std::string  strip_code_fences(const std::string &filename, const std::string &src);
-static std::string  tool_curl(const std::string &url);
 //
 // NitroConfig
 //
@@ -349,6 +335,14 @@ static void log_write(const char *fmt, ...) {
   fflush(g_logfile);  // flush immediately so tail -f works
 }
 
+//
+// constant for strip_code_fences
+//
+static const std::vector<std::string> CODE_EXTENSIONS = {
+  ".py",".c",".cpp",".h",".bas",".java",".html",".js",".ts",
+  ".json",".yaml",".toml",".sh",".go",".rs",".jsx",".tsx"
+};
+
 //
 // Settings persistence  (~/.config/nitro/nitro.settings.json)
 //
@@ -617,1429 +611,1426 @@ static inline uint64_t hdr_ch(uint32_t r, uint32_t g, uint32_t b) {
 }
 
 //
-// TuiState::init
-//
-void TuiState::init() {
-  notcurses_options opts{};
-  opts.flags = NCOPTION_SUPPRESS_BANNERS;
-  nc = notcurses_init(&opts, nullptr);
-  if (!nc) { std::fputs("notcurses_init failed\n", stderr); std::exit(1); }
-  stdpl = notcurses_stdplane(nc);
-  notcurses_term_dim_yx(nc, (unsigned *)&term_rows, (unsigned *)&term_cols);
-  uint64_t bg = NCCHANNELS_INITIALIZER(BG_CHAT_R, BG_CHAT_G, BG_CHAT_B,
-                                       BG_CHAT_R, BG_CHAT_G, BG_CHAT_B);
-  ncplane_set_base(stdpl, " ", 0, bg);
-  ncplane_erase(stdpl);
-  ncplane_options hopt{};
-  hopt.y = 0; hopt.x = 0;
-  hopt.rows = 1; hopt.cols = (unsigned)term_cols;
-  header = ncplane_create(stdpl, &hopt);
-  int chat_rows = std::max(1, term_rows - 3);
-  ncplane_options copt{};
-  copt.y = 1; copt.x = 0;
-  copt.rows = (unsigned)chat_rows; copt.cols = (unsigned)term_cols;
-  chatpl = ncplane_create(stdpl, &copt);
-  ncplane_set_base(chatpl, " ", 0,
-                   NCCHANNELS_INITIALIZER(BG_CHAT_R, BG_CHAT_G, BG_CHAT_B,
-                                          BG_CHAT_R, BG_CHAT_G, BG_CHAT_B));
-  ncplane_options iopt{};
-  iopt.y = term_rows - 2; iopt.x = 0;
-  iopt.rows = 2; iopt.cols = (unsigned)term_cols;
-  inputpl = ncplane_create(stdpl, &iopt);
-  ncplane_set_base(inputpl, " ", 0,
-                   NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B,
-                                          BG_INP_R, BG_INP_G, BG_INP_B));
-  notcurses_mice_enable(nc, NCMICE_BUTTON_EVENT);
-  redraw_all();
-}
-
-void TuiState::destroy() {
-  if (nc) {
-    notcurses_stop(nc);
-    nc = nullptr;
-  }
-}
-
-void TuiState::resize() {
-  notcurses_term_dim_yx(nc, (unsigned *)&term_rows, (unsigned *)&term_cols);
-  ncplane_resize_simple(header,  1,                       (unsigned)term_cols);
-  int cr = std::max(1, term_rows - 3);
-  ncplane_resize_simple(chatpl,  (unsigned)cr,            (unsigned)term_cols);
-  ncplane_move_yx(inputpl, term_rows - 2, 0);
-  ncplane_resize_simple(inputpl, 2,                       (unsigned)term_cols);
-  redraw_all();
-}
-
-//
-// TuiState::redraw
+// File-system helpers
 //
-void TuiState::redraw_header() {
-  ncplane_erase(header);
-  ncplane_set_base(header, " ", 0,
-                   NCCHANNELS_INITIALIZER(BG_HDR_R, BG_HDR_G, BG_HDR_B,
-                                          BG_HDR_R, BG_HDR_G, BG_HDR_B));
-  float kv_pct   = kv_total   > 0 ? 100.f * (float)kv_used   / (float)kv_total   : 0.f;
-  float vram_pct = vram_total  > 0 ? 100.f * (float)vram_used / (float)vram_total : 0.f;
-  static const char *const SPIN[] = { "⣾","⣽","⣻","⢿","⡿","⣟","⣯","⣷" };
-  const char *spin_str = thinking ? SPIN[spinner_frame % 8] : " ";
-  char buf[512];
-  int n = std::snprintf(buf, sizeof(buf),
-                        " ✦ NITRO  │ %-32s │ %5.1f tok/s │ KV %4.1f%%  VRAM %4.1f%%  %s",
-                        current_model.c_str(), (double)tokens_per_sec,
-                        (double)kv_pct, (double)vram_pct, spin_str);
-  if (n > term_cols) buf[term_cols] = '\0';
-  ncplane_set_channels(header, hdr_ch(130, 220, 200));
-  ncplane_putstr_yx(header, 0, 0, buf);
+static std::string join_path(const std::string &a, const std::string &b) {
+  if (b.empty()) return a;
+  if (b[0] == '/') return b;
+  std::string pa = a;
+  if (!pa.empty() && pa.back() == '/') pa.pop_back();
+  std::string pb = (b.front() == '/') ? b.substr(1) : b;
+  return pa + "/" + pb;
 }
 
-void TuiState::redraw_chat() {
-  ncplane_erase(chatpl);
-  unsigned rows, cols;
-  ncplane_dim_yx(chatpl, &rows, &cols);
-  std::lock_guard<std::mutex> lk(lines_mutex);
-  int total   = (int)chat_lines.size();
-  int visible = (int)rows;
-  int start   = std::max(0, total - visible - scroll_offset);
-  int end     = std::min(total, start + visible);
-  for (int i = start, row = 0; i < end; ++i, ++row) {
-    const std::string &line = chat_lines[i];
-    uint64_t ch;
-    // Logo lines use prefix "[logo_N]" where N is the row index 0-6.
-    // We interpolate a cyan→magenta gradient across the 7 art rows.
-    if (line.rfind("[logo_", 0) == 0 && line.size() > 7 && line[7] == ']') {
-      int logo_row = line[6] - '0';
-      // Gradient: cyan (0,230,255) → green (80,255,160) → magenta (220,80,255)
-      // 7 steps, indices 0-6.
-      static const uint32_t GRAD_R[] = {  0,  20,  60, 120, 180, 210, 220 };
-      static const uint32_t GRAD_G[] = { 230, 255, 255, 255, 200, 130,  80 };
-      static const uint32_t GRAD_B[] = { 255, 200, 140,  80, 100, 200, 255 };
-      int gi = std::max(0, std::min(logo_row, 6));
-      ch = chat_ch(GRAD_R[gi], GRAD_G[gi], GRAD_B[gi]);
-    }
-    else if (line.rfind("You: ",   0) == 0) ch = chat_ch(100, 200, 255);
-    else if (line.rfind("Nitro: ", 0) == 0) ch = chat_ch(180, 255, 180);
-    else if (line.rfind("[tool]",  0) == 0) ch = chat_ch(255, 180,  80);
-    else if (line.rfind("[err]",   0) == 0) ch = chat_ch(255,  80,  80);
-    else if (line.rfind("[sys]",   0) == 0) ch = chat_ch(140, 140, 200);
-    else                                     ch = chat_ch(210, 210, 210);
-    ncplane_set_channels(chatpl, ch);
-    // Strip the [logo_N] prefix before rendering.
-    std::string display = (line.rfind("[logo_", 0) == 0 && line.size() > 8)
-      ? line.substr(8) : line;
-    if (display.size() > cols) display = display.substr(0, cols);
-    ncplane_putstr_yx(chatpl, row, 0, display.c_str());
+static std::string read_file(const std::string &path) {
+  std::ifstream f(path, std::ios::binary);
+  if (!f) {
+    return "ERROR: cannot open [" + path + "]";
   }
+  std::ostringstream oss; oss << f.rdbuf();
+  return oss.str();
 }
 
-void TuiState::redraw_input() {
-  ncplane_erase(inputpl);
-  ncplane_set_channels(inputpl, inp_ch(80, 120, 160));
-  std::string sep(term_cols, '-');
-  ncplane_putstr_yx(inputpl, 0, 0, sep.c_str());
-  const std::string prompt = " ❯ ";
-  const int prompt_cols = 4;
-  ncplane_set_channels(inputpl, inp_ch(100, 210, 255));
-  ncplane_putstr_yx(inputpl, 1, 0, prompt.c_str());
-  int max_w = std::max(0, term_cols - prompt_cols - 1);
-  std::string visible = input_buf;
-  int view_offset = 0;
-  if ((int)visible.size() > max_w && max_w > 0) {
-    view_offset = (int)visible.size() - max_w;
-    visible = visible.substr(view_offset);
-  }
-  int cur_in_view = std::max(0, (int)cursor_pos - view_offset);
-  cur_in_view = std::min(cur_in_view, (int)visible.size());
-  std::string before = visible.substr(0, cur_in_view);
-  std::string after  = cur_in_view < (int)visible.size()
-    ? visible.substr(cur_in_view + 1) : "";
-  char cursor_ch_val = cur_in_view < (int)visible.size()
-    ? visible[cur_in_view] : ' ';
-  ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
-  ncplane_putstr_yx(inputpl, 1, prompt_cols, before.c_str());
-  int cx = prompt_cols + cur_in_view;
-  ncplane_set_channels(inputpl, NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B, 180, 230, 255));
-  char cbuf[2] = { cursor_ch_val, '\0' };
-  ncplane_putstr_yx(inputpl, 1, cx, cbuf);
-  ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
-  if (!after.empty()) {
-    ncplane_putstr_yx(inputpl, 1, cx + 1, after.c_str());
+static std::string list_dir(const std::string &path) {
+  std::ostringstream oss;
+  std::error_code ec;
+  for (const auto &e : fs::directory_iterator(path, ec)) {
+    if (ec) break;
+    std::string name = e.path().filename().string();
+    if (name.empty() || name[0] == '.') continue;
+    oss << (e.is_directory() ? "[" + name + "]" : name) << "\n";
   }
+  return oss.str();
 }
 
-void TuiState::redraw_all() {
-  redraw_header();
-  redraw_chat();
-  redraw_input();
-  notcurses_render(nc);
-}
-
-void TuiState::tick_spinner() {
-  ++spinner_frame;
-  redraw_header();
-  notcurses_render(nc);
+static bool path_in_sandbox(const std::string &sandbox, const std::string &path) {  // True when `path` resolves to the sandbox directory itself or to something below it.
+  std::error_code ec;
+  auto base   = fs::canonical(sandbox, ec);  if (ec) return false;  // the sandbox itself must resolve
+  auto target = fs::weakly_canonical(path, ec);  // weakly_canonical: the target need not exist; ec is not re-checked (presumably target is empty on failure, so the test below fails)
+  std::string bstr = base.string() + "/";  // trailing '/' so a sibling such as "<base>2" is not taken for a child of "<base>"
+  std::string tstr = target.string();
+  return tstr == base.string() || tstr.compare(0, bstr.size(), bstr) == 0;  // exact match, or prefix match on "<base>/"
 }
 
-void TuiState::set_thinking(bool on) {
-  thinking = on;
-  if (!on) spinner_frame = 0;
-  redraw_header();
-  notcurses_render(nc);
+static bool write_file(const std::string &path, const std::string &data) {  // Writes `data` to `path` (binary, truncating); returns false when the file cannot be opened or written.
+  fs::path p(path);
+  if (p.has_parent_path()) {
+    std::error_code ec;
+    fs::create_directories(p.parent_path(), ec);  // best effort: a failure here surfaces as an open failure below
+  }
+  std::ofstream f(path, std::ios::binary | std::ios::trunc);
+  if (!f) return false;
+  f.write(data.data(), (std::streamsize)data.size());
+  f.flush();  return f.good();  // flush before testing the state, otherwise an error on the buffered bytes (e.g. disk full) goes unreported
 }
 
 //
-// TuiState content helpers
+// System prompt
 //
-void TuiState::append_line(const std::string &line) {
-  std::lock_guard<std::mutex> lk(lines_mutex);
-  int w = std::max(1, term_cols - 1);
-  if ((int)line.size() <= w) {
-    chat_lines.push_back(line);
-  } else {
-    for (int off = 0; off < (int)line.size(); off += w) {
-      chat_lines.push_back(line.substr(off, w));
+static std::string build_system_prompt(const std::vector<std::string> &knowledge_files,
+                                       const std::string &sandbox) {  // Builds the system prompt: fixed tool protocol text followed by the contents of each readable knowledge file.
+  std::string p;
+  p += "You are Nitro, an agentic AI assistant for software development.\n"
+    "Your sandbox (project directory) is: " + sandbox + "\n\n"
+    "## Tool protocol\n"
+    " - Emit tool calls on their own new line. for example:\n\n"
+    "TOOL:LIST\n"
+    " - The host executes the tool and returns TOOL_RESULT: <value> on the next line.\n\n"
+    "Available tools:\n"
+    "  TOOL:LIST   [dir]          list files (default: sandbox root)\n"
+    "  TOOL:READ   <file>         read file contents\n"
+    "  TOOL:WRITE  <file> <text>  write text to file\n"
+    "  TOOL:EXISTS <file>         YES or NO\n"
+    "  TOOL:RUN    <prog> [args]  run program inside sandbox\n"
+    "  TOOL:DATE                  current date\n"
+    "  TOOL:TIME                  current time\n"
+    "  TOOL:RND                   random float\n"
+    "  TOOL:RAG    <query>        query the RAG index for additional context\n"
+    "  TOOL:INTROSPECT            introspect your settings, top_k etc\n"
+    "  TOOL:CURL   <url>          HTTP GET; returns response body (max 32 KB)\n\n"
+    "Rules:\n"
+    "- Never access files outside the sandbox.\n"
+    "- Only use one TOOL at a time. Never combine, always use each tool step by step\n"
+    "- Use TOOL:CURL to fetch documentation, APIs, or web content you need.\n"
+    "- Reason step-by-step inside <|think|> </|think|> (hidden from user).\n"
+    "- After each tool call, explain what you did in plain English.\n\n";
+  for (const auto &kf : knowledge_files) {
+    auto path = join_path(sandbox, kf);  // presumably resolves kf relative to the sandbox; join_path is defined elsewhere
+    std::ifstream f(path);
+    if (!f) {  // knowledge files that cannot be opened are skipped without an error
+      continue;
     }
+    log_write("loaded [%s]", path.c_str());
+    std::ostringstream oss;
+    oss << f.rdbuf();  // read the whole file
+    p += "## Knowledge: " + kf + "\n" + oss.str() + "\n\n";  // one section per file, titled with the name as given
   }
+  return p;
 }
 
-void TuiState::append_token(const std::string &token) {
-  token_acc += token;
-  for (;;) {
-    auto pos = token_acc.find('\n');
-    if (pos == std::string::npos) {
-      break;
-    }
-    append_line(token_acc.substr(0, pos));
-    token_acc = token_acc.substr(pos + 1);
-  }
-  redraw_chat();
-  notcurses_render(nc);
+static std::string strip_code_fences(const std::string &filename,
+                                     const std::string &src) {  // For files whose extension is in CODE_EXTENSIONS, returns the text inside a ``` fence; otherwise returns src unchanged.
+  auto ext = fs::path(filename).extension().string();
+  bool is_code = std::any_of(CODE_EXTENSIONS.begin(), CODE_EXTENSIONS.end(),
+                             [&](const std::string &e){ return ext == e; });  // exact, case-sensitive extension match
+  if (!is_code) return src;
+  auto pos = src.find("```");  // first fence marker
+  if (pos == std::string::npos) return src;
+  auto nl = src.find('\n', pos + 3);  // end of the fence line, which also skips any language tag after the marker
+  if (nl == std::string::npos) return src;
+  std::string inner = src.substr(nl + 1);
+  auto end = inner.rfind("```");  // last marker in the remainder is treated as the closing fence
+  if (end != std::string::npos) inner = inner.substr(0, end);  // with no closing fence, everything after the opening line is kept
+  return inner;
 }
 
-void TuiState::flush_token_acc() {
-  if (!token_acc.empty()) {
-    append_line(token_acc);
-    token_acc.clear();
-    redraw_chat();
-    notcurses_render(nc);
+//
+// TOOL:CURL
+//
+static size_t curl_write_cb(void *contents, size_t size, size_t nmemb, void *userp) {  // libcurl write callback: appends received bytes to the std::string behind userp, up to MAX_BODY bytes.
+  std::string *buf = static_cast<std::string *>(userp);
+  size_t total = size * nmemb;  // number of bytes delivered in this call
+  static constexpr size_t MAX_BODY = 32 * 1024;
+  if (buf->size() < MAX_BODY) {
+    size_t room = MAX_BODY - buf->size();
+    buf->append(static_cast<char *>(contents), std::min(total, room));  // bytes beyond the cap are dropped
   }
+  return total;  // always claim the full chunk: libcurl treats any other return value as a write error and aborts the transfer
 }
 
 //
-// Creates a centred floating plane with a border and a status message.
-// The popup sits above all other planes and blocks until explicitly dismissed.
+// html_to_text — strip HTML for cleaner TOOL:CURL context
 //
-void TuiState::show_modal_popup(const std::string &message) {
-  // Dismiss any previous popup first.
-  dismiss_modal_popup();
-
-  // Clamp popup size to terminal.
-  int popup_w = std::min((int)message.size() + 8, term_cols - 4);
-  popup_w = std::max(popup_w, 20);
-  int popup_h = 5;
-  int py = std::max(0, (term_rows - popup_h) / 2);
-  int px = std::max(0, (term_cols - popup_w) / 2);
-
-  ncplane_options opts{};
-  opts.y    = py; opts.x    = px;
-  opts.rows = (unsigned)popup_h;
-  opts.cols = (unsigned)popup_w;
-  modal_plane = ncplane_create(stdpl, &opts);
-  if (!modal_plane) return;
-
-  // Background: deep navy.
-  static constexpr uint32_t PBG_R = 20, PBG_G = 28, PBG_B = 50;
-  ncplane_set_base(modal_plane, " ", 0,
-                   NCCHANNELS_INITIALIZER(PBG_R, PBG_G, PBG_B, PBG_R, PBG_G, PBG_B));
-  ncplane_erase(modal_plane);
-
-  // Border — bright cyan.
-  uint64_t border_ch = NCCHANNELS_INITIALIZER(80, 220, 255, PBG_R, PBG_G, PBG_B);
-  ncplane_set_channels(modal_plane, border_ch);
+// Lightweight HTML→plain-text conversion:
+//   • Drops <head>, <script>, <style> blocks entirely.
+//   • Inserts newlines at block-level tags (p, div, br, li, h1-h6 …).
+//   • Strips all remaining tags.
+//   • Decodes common named & numeric HTML entities.
+//   • Collapses whitespace runs; caps consecutive blank lines at 2.
+//
+static std::string html_to_text(const std::string &html) {  // Returns a plain-text rendering of `html`; the input is not modified.
+  std::string s = html;
 
-  // Draw corners and edges manually so we don't require nccell border helpers.
-  // Top row
-  ncplane_putstr_yx(modal_plane, 0, 0, "╔");
-  for (int c = 1; c < popup_w - 1; ++c)
-    ncplane_putstr_yx(modal_plane, 0, c, "═");
-  ncplane_putstr_yx(modal_plane, 0, popup_w - 1, "╗");
-  // Middle rows
-  for (int r = 1; r < popup_h - 1; ++r) {
-    ncplane_putstr_yx(modal_plane, r, 0, "║");
-    ncplane_putstr_yx(modal_plane, r, popup_w - 1, "║");
+  // 1. Remove <head>…</head>
+  {
+    std::string lo = s;  // lower-cased copy used only for searching; offsets are the same as in s
+    std::transform(lo.begin(), lo.end(), lo.begin(), [](unsigned char c) { return (char)::tolower(c); });  // unsigned char: tolower is undefined for negative values (UTF-8 bytes)
+    auto p0 = lo.find("<head");
+    auto p1 = lo.find("</head>", p0);  // search from p0 so p1 >= p0; a "</head>" before "<head" used to underflow the erase length
+    if (p0 != std::string::npos && p1 != std::string::npos)
+      s.erase(p0, p1 + 7 - p0);  // 7 == strlen("</head>")
   }
-  // Bottom row
-  ncplane_putstr_yx(modal_plane, popup_h - 1, 0, "╚");
-  for (int c = 1; c < popup_w - 1; ++c)
-    ncplane_putstr_yx(modal_plane, popup_h - 1, c, "═");
-  ncplane_putstr_yx(modal_plane, popup_h - 1, popup_w - 1, "╝");
 
-  // Title bar.
-  uint64_t title_ch = NCCHANNELS_INITIALIZER(255, 220, 80, PBG_R, PBG_G, PBG_B);
-  ncplane_set_channels(modal_plane, title_ch);
-  ncplane_putstr_yx(modal_plane, 1, 2, "⏳ Loading…");
+  // 2. Remove <script>…</script> and <style>…</style>
+  for (const std::string &tag : {"script", "style"}) {
+    std::string open  = "<" + tag;
+    std::string close = "</" + tag + ">";
+    std::string lo = s;
+    std::transform(lo.begin(), lo.end(), lo.begin(), [](unsigned char c) { return (char)::tolower(c); });
+    for (;;) {
+      auto p0 = lo.find(open);
+      if (p0 == std::string::npos) break;
+      auto p1 = lo.find(close, p0);
+      if (p1 == std::string::npos) { s.erase(p0); lo.erase(p0); break; }  // unterminated block: drop everything from the opening tag on
+      s.erase(p0, p1 + close.size() - p0);
+      lo.erase(p0, p1 + close.size() - p0);  // keep lo and s aligned
+    }
+  }
 
-  // Message.
-  uint64_t msg_ch = NCCHANNELS_INITIALIZER(200, 200, 200, PBG_R, PBG_G, PBG_B);
-  ncplane_set_channels(modal_plane, msg_ch);
-  // Truncate message to fit inside border.
-  int max_msg = popup_w - 4;
-  std::string display = message.size() > (size_t)max_msg
-    ? message.substr(0, max_msg)
-    : message;
-  ncplane_putstr_yx(modal_plane, 2, 2, display.c_str());
+  // 3. Replace block-level tags with '\n' before stripping all tags.
+  static const char *const BLOCK[] = {
+    "p","div","br","li","tr","h1","h2","h3","h4","h5","h6",
+    "article","section","header","footer","nav","main", nullptr
+  };
+  {
+    std::string out;
+    out.reserve(s.size());
+    size_t i = 0;
+    while (i < s.size()) {
+      if (s[i] != '<') { out += s[i++]; continue; }
+      auto ce = s.find('>', i);
+      if (ce == std::string::npos) { out += s[i++]; continue; }  // '<' with no closing '>' is copied through
+      std::string inner = s.substr(i + 1, ce - i - 1);
+      size_t sp = inner.find_first_of(" \t/\r\n");  // closing tags start with '/', so their name comes out empty and adds no newline
+      std::string name = (sp != std::string::npos) ? inner.substr(0, sp) : inner;
+      std::transform(name.begin(), name.end(), name.begin(), [](unsigned char c) { return (char)::tolower(c); });
+      for (int k = 0; BLOCK[k]; ++k) {
+        if (name == BLOCK[k]) {
+          out += '\n'; break;
+        }
+      }
+      i = ce + 1;  // the tag itself is never copied to out
+    }
+    s = out;
+  }
 
-  notcurses_render(nc);
-}
+  // 4. Strip all remaining tags.
+  {
+    std::string out; out.reserve(s.size());
+    bool in_tag = false;
+    for (char c : s) {
+      if (c == '<')  { in_tag = true;  continue; }
+      if (c == '>')  { in_tag = false; continue; }
+      if (!in_tag)     out += c;
+    }
+    s = out;
+  }
 
-void TuiState::show_help() {
-  append_line("[sys] Commands:");
-  append_line("[sys]   /model  [path]           load a GGUF model (picker if no path)");
-  append_line("[sys]   /embed  [path]           load an embedding model (picker if no path)");
-  append_line("[sys]   /rag    [path]           index file or directory (picker if no path)");
-  append_line("[sys]   /memory                  KV / VRAM / layer stats");
-  append_line("[sys]   /clear                   reset conversation");
-  append_line("[sys]   /settings                show current settings");
-  append_line("[sys]   /set    <key> <value>    change a setting live");
-  append_line("[sys]   /help                    this message");
-  append_line("[sys]   exit / quit              exit Nitro");
-  append_line("[sys] Settable keys (via /set):");
-  append_line("[sys]   temperature  top_p  top_k  min_p  penalty_repeat");
-  append_line("[sys]   n_max_tokens  penalty_last_n  rag_top_k  n_gpu_layers");
-  append_line("[sys]   run_allowed  (comma-separated list, e.g. python3,make)");
-  redraw_all();
-}
+  // 5. Decode common named entities; "&amp;" is deferred to the pass below so its output is never decoded a second time.
+  static const std::pair<const char*, const char*> ENT[] = {
+    {"&lt;","<"},{"&gt;",">"},{"&quot;","\""},
+    {"&apos;","'"},{"&nbsp;"," "},{"&mdash;","—"},{"&ndash;","–"},
+    {"&hellip;","…"},{"&#39;","'"},{"&#34;","\""},
+    {nullptr,nullptr}
+  };
+  for (int k = 0; ENT[k].first; ++k) {
+    std::string e = ENT[k].first, r = ENT[k].second;
+    size_t pos = 0;
+    while ((pos = s.find(e, pos)) != std::string::npos)
+      { s.replace(pos, e.size(), r); pos += r.size(); }
+  }
+  // "&amp;" and numeric entities &#NNN; / &#xHHH;, decoded in one left-to-right pass.
+  {
+    std::string out; out.reserve(s.size()); size_t i = 0;
+    while (i < s.size()) {
+      if (s.compare(i, 5, "&amp;") == 0) { out += '&'; i += 5; continue; }  // the emitted '&' is not rescanned, so "&amp;lt;" stays "&lt;"
+      if (s[i]=='&' && i+2<s.size() && s[i+1]=='#') {
+        size_t semi = s.find(';', i+2);
+        if (semi != std::string::npos && semi-i < 10) {
+          std::string num = s.substr(i+2, semi-i-2);
+          try {
+            uint32_t cp = (num[0]=='x'||num[0]=='X')
+              ? (uint32_t)std::stoul(num.substr(1),nullptr,16)
+              : (uint32_t)std::stoul(num);
+            if (cp < 0x80) { out += (char)cp; } else if (cp < 0x800) { out += (char)(0xC0|(cp>>6)); out += (char)(0x80|(cp&0x3F)); }
+            else if (cp < 0x10000) { out += (char)(0xE0|(cp>>12)); out += (char)(0x80|((cp>>6)&0x3F)); out += (char)(0x80|(cp&0x3F)); }
+            else if (cp < 0x110000) { out += (char)(0xF0|(cp>>18)); out += (char)(0x80|((cp>>12)&0x3F)); out += (char)(0x80|((cp>>6)&0x3F)); out += (char)(0x80|(cp&0x3F)); }  // 4-byte UTF-8; values past U+10FFFF emit nothing
+            i = semi+1; continue;
+          } catch (...) {}  // not a number: fall through and copy the '&' literally
+        }
+      }
+      out += s[i++];
+    }
+    s = out;
+  }
 
-void TuiState::dismiss_modal_popup() {
-  if (modal_plane) {
-    ncplane_destroy(modal_plane);
-    modal_plane = nullptr;
-    notcurses_render(nc);
+  // 6. Collapse whitespace; cap blank lines at 2.
+  {
+    std::string out; out.reserve(s.size());
+    int nl_run = 0; bool last_sp = false;
+    for (char c : s) {
+      if (c == '\r') continue;
+      if (c == '\t') c = ' ';
+      if (c == '\n') { ++nl_run; last_sp=false; if (nl_run<=2) out+='\n'; continue; }
+      nl_run = 0;
+      if (c == ' ') { if (!last_sp) { out+=' '; last_sp=true; } continue; }
+      last_sp = false; out += c;
+    }
+    size_t f = out.find_first_not_of(" \n");
+    size_t l = out.find_last_not_of(" \n");
+    s = (f == std::string::npos) ? "" : out.substr(f, l-f+1);  // trim leading and trailing spaces/newlines
   }
+  return s;
 }
 
-//
-// ─── TuiState::file_picker ────────────────────────────────────────────────
-// Interactive directory/file browser popup.
-// Keyboard:  ↑/↓ navigate,  Enter select/descend,  Backspace go up,
-//            's' select current dir for indexing,   Esc cancel.
-// Returns the chosen path or "" on cancel.
-// ─── TuiState::file_picker ────────────────────────────────────────────────
-// Unified interactive directory/file browser used by /rag, /model, /embed.
-// title_hint appears in the popup header (e.g. "RAG Folder", "Model File").
-//
-// Keyboard:
-//   ↑/↓        navigate list
-//   Enter      descend into directory, or select a file
-//   Backspace  go up one directory
-//   s          select the current directory itself (useful for /rag)
-//   Esc        cancel → returns ""
-//
-// Returns the chosen path, or "" on cancel.
-//
-std::string TuiState::file_picker(const std::string &start_dir,
-                                  const std::string &title_hint) {
-  std::string current_dir = start_dir;
-  {
-    std::error_code ec;
-    auto canon = fs::canonical(start_dir, ec);
-    if (!ec) current_dir = canon.string();
-  }
-  auto load_entries = [](const std::string &dir,
-                         std::vector<std::string> &entries) {
-    entries.clear();
-    std::error_code ec;
-    if (fs::path(dir).has_parent_path() &&
-        fs::path(dir) != fs::path(dir).root_path())
-      entries.push_back("..");
-    std::vector<std::string> dirs, files;
-    for (const auto &e : fs::directory_iterator(dir, ec)) {
-      if (ec) break;
-      std::string name = e.path().filename().string();
-      if (name.empty() || name[0] == '.') continue;
-      if (e.is_directory()) dirs.push_back(name);
-      else                  files.push_back(name);
-    }
-    std::sort(dirs.begin(), dirs.end());
-    std::sort(files.begin(), files.end());
-    for (auto &d : dirs)  entries.push_back(d + "/");
-    for (auto &f : files) entries.push_back(f);
-  };
-
-  std::vector<std::string> entries;
-  int selected = 0;
-  int scroll   = 0;
-
-  // Popup dimensions.
-  static constexpr int PW = 60;
-  static constexpr int PH = 20;
-  int py = std::max(0, (term_rows - PH) / 2);
-  int px = std::max(0, (term_cols - PW) / 2);
+static std::string tool_curl(const std::string &url) {  // Fetches `url` with libcurl; returns the body (HTML reduced to plain text) or a string starting with "ERROR: ".
+  if (url.empty()) return "ERROR: TOOL:CURL requires a URL argument";
+  CURL *curl = curl_easy_init();
+  if (!curl) return "ERROR: curl_easy_init failed";
+  std::string body;
+  body.reserve(4096);
+  curl_easy_setopt(curl, CURLOPT_URL,            url.c_str());  // NOTE(review): no protocol restriction is set, so non-HTTP schemes such as file:// are presumably accepted — confirm and restrict
+  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,  curl_write_cb);
+  curl_easy_setopt(curl, CURLOPT_WRITEDATA,      &body);  // curl_write_cb appends to this string and enforces the size cap
+  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
+  curl_easy_setopt(curl, CURLOPT_MAXREDIRS,      5L);
+  curl_easy_setopt(curl, CURLOPT_TIMEOUT,        15L);  // seconds, for the whole transfer
+  curl_easy_setopt(curl, CURLOPT_USERAGENT,      "nitro/1.0");
+  // Accept compressed responses; curl will decompress automatically.
+  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
 
-  ncplane_options opts{};
-  opts.y = py; opts.x = px;
-  opts.rows = (unsigned)PH; opts.cols = (unsigned)PW;
-  struct ncplane *picker = ncplane_create(stdpl, &opts);
-  if (!picker) return "";
+  CURLcode res = curl_easy_perform(curl);
+  long http_code = 0;
+  curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
 
-  static constexpr uint32_t PBG_R = 18, PBG_G = 24, PBG_B = 40;
-  ncplane_set_base(picker, " ", 0, NCCHANNELS_INITIALIZER(PBG_R, PBG_G, PBG_B, PBG_R, PBG_G, PBG_B));
-  // Build a compact hint line appropriate to the operation.
-  // /rag adds 's=select dir'; /model and /embed only need file selection.
-  std::string hint_line = "↑↓ navigate  Enter open/select  Esc cancel";
-  if (title_hint.find("RAG") != std::string::npos ||
-      title_hint.find("Folder") != std::string::npos) {
-    hint_line = "↑↓ navigate  Enter open  s=select dir  Esc cancel";
+  // Query content-type before cleanup (pointer is only valid while handle lives).
+  char *ct_raw = nullptr;
+  curl_easy_getinfo(curl, CURLINFO_CONTENT_TYPE, &ct_raw);
+  std::string content_type = ct_raw ? ct_raw : "";  // copied into an owned string before the handle is released
+  std::transform(content_type.begin(), content_type.end(),
+                 content_type.begin(), ::tolower);
+  curl_easy_cleanup(curl);  // the handle is released on every path from here on
+  if (res != CURLE_OK) {  // the transfer itself did not succeed
+    return std::string("ERROR: curl: ") + curl_easy_strerror(res);
+  }
+  if (http_code >= 400) {  // 4xx/5xx are reported as errors and the body is discarded
+    return "ERROR: HTTP " + std::to_string(http_code) + " from " + url;
+  }
+  if (body.empty()) {
+    return "(empty response)";
   }
-  auto draw_picker = [&]() {
-    ncplane_erase(picker);
-    uint64_t border_ch = NCCHANNELS_INITIALIZER(100, 180, 255, PBG_R, PBG_G, PBG_B);
-    ncplane_set_channels(picker, border_ch);
-    ncplane_putstr_yx(picker, 0, 0, "╔");
-    for (int c = 1; c < PW - 1; ++c) ncplane_putstr_yx(picker, 0, c, "═");
-    ncplane_putstr_yx(picker, 0, PW - 1, "╗");
-    for (int r = 1; r < PH - 1; ++r) {
-      ncplane_putstr_yx(picker, r, 0,      "║");
-      ncplane_putstr_yx(picker, r, PW - 1, "║");
-    }
-    ncplane_putstr_yx(picker, PH - 1, 0, "╚");
-    for (int c = 1; c < PW - 1; ++c) ncplane_putstr_yx(picker, PH - 1, c, "═");
-    ncplane_putstr_yx(picker, PH - 1, PW - 1, "╝");
-
-    // Title
-    ncplane_set_channels(picker, NCCHANNELS_INITIALIZER(255, 220, 80, PBG_R, PBG_G, PBG_B));
-    std::string title_str = " 📂 " + title_hint + " Picker ";
-    if ((int)title_str.size() > PW - 4) title_str = title_str.substr(0, PW - 4);
-    ncplane_putstr_yx(picker, 0, 2, title_str.c_str());
-    // Current path (truncated).
-    std::string path_display = current_dir;
-    if ((int)path_display.size() > PW - 4)
-      path_display = "…" + path_display.substr(path_display.size() - (PW - 5));
-    ncplane_set_channels(picker, NCCHANNELS_INITIALIZER(160, 200, 240, PBG_R, PBG_G, PBG_B));
-    ncplane_putstr_yx(picker, 1, 2, path_display.c_str());
-    // Hint line (bottom interior row).
-    ncplane_set_channels(picker, NCCHANNELS_INITIALIZER(120, 120, 160, PBG_R, PBG_G, PBG_B));
-    std::string hint_trunc = hint_line;
-    if ((int)hint_trunc.size() > PW - 4) hint_trunc = hint_trunc.substr(0, PW - 4);
-    ncplane_putstr_yx(picker, PH - 2, 2, hint_trunc.c_str());
-    // Entry list.
-    int list_rows = PH - 5;
-    if (selected < scroll) scroll = selected;
-    if (selected >= scroll + list_rows) scroll = selected - list_rows + 1;
-    for (int i = 0; i < list_rows; ++i) {
-      int idx = scroll + i;
-      if (idx >= (int)entries.size()) break;
-      bool is_selected = (idx == selected);
-      bool is_dir = !entries[idx].empty() && entries[idx].back() == '/';
-      uint32_t fr, fg, fb;
-      if (is_selected)  { fr = 20;  fg = 20;  fb = 20;  }
-      else if (is_dir)   { fr = 120; fg = 200; fb = 255; }
-      else               { fr = 200; fg = 200; fb = 200; }
-      uint32_t br = is_selected ? 100 : PBG_R;
-      uint32_t bg = is_selected ? 180 : PBG_G;
-      uint32_t bb = is_selected ? 255 : PBG_B;
-      ncplane_set_channels(picker, NCCHANNELS_INITIALIZER(fr, fg, fb, br, bg, bb));
-      std::string label = (is_selected ? " ▶ " : "   ") + entries[idx];
-      if ((int)label.size() > PW - 2) label = label.substr(0, PW - 2);
-      while ((int)label.size() < PW - 2) label += ' ';
-      ncplane_putstr_yx(picker, 2 + i, 1, label.c_str());
-    }
-    notcurses_render(nc);
-  };
-
-  std::string result;
-  load_entries(current_dir, entries);
-  draw_picker();
 
-  for (;;) {
-    ncinput ni{};
-    notcurses_get_blocking(nc, &ni);
-    if (ni.id == NCKEY_ESC) {
-      break;  // cancelled
-    }
-    if (ni.id == NCKEY_UP) {
-      if (selected > 0) --selected;
-      draw_picker();
-      continue;
-    }
-    if (ni.id == NCKEY_DOWN) {
-      if (selected + 1 < (int)entries.size()) ++selected;
-      draw_picker();
-      continue;
-    }
-    // 's' — select the current directory (useful for /rag, ignored for file pickers).
-    if (ni.id == 's' || ni.id == 'S') {
-      // Select current directory for RAG indexing.
-      result = current_dir;
-      break;
-    }
-    if (ni.id == NCKEY_BACKSPACE || ni.id == 127) {
-      // Go up one level.
-      fs::path p(current_dir);
-      if (p.has_parent_path() && p != p.root_path()) {
-        current_dir = p.parent_path().string();
-        load_entries(current_dir, entries);
-        selected = 0; scroll = 0;
-        draw_picker();
-      }
-      continue;
-    }
-    if (ni.id == NCKEY_ENTER || ni.id == '\r' || ni.id == '\n') {
-      if (entries.empty()) continue;
-      const std::string &entry = entries[selected];
-      if (entry == "..") {
-        fs::path p(current_dir);
-        if (p.has_parent_path() && p != p.root_path()) {
-          current_dir = p.parent_path().string();
-          load_entries(current_dir, entries);
-          selected = 0; scroll = 0;
-          draw_picker();
-        }
-      } else if (!entry.empty() && entry.back() == '/') {
-        // Descend into directory.
-        current_dir = current_dir + "/" + entry.substr(0, entry.size() - 1);
-        {
-          std::error_code ec;
-          auto canon = fs::canonical(current_dir, ec);
-          if (!ec) current_dir = canon.string();
-        }
-        load_entries(current_dir, entries);
-        selected = 0; scroll = 0;
-        draw_picker();
-      } else {
-        // Select a specific file.
-        // Select the highlighted file.
-        result = current_dir + "/" + entry;
-        break;
-      }
-      continue;
-    }
+  // Strip HTML tags so the model receives clean plain text.
+  bool is_html = (content_type.find("text/html") != std::string::npos)  // the header says HTML, or ...
+    || (body.size() > 5 && body.substr(0,5) == "<!DOC")  // ... the body starts with a doctype, or ...
+    || (body.size() > 6 && body.substr(0,6) == "<html>");  // ... with a bare lower-case <html> tag (case-sensitive sniff)
+  if (is_html) {
+    body = html_to_text(body);
   }
-  ncplane_destroy(picker);
-  notcurses_render(nc);
-  return result;
+
+  return body;
 }
 
 //
-// ─── TuiState::confirm_dialog ─────────────────────────────────────────────
+// TuiState::init
 //
-bool TuiState::confirm_dialog(const std::string &prompt) {
-  ncplane_erase(inputpl);
-  ncplane_set_channels(inputpl, inp_ch(255, 200, 80));
-  std::string msg = " " + prompt + " [y/n] ❯ ";
-  ncplane_putstr_yx(inputpl, 1, 0, msg.c_str());
-  notcurses_render(nc);
-  std::string answer;
-  for (;;) {
-    ncinput ni{};
-    notcurses_get_blocking(nc, &ni);
-    if (ni.id == NCKEY_ENTER || ni.id == '\r' || ni.id == '\n') break;
-    if (ni.id == NCKEY_BACKSPACE && !answer.empty()) { answer.pop_back(); }
-    else if (ni.id >= 32 && ni.id < 127) { answer += (char)ni.id; }
-    ncplane_erase(inputpl);
-    ncplane_set_channels(inputpl, inp_ch(255, 200, 80));
-    ncplane_putstr_yx(inputpl, 1, 0, (msg + answer).c_str());
-    notcurses_render(nc);
+void TuiState::init() {  // Starts notcurses and creates the header, chat and input planes, then draws the first frame.
+  notcurses_options opts{};
+  opts.flags = NCOPTION_SUPPRESS_BANNERS;
+  nc = notcurses_init(&opts, nullptr);
+  if (!nc) { std::fputs("notcurses_init failed\n", stderr); std::exit(1); }  // fatal: the process exits when notcurses cannot start
+  stdpl = notcurses_stdplane(nc);
+  notcurses_term_dim_yx(nc, (unsigned *)&term_rows, (unsigned *)&term_cols);
+  uint64_t bg = NCCHANNELS_INITIALIZER(BG_CHAT_R, BG_CHAT_G, BG_CHAT_B,
+                                       BG_CHAT_R, BG_CHAT_G, BG_CHAT_B);
+  ncplane_set_base(stdpl, " ", 0, bg);
+  ncplane_erase(stdpl);
+  ncplane_options hopt{};
+  hopt.y = 0; hopt.x = 0;
+  hopt.rows = 1; hopt.cols = (unsigned)term_cols;  // header: one row at the top
+  header = ncplane_create(stdpl, &hopt);  // NOTE(review): none of the ncplane_create results in this function is checked for null
+  int chat_rows = std::max(1, term_rows - 3);  // all rows except the header row and the two input rows, never fewer than one
+  ncplane_options copt{};
+  copt.y = 1; copt.x = 0;
+  copt.rows = (unsigned)chat_rows; copt.cols = (unsigned)term_cols;
+  chatpl = ncplane_create(stdpl, &copt);
+  ncplane_set_base(chatpl, " ", 0,
+                   NCCHANNELS_INITIALIZER(BG_CHAT_R, BG_CHAT_G, BG_CHAT_B,
+                                          BG_CHAT_R, BG_CHAT_G, BG_CHAT_B));
+  ncplane_options iopt{};
+  iopt.y = term_rows - 2; iopt.x = 0;  // input area: the last two rows
+  iopt.rows = 2; iopt.cols = (unsigned)term_cols;
+  inputpl = ncplane_create(stdpl, &iopt);
+  ncplane_set_base(inputpl, " ", 0,
+                   NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B,
+                                          BG_INP_R, BG_INP_G, BG_INP_B));
+  notcurses_mice_enable(nc, NCMICE_BUTTON_EVENT);  // mouse button events only
+  redraw_all();
+}
+
+void TuiState::destroy() {  // Stops notcurses if it is running; safe to call more than once.
+  if (nc) {
+    notcurses_stop(nc);
+    nc = nullptr;  // makes a second call a no-op
   }
-  std::string lo = answer;
-  std::transform(lo.begin(), lo.end(), lo.begin(), ::tolower);
-  redraw_input();
-  notcurses_render(nc);
-  return (lo == "y" || lo == "yes" || lo == "sure" || lo == "k");
+}
+
+void TuiState::resize() {  // Re-reads the terminal size, resizes the three planes to match and redraws everything.
+  notcurses_term_dim_yx(nc, (unsigned *)&term_rows, (unsigned *)&term_cols);
+  ncplane_resize_simple(header,  1,                       (unsigned)term_cols);
+  int cr = std::max(1, term_rows - 3);  // same chat height rule as init(): minus the header row and the two input rows
+  ncplane_resize_simple(chatpl,  (unsigned)cr,            (unsigned)term_cols);
+  ncplane_move_yx(inputpl, term_rows - 2, 0);  // keep the input area on the last two rows
+  ncplane_resize_simple(inputpl, 2,                       (unsigned)term_cols);
+  redraw_all();
 }
 
 //
-// Integrates InputHistory:  Up/Down arrows navigate the history stack.
-// On submit the entry is pushed to history, and nav is reset.
+// TuiState::redraw
 //
-std::string TuiState::readline_blocking() {
-  input_buf.clear();
-  cursor_pos = 0;
-  history.reset_nav();
-  redraw_input();
-  notcurses_render(nc);
-
-  // Temporary saved draft so Down from history restores the user's current text.
-  std::string draft;
-
-  for (;;) {
-    ncinput ni{};
-    notcurses_get_blocking(nc, &ni);
-
-    if (ni.id == NCKEY_ENTER || ni.id == '\r' || ni.id == '\n') {
-      std::string result = input_buf;
-      if (!result.empty()) {
-        history.push(result);
-      }
-      input_buf.clear();
-      cursor_pos = 0;
-      redraw_input();
-      notcurses_render(nc);
-      return result;
-    }
-
-    if (ni.id == NCKEY_UP) {
-      // Entering history from a fresh prompt: save current text as draft.
-      std::string hist_entry;
-      if (history.up(hist_entry)) {
-        if (input_buf.size() > 0 && hist_entry != input_buf) {
-          // Only save draft when we first leave the bottom of history.
-          // (history.reset_nav was called on entry so the first Up call
-          //  always comes from the "new input" position.)
-          draft = input_buf;
-        }
-        input_buf  = hist_entry;
-        cursor_pos = input_buf.size();
-      }
-      redraw_input();
-      notcurses_render(nc);
-      continue;
-    }
-
-    if (ni.id == NCKEY_DOWN) {
-      std::string hist_entry;
-      bool got = history.down(hist_entry);
-      if (got) {
-        input_buf  = hist_entry;
-        cursor_pos = input_buf.size();
-      } else {
-        // Past the newest entry → restore draft.
-        input_buf  = draft;
-        cursor_pos = input_buf.size();
-        draft.clear();
-      }
-      redraw_input();
-      notcurses_render(nc);
-      continue;
-    }
+void TuiState::redraw_header() {  // Repaints the one-line status header: model name, tok/s, KV and VRAM usage, spinner. Does not render.
+  ncplane_erase(header);
+  ncplane_set_base(header, " ", 0,
+                   NCCHANNELS_INITIALIZER(BG_HDR_R, BG_HDR_G, BG_HDR_B,
+                                          BG_HDR_R, BG_HDR_G, BG_HDR_B));
+  float kv_pct   = kv_total   > 0 ? 100.f * (float)kv_used   / (float)kv_total   : 0.f;  // 0% when the total is unknown, avoiding a division by zero
+  float vram_pct = vram_total  > 0 ? 100.f * (float)vram_used / (float)vram_total : 0.f;
+  static const char *const SPIN[] = { "⣾","⣽","⣻","⢿","⡿","⣟","⣯","⣷" };
+  const char *spin_str = thinking ? SPIN[spinner_frame % 8] : " ";  // the spinner is shown only while thinking
+  char buf[512];
+  int n = std::snprintf(buf, sizeof(buf),
+                        " ✦ NITRO  │ %-32s │ %5.1f tok/s │ KV %4.1f%%  VRAM %4.1f%%  %s",
+                        current_model.c_str(), (double)tokens_per_sec,
+                        (double)kv_pct, (double)vram_pct, spin_str);
+  if (n > term_cols && term_cols >= 0 && term_cols < (int)sizeof(buf)) buf[term_cols] = '\0';  // cut only inside buf: n is the untruncated length, so term_cols may exceed the buffer. NOTE(review): this cuts by bytes, not columns
+  ncplane_set_channels(header, hdr_ch(130, 220, 200));
+  ncplane_putstr_yx(header, 0, 0, buf);
+}
 
-    // Scroll the chat pane — not the input history.
-    if (ni.id == NCKEY_PGUP) {
-      scroll_offset += std::max(1, term_rows - 4);
-      redraw_chat();
-      notcurses_render(nc);
-      continue;
-    }
-    if (ni.id == NCKEY_PGDOWN) {
-      scroll_offset = std::max(0, scroll_offset - std::max(1, term_rows - 4));
-      redraw_chat();
-      notcurses_render(nc);
-      continue;
-    }
-    if (ni.id == NCKEY_SCROLL_UP && scroll_offset < term_rows + 10) {
-      scroll_offset += 1;
-      redraw_chat();
-      notcurses_render(nc);
-      continue;
-    }
-    if (ni.id == NCKEY_SCROLL_DOWN && scroll_offset > 0) {
-      scroll_offset -= 1;
-      redraw_chat();
-      notcurses_render(nc);
-      continue;
-    }
-    if (ni.id == NCKEY_F01) {
-      show_help();
-      continue;
-    }
-    if (ni.id == NCKEY_F02) {
-      mouse_mode = !mouse_mode;
-      if (mouse_mode) {
-        notcurses_mice_enable(nc, NCMICE_BUTTON_EVENT);
-      } else {
-        notcurses_mice_disable(nc);
-      }
-      continue;
-    }
-    if (ni.id == NCKEY_BACKSPACE || ni.id == 127) {
-      if (cursor_pos > 0) { input_buf.erase(cursor_pos - 1, 1); --cursor_pos; }
-    } else if (ni.id == NCKEY_LEFT) {
-      if (cursor_pos > 0) --cursor_pos;
-    } else if (ni.id == NCKEY_RIGHT) {
-      if (cursor_pos < input_buf.size()) ++cursor_pos;
-    } else if (ni.id == NCKEY_HOME) {
-      cursor_pos = 0;
-    } else if (ni.id == NCKEY_END) {
-      cursor_pos = input_buf.size();
-    } else if (ni.id == NCKEY_DEL) {
-      if (cursor_pos < input_buf.size()) input_buf.erase(cursor_pos, 1);
-    } else if (ni.id >= 32 && ni.id < 0xD800) {
-      // Any printable character — entering new text clears the nav draft
-      // so that Down won't resurrect a stale saved buffer.
-      draft.clear();
-      history.reset_nav();
-      input_buf.insert(cursor_pos, 1, (char)ni.id);
-      ++cursor_pos;
+void TuiState::redraw_chat() {  // Repaints the chat plane with the visible window of chat_lines, coloured by line prefix. Does not render.
+  ncplane_erase(chatpl);
+  unsigned rows, cols;
+  ncplane_dim_yx(chatpl, &rows, &cols);
+  std::lock_guard<std::mutex> lk(lines_mutex);  // held for the rest of the function while chat_lines is read
+  int total   = (int)chat_lines.size();
+  int visible = (int)rows;
+  int start   = std::max(0, total - visible - scroll_offset);  // scroll_offset counts lines scrolled back from the newest
+  int end     = std::min(total, start + visible);
+  for (int i = start, row = 0; i < end; ++i, ++row) {
+    const std::string &line = chat_lines[i];
+    uint64_t ch;
+    // Logo lines use prefix "[logo_N]" where N is the row index 0-6.
+    // We interpolate a cyan→magenta gradient across the 7 art rows.
+    if (line.rfind("[logo_", 0) == 0 && line.size() > 7 && line[7] == ']') {
+      int logo_row = line[6] - '0';
+      // Gradient: cyan (0,230,255) → green (80,255,160) → magenta (220,80,255)
+      // 7 steps, indices 0-6.
+      static const uint32_t GRAD_R[] = {  0,  20,  60, 120, 180, 210, 220 };
+      static const uint32_t GRAD_G[] = { 230, 255, 255, 255, 200, 130,  80 };
+      static const uint32_t GRAD_B[] = { 255, 200, 140,  80, 100, 200, 255 };
+      int gi = std::max(0, std::min(logo_row, 6));  // clamp so a non-digit N cannot index outside the tables
+      ch = chat_ch(GRAD_R[gi], GRAD_G[gi], GRAD_B[gi]);
     }
+    else if (line.rfind("You: ",   0) == 0) ch = chat_ch(100, 200, 255);
+    else if (line.rfind("Nitro: ", 0) == 0) ch = chat_ch(180, 255, 180);
+    else if (line.rfind("[tool]",  0) == 0) ch = chat_ch(255, 180,  80);
+    else if (line.rfind("[err]",   0) == 0) ch = chat_ch(255,  80,  80);
+    else if (line.rfind("[sys]",   0) == 0) ch = chat_ch(140, 140, 200);
+    else                                     ch = chat_ch(210, 210, 210);
+    ncplane_set_channels(chatpl, ch);
+    // Strip the [logo_N] prefix before rendering.
+    std::string display = (line.rfind("[logo_", 0) == 0 && line.size() > 7 && line[7] == ']')  // same test as the logo check above, so only real logo lines lose the 8-character prefix
+      ? line.substr(8) : line;
+    if (display.size() > cols) display = display.substr(0, cols);  // NOTE(review): truncates by bytes, which can split a multi-byte character
+    ncplane_putstr_yx(chatpl, row, 0, display.c_str());
+  }
+}
 
-    redraw_input();
-    notcurses_render(nc);
+void TuiState::redraw_input() {
+  // Row 0 holds a separator rule; row 1 the prompt followed by a window onto
+  // input_buf in which the cell at cursor_pos is drawn with inverted colours.
+  ncplane_erase(inputpl);
+  ncplane_set_channels(inputpl, inp_ch(80, 120, 160));
+  const std::string sep(std::max(0, term_cols), '-');
+  ncplane_putstr_yx(inputpl, 0, 0, sep.c_str());
+  const std::string prompt = " ❯ ";
+  const int prompt_cols = 4;
+  ncplane_set_channels(inputpl, inp_ch(100, 210, 255));
+  ncplane_putstr_yx(inputpl, 1, 0, prompt.c_str());
+  const int max_w = std::max(0, term_cols - prompt_cols - 1);
+  const int len = (int)input_buf.size();
+  const int cur = std::min((int)cursor_pos, len);
+  // A buffer wider than the field normally shows its tail; when the cursor is
+  // left of that tail the window starts at the cursor so it stays on screen.
+  const bool clipped = len > max_w && max_w > 0;
+  const int view_offset = clipped ? std::min(len - max_w, cur) : 0;
+  const std::string visible = clipped ? input_buf.substr(view_offset, max_w) : input_buf;
+  const int cur_in_view = std::min(cur - view_offset, (int)visible.size());
+  const bool on_char = cur_in_view < (int)visible.size();
+  ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
+  ncplane_putstr_yx(inputpl, 1, prompt_cols, visible.substr(0, cur_in_view).c_str());
+  // Cursor cell: the character under the cursor, or a blank past the end.
+  const int cx = prompt_cols + cur_in_view;
+  const char cbuf[2] = { on_char ? visible[cur_in_view] : ' ', '\0' };
+  ncplane_set_channels(inputpl, NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B, 180, 230, 255));
+  ncplane_putstr_yx(inputpl, 1, cx, cbuf);
+  ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
+  // Remainder of the visible text to the right of the cursor cell.
+  if (on_char && cur_in_view + 1 < (int)visible.size()) {
+    ncplane_putstr_yx(inputpl, 1, cx + 1, visible.substr(cur_in_view + 1).c_str());
   }
 }
 
-void AgentState::apply_generation_params(const NitroConfig &cfg) {
-  llama->add_stop("<|turn|>");
-  llama->add_stop("<|im_end|>");
-  llama->set_max_tokens(cfg.n_max_tokens);
-  llama->set_temperature(cfg.temperature);
-  llama->set_top_k(cfg.top_k);
-  llama->set_top_p(cfg.top_p);
-  llama->set_min_p(cfg.min_p);
-  llama->set_penalty_repeat(cfg.penalty_repeat);
-  llama->set_penalty_last_n(cfg.penalty_last_n);
-  llama->set_log_level(cfg.log_level);
+void TuiState::redraw_all() {  // run all three redraw_* routines, then render once
+  redraw_header();
+  redraw_chat();
+  redraw_input();
+  notcurses_render(nc);  // single render after every plane has been repainted
+}
+
+void TuiState::tick_spinner() {  // advance the spinner counter by one and repaint the header
+  ++spinner_frame;
+  redraw_header();  // presumably draws the spinner from spinner_frame — defined outside this hunk
+  notcurses_render(nc);
+}
+
+void TuiState::set_thinking(bool on) {  // store the thinking flag and repaint the header
+  thinking = on;
+  if (!on) spinner_frame = 0;  // leaving thinking mode resets the spinner counter
+  redraw_header();
+  notcurses_render(nc);
 }
 
 //
-// Shows a modal loading popup while the model loads.
+// TuiState content helpers
 //
-bool AgentState::setup_model(const NitroConfig &cfg, TuiState &tui) {
-  if (cfg.model_path.empty()) {
-    tui.append_line("[sys] No model loaded.  Use /model <path> to load a GGUF.");
-    tui.redraw_all();
-    return false;
-  }
-  // Show a modal popup so the user knows loading is in progress.
-  std::string model_name = fs::path(cfg.model_path).filename().string();
-  tui.show_modal_popup("Loading " + model_name);
-  // Destroy the iterator first — it holds references into the llama context.
-  // Freeing llama while iter is still alive causes use-after-free / load failure.
-  iter.reset();
-  model_loaded = false;
-  llama = std::make_unique<Llama>();
-
-  apply_generation_params(cfg);
-  if (!llama->load_model(cfg.model_path, cfg.n_ctx, cfg.n_batch,
-                         cfg.n_gpu_layers, cfg.log_level)) {
-    tui.dismiss_modal_popup();
-    tui.append_line(std::string("[err] ") + llama->last_error());
-    tui.redraw_all();
-    return false;
+void TuiState::append_line(const std::string &line) {
+  // Adds `line` to the chat log, hard-wrapped into pieces of at most term_cols - 1 bytes.
+  std::lock_guard<std::mutex> lk(lines_mutex);
+  const size_t w = (size_t)std::max(1, term_cols - 1);
+  if (line.empty()) chat_lines.push_back(line);  // an empty line is still one row
+  for (size_t off = 0, end = 0; off < line.size(); off = end) {
+    end = std::min(line.size(), off + w);
+    while (end < line.size() && end > off + 1 && (line[end] & 0xC0) == 0x80) --end;  // avoid cutting a UTF-8 sequence
+    chat_lines.push_back(line.substr(off, end - off));
   }
-  tui.dismiss_modal_popup();
-  model_loaded = true;
-  tui.current_model = model_name;
-  tui.append_line("[sys] Model ready: " + tui.current_model);
-  LlamaMemoryInfo mem = llama->memory_info();
-  tui.append_line("[sys] " + mem.advice);
-  tui.kv_used  = mem.kv_used;
-  tui.kv_total = mem.kv_total;
-  tui.vram_used  = mem.vram_used;
-  tui.vram_total = mem.vram_total;
-  tui.redraw_all();
-  return true;
 }
 
-bool AgentState::setup_embed(const std::string &path, TuiState &tui) {
-  tui.show_modal_popup("Loading embedding model: " + fs::path(path).filename().string());
-  tui.redraw_all();
-  embed_llama = std::make_unique<Llama>();
-  if (!embed_llama->load_embedding_model(path)) {
-    tui.dismiss_modal_popup();
-    tui.append_line(std::string("[err] ") + embed_llama->last_error());
-    tui.redraw_all();
-    embed_llama.reset();
-    return false;
+void TuiState::append_token(const std::string &token) {
+  // Buffer streamed text and hand every completed line (up to '\n') to
+  // append_line(); an unterminated tail waits in token_acc for more tokens.
+  token_acc += token;
+  std::string::size_type nl;
+  while ((nl = token_acc.find('\n')) != std::string::npos) {
+    const std::string complete = token_acc.substr(0, nl);
+    token_acc.erase(0, nl + 1);
+    append_line(complete);
   }
-  tui.dismiss_modal_popup();
-  rag_db      = std::make_unique<RagDB>();
-  rag_session = std::make_unique<RagSession>();
-  tui.append_line("[sys] Embedding model ready.");
-  tui.redraw_all();
-  return true;
+  redraw_chat();
+  notcurses_render(nc);
 }
 
-void AgentState::reset_conversation(const std::string &sysprompt, TuiState &tui) {
-  system_prompt = sysprompt;
-  llama->reset();
-  apply_generation_params(NitroConfig{});
-  iter = std::make_unique<LlamaIter>();
-  if (!llama->add_message(*iter, "system", system_prompt)) {
-    tui.append_line(std::string("[err] System prompt injection: ") + llama->last_error());
-    tui.redraw_all();
+void TuiState::flush_token_acc() {  // emit whatever append_token() left unterminated
+  if (!token_acc.empty()) {
+    append_line(token_acc);  // the partial line becomes chat text even without a '\n'
+    token_acc.clear();
+    redraw_chat();
+    notcurses_render(nc);
   }
 }
 
-float AgentState::tokens_per_sec() const {
-  if (!iter) return 0.0f;
-  auto now = std::chrono::high_resolution_clock::now();
-  double elapsed = std::chrono::duration<double>(now - iter->_t_start).count();
-  if (elapsed <= 0.0 || iter->_tokens_generated <= 0) return 0.0f;
-  return (float)(iter->_tokens_generated / elapsed);
-}
+//
+// Creates a centred floating plane with a border and a status message.
+// The popup sits above all other planes and stays up until dismiss_modal_popup().
+//
+void TuiState::show_modal_popup(const std::string &message) {
+  // Dismiss any previous popup first.
+  dismiss_modal_popup();
 
-std::string AgentState::memory_info_text() {
-  if (!model_loaded) return "No model loaded.";
-  LlamaMemoryInfo m = llama->memory_info();
-  std::ostringstream oss;
-  oss << "KV cache  : " << m.kv_used << " / " << m.kv_total
-      << "  (" << m.kv_percent << "%)\n";
-  if (m.vram_total > 0) {
-    oss << "VRAM      : " << (m.vram_used >> 20) << " MB / "
-        << (m.vram_total >> 20) << " MB  (" << m.vram_percent << "%)\n";
-  }
-  oss << "GPU layers: " << m.n_layers_gpu << " / " << m.n_layers_total << "\n";
-  oss << "CPU layers: " << m.n_layers_cpu << "\n";
-  oss << "Advice    : " << m.advice << "\n";
-  return oss.str();
-}
+  // Clamp popup size to terminal.
+  int popup_w = std::min((int)message.size() + 8, term_cols - 4);  // message length is in bytes
+  popup_w = std::max(popup_w, 20);  // the 20-column floor is applied last, so it wins over the clamp
+  int popup_h = 5;
+  int py = std::max(0, (term_rows - popup_h) / 2);
+  int px = std::max(0, (term_cols - popup_w) / 2);
 
-std::string AgentState::rag_tool(const NitroConfig &cfg, const std::string &agent_query) {
-  std::string result;
-  if (embed_llama && rag_db && rag_session) {
-    result = embed_llama->rag_retrieve(*rag_db, agent_query, cfg.rag_top_k, *rag_session);
-    if (result.empty()) {
-      result = "RAG: no context found";
-    }
-  } else {
-    result = "RAG: not enabled";
+  ncplane_options opts{};
+  opts.y    = py; opts.x    = px;
+  opts.rows = (unsigned)popup_h;
+  opts.cols = (unsigned)popup_w;
+  modal_plane = ncplane_create(stdpl, &opts);
+  if (!modal_plane) return;  // plane creation failed: nothing is drawn
+
+  // Background: deep navy.
+  static constexpr uint32_t PBG_R = 20, PBG_G = 28, PBG_B = 50;
+  ncplane_set_base(modal_plane, " ", 0,
+                   NCCHANNELS_INITIALIZER(PBG_R, PBG_G, PBG_B, PBG_R, PBG_G, PBG_B));
+  ncplane_erase(modal_plane);
+
+  // Border — bright cyan.
+  uint64_t border_ch = NCCHANNELS_INITIALIZER(80, 220, 255, PBG_R, PBG_G, PBG_B);
+  ncplane_set_channels(modal_plane, border_ch);
+
+  // Draw corners and edges manually so we don't require nccell border helpers.
+  // Top row
+  ncplane_putstr_yx(modal_plane, 0, 0, "╔");
+  for (int c = 1; c < popup_w - 1; ++c)
+    ncplane_putstr_yx(modal_plane, 0, c, "═");
+  ncplane_putstr_yx(modal_plane, 0, popup_w - 1, "╗");
+  // Middle rows
+  for (int r = 1; r < popup_h - 1; ++r) {
+    ncplane_putstr_yx(modal_plane, r, 0, "║");
+    ncplane_putstr_yx(modal_plane, r, popup_w - 1, "║");
   }
-  return result;
+  // Bottom row
+  ncplane_putstr_yx(modal_plane, popup_h - 1, 0, "╚");
+  for (int c = 1; c < popup_w - 1; ++c)
+    ncplane_putstr_yx(modal_plane, popup_h - 1, c, "═");
+  ncplane_putstr_yx(modal_plane, popup_h - 1, popup_w - 1, "╝");
+
+  // Title bar.
+  uint64_t title_ch = NCCHANNELS_INITIALIZER(255, 220, 80, PBG_R, PBG_G, PBG_B);
+  ncplane_set_channels(modal_plane, title_ch);
+  ncplane_putstr_yx(modal_plane, 1, 2, "⏳ Loading…");  // fixed caption, independent of `message`
+
+  // Message.
+  uint64_t msg_ch = NCCHANNELS_INITIALIZER(200, 200, 200, PBG_R, PBG_G, PBG_B);
+  ncplane_set_channels(modal_plane, msg_ch);
+  // Truncate message to fit inside border.
+  int max_msg = popup_w - 4;  // 2 columns of border + padding on each side
+  std::string display = message.size() > (size_t)max_msg
+    ? message.substr(0, max_msg)
+    : message;
+  ncplane_putstr_yx(modal_plane, 2, 2, display.c_str());
+
+  notcurses_render(nc);  // the function returns right after this render
 }
 
-bool AgentState::rag_load_index(const std::string &path, TuiState &tui) {
-  if (!embed_llama || !rag_db) {
-    tui.append_line("[err] Load an embedding model first: /embed <path>");
-    tui.redraw_all();
-    return false;
-  }
+void TuiState::show_help() {  // append the command reference to the chat log as [sys] lines
+  append_line("[sys] Commands:");
+  append_line("[sys]   /model  [path]           load a GGUF model (picker if no path)");
+  append_line("[sys]   /embed  [path]           load an embedding model (picker if no path)");
+  append_line("[sys]   /rag    [path]           index file or directory (picker if no path)");
+  append_line("[sys]   /memory                  KV / VRAM / layer stats");
+  append_line("[sys]   /clear                   reset conversation");
+  append_line("[sys]   /settings                show current settings");
+  append_line("[sys]   /set    <key> <value>    change a setting live");
+  append_line("[sys]   /help                    this message");
+  append_line("[sys]   exit / quit              exit Nitro");
+  append_line("[sys] Settable keys (via /set):");
+  append_line("[sys]   temperature  top_p  top_k  min_p  penalty_repeat");
+  append_line("[sys]   n_max_tokens  penalty_last_n  rag_top_k  n_gpu_layers");
+  append_line("[sys]   run_allowed  (comma-separated list, e.g. python3,make)");
+  redraw_all();  // append_line() only stores text; repaint once after the last line
+}
 
-  if (!rag_db->load(path)) {
-    tui.append_line("[sys] failed to load");
-    tui.redraw_all();
+void TuiState::dismiss_modal_popup() {  // does nothing when no popup plane exists
+  if (modal_plane) {
+    ncplane_destroy(modal_plane);
+    modal_plane = nullptr;  // so the next show/dismiss call sees "no popup"
+    notcurses_render(nc);  // re-render now that the popup plane is gone
   }
-
-  return true;
 }
 
-bool AgentState::rag_index(const std::string &path, const NitroConfig &cfg, TuiState &tui) {
-  if (!embed_llama || !rag_db) {
-    tui.append_line("[err] Load an embedding model first: /embed <path>");
-    tui.redraw_all();
-    return false;
+//
+// ─── TuiState::file_picker ────────────────────────────────────────────────
+// Interactive directory/file browser popup.
+// Keyboard:  ↑/↓ navigate,  Enter select/descend,  Backspace go up,
+//            's' select current dir for indexing,   Esc cancel.
+// Returns the chosen path or "" on cancel.
+// ─── TuiState::file_picker ────────────────────────────────────────────────
+// Unified interactive directory/file browser used by /rag, /model, /embed.
+// title_hint appears in the popup header (e.g. "RAG Folder", "Model File").
+//
+// Keyboard:
+//   ↑/↓        navigate list
+//   Enter      descend into directory, or select a file
+//   Backspace  go up one directory
+//   s          select the current directory itself (useful for /rag)
+//   Esc        cancel → returns ""
+//
+// Returns the chosen path, or "" on cancel.
+//
+std::string TuiState::file_picker(const std::string &start_dir,
+                                  const std::string &title_hint) {
+  std::string current_dir = start_dir;
+  {
+    std::error_code ec;
+    auto canon = fs::canonical(start_dir, ec);
+    if (!ec) current_dir = canon.string();
   }
-
-  auto index_one = [&](const std::string &filepath) {
-    tui.append_line("[sys]   indexing: " + filepath);
-    tui.redraw_all();
-    if (!embed_llama->rag_index(*rag_db, filepath)) {
-      tui.append_line(std::string("[err] rag_load: ") + embed_llama->last_error());
-      tui.redraw_all();
+  // Fills `entries` with ".." (unless at the root), then sub-directories as
+  // "name/", then files, each group sorted; dot-names are skipped.
+  auto load_entries = [](const std::string &dir,
+                         std::vector<std::string> &entries) {
+    entries.clear();
+    const fs::path base(dir);
+    if (base.has_parent_path() && base != base.root_path()) entries.push_back("..");
+    std::vector<std::string> dirs, files;
+    std::error_code ec;  // error_code overloads throughout: an unreadable entry must not throw
+    for (fs::directory_iterator it(dir, ec), last; !ec && it != last; it.increment(ec)) {
+      const std::string name = it->path().filename().string();
+      if (name.empty() || name[0] == '.') continue;
+      std::error_code type_ec;  // a failed type query lists the entry as a plain file
+      (it->is_directory(type_ec) ? dirs : files).push_back(name);
     }
+    std::sort(dirs.begin(), dirs.end());
+    std::sort(files.begin(), files.end());
+    for (const auto &d : dirs) entries.push_back(d + "/");  // trailing '/' marks a directory
+    entries.insert(entries.end(), files.begin(), files.end());
   };
 
-  // must be set before indexing
-  rag_db->embed_dim = embed_llama->get_embed_dim();
-
-  fs::path rp(path);
-  std::error_code ec;
-  if (fs::is_directory(rp, ec)) {
-    for (const auto &entry : fs::recursive_directory_iterator(rp, ec)) {
-      if (entry.is_regular_file()) {
-        index_one(entry.path().string());
-      }
-    }
-  } else {
-    index_one(path);
-  }
-
-  std::string save_path = join_path(cfg.sandbox, "rag-index.bin");
-  tui.append_line("[sys] saving index: " + save_path);
-  tui.redraw_all();
-  rag_db->save(save_path);
+  std::vector<std::string> entries;
+  int selected = 0;
+  int scroll   = 0;
 
-  return true;
-}
+  // Popup dimensions.
+  static constexpr int PW = 60;
+  static constexpr int PH = 20;
+  int py = std::max(0, (term_rows - PH) / 2);
+  int px = std::max(0, (term_cols - PW) / 2);
 
-//
-// Tool dispatch
-//
-std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &cfg, TuiState &tui) {
-  const std::string &sandbox = cfg.sandbox;
-  const std::vector<std::string> &run_allowed = cfg.run_allowed;
+  ncplane_options opts{};
+  opts.y = py; opts.x = px;
+  opts.rows = (unsigned)PH; opts.cols = (unsigned)PW;
+  struct ncplane *picker = ncplane_create(stdpl, &opts);
+  if (!picker) return "";
 
-  std::string op, arg1, arg2;
-  auto sp1 = cmd.find(' ');
-  if (sp1 == std::string::npos) {
-    op = trim(cmd);
-  } else {
-    op = trim(cmd.substr(0, sp1));
-    std::string rest = cmd.substr(sp1 + 1);
-    rest.erase(0, rest.find_first_not_of(" \t"));
-    auto sp2 = rest.find(' ');
-    if (sp2 == std::string::npos) {
-      arg1 = rest;
-    } else {
-      arg1 = rest.substr(0, sp2);
-      arg2 = rest.substr(sp2 + 1);
-    }
+  static constexpr uint32_t PBG_R = 18, PBG_G = 24, PBG_B = 40;
+  ncplane_set_base(picker, " ", 0, NCCHANNELS_INITIALIZER(PBG_R, PBG_G, PBG_B, PBG_R, PBG_G, PBG_B));
+  // Build a compact hint line appropriate to the operation.
+  // /rag adds 's=select dir'; /model and /embed only need file selection.
+  std::string hint_line = "↑↓ navigate  Enter open/select  Esc cancel";
+  if (title_hint.find("RAG") != std::string::npos ||
+      title_hint.find("Folder") != std::string::npos) {
+    hint_line = "↑↓ navigate  Enter open  s=select dir  Esc cancel";
   }
-
-  auto resolve = [&](const std::string &p) -> std::string {
-    if (p.empty() || p == ".") {
-      return sandbox;
-    }
-    if (p.substr(0, 2) == "./") {
-      return join_path(sandbox, p.substr(2));
+  auto draw_picker = [&]() {
+    ncplane_erase(picker);
+    uint64_t border_ch = NCCHANNELS_INITIALIZER(100, 180, 255, PBG_R, PBG_G, PBG_B);
+    ncplane_set_channels(picker, border_ch);
+    ncplane_putstr_yx(picker, 0, 0, "╔");
+    for (int c = 1; c < PW - 1; ++c) ncplane_putstr_yx(picker, 0, c, "═");
+    ncplane_putstr_yx(picker, 0, PW - 1, "╗");
+    for (int r = 1; r < PH - 1; ++r) {
+      ncplane_putstr_yx(picker, r, 0,      "║");
+      ncplane_putstr_yx(picker, r, PW - 1, "║");
     }
-    if (p[0] == '/') {
-      return p;
+    ncplane_putstr_yx(picker, PH - 1, 0, "╚");
+    for (int c = 1; c < PW - 1; ++c) ncplane_putstr_yx(picker, PH - 1, c, "═");
+    ncplane_putstr_yx(picker, PH - 1, PW - 1, "╝");
+
+    // Title
+    ncplane_set_channels(picker, NCCHANNELS_INITIALIZER(255, 220, 80, PBG_R, PBG_G, PBG_B));
+    std::string title_str = " 📂 " + title_hint + " Picker ";
+    if ((int)title_str.size() > PW - 4) title_str = title_str.substr(0, PW - 4);
+    ncplane_putstr_yx(picker, 0, 2, title_str.c_str());
+    // Current path (truncated).
+    std::string path_display = current_dir;
+    if ((int)path_display.size() > PW - 4)
+      path_display = "…" + path_display.substr(path_display.size() - (PW - 5));
+    ncplane_set_channels(picker, NCCHANNELS_INITIALIZER(160, 200, 240, PBG_R, PBG_G, PBG_B));
+    ncplane_putstr_yx(picker, 1, 2, path_display.c_str());
+    // Hint line (bottom interior row).
+    ncplane_set_channels(picker, NCCHANNELS_INITIALIZER(120, 120, 160, PBG_R, PBG_G, PBG_B));
+    std::string hint_trunc = hint_line;
+    if ((int)hint_trunc.size() > PW - 4) hint_trunc = hint_trunc.substr(0, PW - 4);
+    ncplane_putstr_yx(picker, PH - 2, 2, hint_trunc.c_str());
+    // Entry list.
+    int list_rows = PH - 5;
+    if (selected < scroll) scroll = selected;
+    if (selected >= scroll + list_rows) scroll = selected - list_rows + 1;
+    for (int i = 0; i < list_rows; ++i) {
+      int idx = scroll + i;
+      if (idx >= (int)entries.size()) break;
+      bool is_selected = (idx == selected);
+      bool is_dir = !entries[idx].empty() && entries[idx].back() == '/';
+      uint32_t fr, fg, fb;
+      if (is_selected)  { fr = 20;  fg = 20;  fb = 20;  }
+      else if (is_dir)   { fr = 120; fg = 200; fb = 255; }
+      else               { fr = 200; fg = 200; fb = 200; }
+      uint32_t br = is_selected ? 100 : PBG_R;
+      uint32_t bg = is_selected ? 180 : PBG_G;
+      uint32_t bb = is_selected ? 255 : PBG_B;
+      ncplane_set_channels(picker, NCCHANNELS_INITIALIZER(fr, fg, fb, br, bg, bb));
+      std::string label = (is_selected ? " ▶ " : "   ") + entries[idx];
+      if ((int)label.size() > PW - 2) label = label.substr(0, PW - 2);
+      while ((int)label.size() < PW - 2) label += ' ';
+      ncplane_putstr_yx(picker, 2 + i, 1, label.c_str());
     }
-    return join_path(sandbox, disclose(disclose(p, '<', '>'), '[', ']'));
+    notcurses_render(nc);
   };
 
-  tui.append_line("[tool] → " + op);
-  tui.redraw_all();
-
-  if (op == "TOOL:DATE") {
-    char buf[32]; time_t t = time(nullptr);
-    strftime(buf, sizeof(buf), "%Y-%m-%d", localtime(&t));
-    return buf;
-  }
-  if (op == "TOOL:TIME") {
-    char buf[32]; time_t t = time(nullptr);
-    strftime(buf, sizeof(buf), "%H:%M:%S", localtime(&t));
-    return buf;
-  }
-  if (op == "TOOL:RND") {
-    return std::to_string((double)rand() / RAND_MAX);
-  }
-  if (op == "TOOL:RAG") {
-    return rag_tool(cfg, arg1);
-  }
-  if (op == "TOOL:LIST") {
-    std::string dir = resolve(arg1);
-    if (!path_in_sandbox(sandbox, dir)) return "ERROR: path outside sandbox";
-    return list_dir(dir);
-  }
-  if (op == "TOOL:EXISTS") {
-    std::string p = resolve(arg1);
-    if (!path_in_sandbox(sandbox, p)) return "NO";
-    return fs::exists(p) ? "YES" : "NO";
-  }
-  if (op == "TOOL:READ") {
-    std::string p = resolve(arg1);
-    if (!path_in_sandbox(sandbox, p)) return "ERROR: path outside sandbox";
-    return read_file(p);
-  }
-  if (op == "TOOL:WRITE") {
-    std::string p = resolve(arg1);
-    if (!path_in_sandbox(sandbox, p)) {
-      return "ERROR: path outside sandbox";
-    }
-    if (!tui.confirm_dialog(std::format("Allow model to write {}?", p))) {
-      return "ERROR: action prevented by user";
+  std::string result;
+  load_entries(current_dir, entries);
+  draw_picker();
+
+  for (;;) {
+    ncinput ni{};
+    notcurses_get_blocking(nc, &ni);
+    if (ni.id == NCKEY_ESC) {
+      break;  // cancelled
     }
-    std::string content = disclose(strip_code_fences(arg1, arg2), '`', '`');
-    return write_file(p, content) ? "OK: written to " + arg1 : "ERROR: write failed for " + arg1;
-  }
-  if (op == "TOOL:CURL") {
-    return tool_curl(arg1);
-  }
-  if (op == "TOOL:INTROSPECT") {
-    return introspect(cfg);
-  }
-  if (op == "TOOL:RUN") {
-    std::string prog = resolve(arg1);
-    if (!path_in_sandbox(sandbox, prog)) {
-      return "ERROR: path outside sandbox";
+    if (ni.id == NCKEY_UP) {
+      if (selected > 0) --selected;
+      draw_picker();
+      continue;
     }
-    if (!run_allowed.empty()) {
-      std::string basename = fs::path(prog).filename().string();
-      bool permitted = std::any_of(run_allowed.begin(), run_allowed.end(),
-                                   [&](const std::string &a){ return a == basename; });
-      if (!permitted) {
-        return "ERROR: '" + basename + "' is not in the TOOL:RUN allowlist. "
-          "Use /set run_allowed <name> to permit it.";
-      }
-    } else if (!tui.confirm_dialog(std::format("Allow {} to run?", prog))) {
-      return "ERROR: prevented by user";
+    if (ni.id == NCKEY_DOWN) {
+      if (selected + 1 < (int)entries.size()) ++selected;
+      draw_picker();
+      continue;
     }
-    std::string command = prog + " " + arg2 + " 2>&1";
-    FILE *fp = popen(command.c_str(), "r");
-    if (!fp) {
-      return "ERROR: popen failed";
+    // 's' — select the current directory (useful for /rag, ignored for file pickers).
+    if (ni.id == 's' || ni.id == 'S') {
+      // Select current directory for RAG indexing.
+      result = current_dir;
+      break;
     }
-    std::string out;
-    char buf[256];
-    while (fgets(buf, sizeof(buf), fp)) {
-      out += buf;
+    if (ni.id == NCKEY_BACKSPACE || ni.id == 127) {
+      // Go up one level.
+      fs::path p(current_dir);
+      if (p.has_parent_path() && p != p.root_path()) {
+        current_dir = p.parent_path().string();
+        load_entries(current_dir, entries);
+        selected = 0; scroll = 0;
+        draw_picker();
+      }
+      continue;
     }
-    pclose(fp);
-    if (out.size() > 4096) {
-      out = out.substr(0, 4096) + "\n…(truncated)";
+    if (ni.id == NCKEY_ENTER || ni.id == '\r' || ni.id == '\n') {
+      if (entries.empty()) continue;
+      const std::string &entry = entries[selected];
+      if (entry == "..") {
+        fs::path p(current_dir);
+        if (p.has_parent_path() && p != p.root_path()) {
+          current_dir = p.parent_path().string();
+          load_entries(current_dir, entries);
+          selected = 0; scroll = 0;
+          draw_picker();
+        }
+      } else if (!entry.empty() && entry.back() == '/') {
+        // Descend into directory.
+        current_dir = current_dir + "/" + entry.substr(0, entry.size() - 1);
+        {
+          std::error_code ec;
+          auto canon = fs::canonical(current_dir, ec);
+          if (!ec) current_dir = canon.string();
+        }
+        load_entries(current_dir, entries);
+        selected = 0; scroll = 0;
+        draw_picker();
+      } else {
+        // Select a specific file.
+        // Select the highlighted file.
+        result = current_dir + "/" + entry;
+        break;
+      }
+      continue;
     }
-    return out;
   }
-  return "ERROR: unknown tool: [" + op + "]";
+  ncplane_destroy(picker);
+  notcurses_render(nc);
+  return result;
 }
 
 //
-// Agent turn
+// ─── TuiState::confirm_dialog ─────────────────────────────────────────────
 //
-bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cfg, TuiState &tui) {
-  if (!model_loaded) {
-    tui.append_line("[err] No model loaded. Use /model <path>");
-    tui.redraw_all();
-    return false;
-  }
-  std::string effective_message = user_message;
-  if (embed_llama && rag_db && rag_session) {
-    std::string context = embed_llama->rag_retrieve(*rag_db, user_message, cfg.rag_top_k, *rag_session);
-    if (!context.empty()) {
-      log_write("RAG: %s", context.c_str());
-      effective_message = "Context:\n" + context + "\n\nUser: " + user_message;
-    } else {
-      log_write("RAG: no context found [%s]", embed_llama->last_error());
-    }
-  }
-  if (!iter) {
-    tui.append_line("[err] Conversation not initialised (call /clear to reset)");
-    tui.redraw_all();
-    return false;
-  }
-  if (!llama->add_message(*iter, "user", effective_message)) {
-    tui.append_line(std::string("[err] add_message: ") + llama->last_error());
-    tui.redraw_all();
-    return false;
+bool TuiState::confirm_dialog(const std::string &prompt) {
+  const std::string msg = " " + prompt + " [y/n] ❯ ";
+  std::string answer;
+  // Repaints the input row with the question followed by what was typed so far.
+  auto paint = [&]() {
+    ncplane_erase(inputpl);
+    ncplane_set_channels(inputpl, inp_ch(255, 200, 80));
+    ncplane_putstr_yx(inputpl, 1, 0, (msg + answer).c_str());
+    notcurses_render(nc);
+  };
+  for (paint();; paint()) {
+    ncinput ni{};
+    notcurses_get_blocking(nc, &ni);
+    if (ni.id == NCKEY_ENTER || ni.id == '\r' || ni.id == '\n') break;
+    if (ni.id == NCKEY_ESC) { answer.clear(); break; }  // Esc cancels, i.e. denies
+    if (ni.id == NCKEY_BACKSPACE || ni.id == 127) { if (!answer.empty()) answer.pop_back(); }
+    else if (ni.id >= 32 && ni.id < 127) { answer += (char)ni.id; }
   }
-  tui.append_line("Nitro: ");
+  // Put the normal prompt back, then report: only y / yes / sure / k (any case) approve.
+  redraw_input();
+  notcurses_render(nc);
+  std::transform(answer.begin(), answer.end(), answer.begin(), ::tolower);
+  return (answer == "y" || answer == "yes" || answer == "sure" || answer == "k");
+}
 
-  // in_think starts false — models that don't use <think> blocks emit
-  // visible text immediately.  The spinner activates only while thinking.
-  enum {t_init, t_think, t_thunk} think_mode = t_init;
-  tui.set_thinking(false);
-  std::string buffer;
+//
+// Integrates InputHistory:  Up/Down arrows navigate the history stack.
+// On submit the entry is pushed to history, and nav is reset.
+//
+std::string TuiState::readline_blocking() {
+  input_buf.clear();
+  cursor_pos = 0;
+  history.reset_nav();
+  redraw_input();
+  notcurses_render(nc);
 
-  auto invoke_tool = [&](const std::string &tool, const std::string_view template_str) -> void {
-    std::string result = process_tool(tool, cfg, tui);
-    std::string content = std::vformat(template_str, std::make_format_args(result));
-    log_write("tool: [%s] result: [%s]", tool.c_str(), result.c_str());
-    if (!llama->add_message(*iter, "tool_result", content)) {
-      tui.append_line(std::string("[err] tool result inject: ") + llama->last_error());
-      tui.redraw_all();
-    }
-    if (!iter->_has_next) {
-      tui.append_line(std::string("[err] failed to evoke tool response: ") + llama->last_error());
-      tui.redraw_all();
+  // Temporary saved draft so Down from history restores the user's current text.
+  std::string draft;
+
+  for (;;) {
+    ncinput ni{};
+    notcurses_get_blocking(nc, &ni);
+
+    if (ni.id == NCKEY_ENTER || ni.id == '\r' || ni.id == '\n') {
+      std::string result = input_buf;
+      if (!result.empty()) {
+        history.push(result);
+      }
+      input_buf.clear();
+      cursor_pos = 0;
+      redraw_input();
+      notcurses_render(nc);
+      return result;
     }
-  };
 
-  auto start_think = [&](const std::string &tag) {
-    if (think_mode == t_init) {
-      auto pos = buffer.find(tag);
-      if (pos != std::string::npos) {
-        think_mode = t_think;
-        tui.set_thinking(true);
-        // display prededing text
-        buffer = buffer.substr(0, pos);
+    if (ni.id == NCKEY_UP) {
+      // Entering history from a fresh prompt: save current text as draft.
+      std::string hist_entry;
+      if (history.up(hist_entry)) {
+        if (input_buf.size() > 0 && hist_entry != input_buf) {
+          // Only save draft when we first leave the bottom of history.
+          // (history.reset_nav was called on entry so the first Up call
+          //  always comes from the "new input" position.)
+          draft = input_buf;
+        }
+        input_buf  = hist_entry;
+        cursor_pos = input_buf.size();
       }
+      redraw_input();
+      notcurses_render(nc);
+      continue;
     }
-  };
 
-  auto end_think = [&](const std::string &tag) {
-    if (think_mode == t_think) {
-      auto pos = buffer.find(tag);
-      if (pos != std::string::npos) {
-        think_mode = t_thunk;
-        tui.set_thinking(false);
-        // display remaining text
-        buffer = buffer.substr(pos + tag.length());
+    if (ni.id == NCKEY_DOWN) {
+      std::string hist_entry;
+      bool got = history.down(hist_entry);
+      if (got) {
+        input_buf  = hist_entry;
+        cursor_pos = input_buf.size();
+      } else {
+        // Past the newest entry → restore draft.
+        input_buf  = draft;
+        cursor_pos = input_buf.size();
+        draft.clear();
       }
+      redraw_input();
+      notcurses_render(nc);
+      continue;
     }
-  };
 
-  while (iter->_has_next) {
-    ncinput ni{};
-    notcurses_get_nblock(tui.nc, &ni);
-    if (ni.id == NCKEY_ESC) {
-      tui.set_thinking(false);
-      tui.append_line("[err] Generation cancelled by user (Escape)");
-      tui.redraw_all();
-      return false;
+    // Scroll the chat pane — not the input history.
+    if (ni.id == NCKEY_PGUP) {
+      scroll_offset += std::max(1, term_rows - 4);
+      redraw_chat();
+      notcurses_render(nc);
+      continue;
+    }
+    if (ni.id == NCKEY_PGDOWN) {
+      scroll_offset = std::max(0, scroll_offset - std::max(1, term_rows - 4));
+      redraw_chat();
+      notcurses_render(nc);
+      continue;
     }
-    std::string tok = llama->next(*iter);
-    buffer += tok;
-    if (think_mode == t_init) {
-      start_think("<think>");
-      start_think("<|think|>");
-      start_think("<think|>");
-      start_think("<|channel>thought");
+    if (ni.id == NCKEY_SCROLL_UP && scroll_offset < term_rows + 10) {
+      scroll_offset += 1;
+      redraw_chat();
+      notcurses_render(nc);
+      continue;
     }
-    if (think_mode == t_think) {
-      tui.tick_spinner();
-      end_think("</think>");
-      end_think("</|think|>");
-      end_think("<think|>");
-      end_think("<channel|>");
+    if (ni.id == NCKEY_SCROLL_DOWN && scroll_offset > 0) {
+      scroll_offset -= 1;
+      redraw_chat();
+      notcurses_render(nc);
+      continue;
     }
-    if (think_mode == t_thunk) {
-      auto tool_start = buffer.find("TOOL:");
-      if (tool_start == 0) {
-        // fetch all remaining tokens
-        invoke_tool(trim(buffer + llama->all(*iter)), "TOOL_RESULT: {}");
-        buffer.clear();
-        think_mode = t_init;
-        continue;
-      }
-      // see https://ai.google.dev/gemma/docs/core/prompt-formatting-gemma4
-      tool_start = buffer.find("<|tool_call>call:");
-      if (tool_start != std::string::npos) {
-        buffer += llama->all(*iter);
-        auto pos = buffer.find_last_not_of("}<tool_call|>");
-        if (pos != std::string::npos) {
-          buffer = buffer.substr(0, pos);
-        }
-        pos = buffer.find_first_not_of("{");
-        if (pos != std::string::npos) {
-          buffer = buffer.substr(0, pos) + buffer.substr(pos + 1);
-        }
-        invoke_tool(trim(buffer), "<|tool_response>{}<tool_response|>");
-        buffer.clear();
-        think_mode = t_init;
-        continue;
-      }
-      auto pos = buffer.find('\n');
-      if (pos != std::string::npos) {
-        tui.append_token(buffer.substr(0, pos + 1));
-        buffer = buffer.substr(pos + 1);
+    if (ni.id == NCKEY_F01) {
+      show_help();
+      continue;
+    }
+    if (ni.id == NCKEY_F02) {
+      mouse_mode = !mouse_mode;
+      if (mouse_mode) {
+        notcurses_mice_enable(nc, NCMICE_BUTTON_EVENT);
+      } else {
+        notcurses_mice_disable(nc);
       }
+      continue;
+    }
+    if (ni.id == NCKEY_BACKSPACE || ni.id == 127) {
+      if (cursor_pos > 0) { input_buf.erase(cursor_pos - 1, 1); --cursor_pos; }
+    } else if (ni.id == NCKEY_LEFT) {
+      if (cursor_pos > 0) --cursor_pos;
+    } else if (ni.id == NCKEY_RIGHT) {
+      if (cursor_pos < input_buf.size()) ++cursor_pos;
+    } else if (ni.id == NCKEY_HOME) {
+      cursor_pos = 0;
+    } else if (ni.id == NCKEY_END) {
+      cursor_pos = input_buf.size();
+    } else if (ni.id == NCKEY_DEL) {
+      if (cursor_pos < input_buf.size()) input_buf.erase(cursor_pos, 1);
+    } else if (ni.id >= 32 && ni.id < 0xD800) {
+      // Any printable character — entering new text clears the nav draft
+      // so that Down won't resurrect a stale saved buffer.
+      draft.clear();
+      history.reset_nav();
+      input_buf.insert(cursor_pos, 1, (char)ni.id);
+      ++cursor_pos;
     }
+
+    redraw_input();
+    notcurses_render(nc);
   }
+}
 
-  if (!buffer.empty()) {
-    tui.append_token(buffer + "\n");
+void AgentState::apply_generation_params(const NitroConfig &cfg) {  // push generation settings into llama
+  llama->add_stop("<|turn|>");    // the two stop strings are fixed here, not taken from cfg
+  llama->add_stop("<|im_end|>");  // NOTE(review): re-added on every call — confirm Llama tolerates duplicates
+  llama->set_max_tokens(cfg.n_max_tokens);  // every remaining value is copied from cfg unchanged
+  llama->set_temperature(cfg.temperature);
+  llama->set_top_k(cfg.top_k);
+  llama->set_top_p(cfg.top_p);
+  llama->set_min_p(cfg.min_p);
+  llama->set_penalty_repeat(cfg.penalty_repeat);
+  llama->set_penalty_last_n(cfg.penalty_last_n);
+  llama->set_log_level(cfg.log_level);
+}
+
+// Loads cfg.model_path into a fresh Llama while a modal popup is shown.
+// Returns false (after reporting in the chat pane) if no path is set or the
+// load fails.  iter is left reset: reset_conversation() must recreate it.
+bool AgentState::setup_model(const NitroConfig &cfg, TuiState &tui) {
+  if (cfg.model_path.empty()) {
+    tui.append_line("[sys] No model loaded.  Use /model <path> to load a GGUF.");
+    tui.redraw_all();
+    return false;
   }
+  // Show a modal popup so the user knows loading is in progress.
+  std::string model_name = fs::path(cfg.model_path).filename().string();
+  tui.show_modal_popup("Loading " + model_name);
+  // Destroy the iterator first — it holds references into the llama context.
+  // Freeing llama while iter is still alive causes use-after-free / load failure.
+  iter.reset();
+  model_loaded = false;
+  llama = std::make_unique<Llama>();
 
-  tui.flush_token_acc();
-  tui.set_thinking(false);
-  tui.tokens_per_sec = tokens_per_sec();
+  apply_generation_params(cfg);
+  if (!llama->load_model(cfg.model_path, cfg.n_ctx, cfg.n_batch,
+                         cfg.n_gpu_layers, cfg.log_level)) {
+    tui.dismiss_modal_popup();
+    tui.append_line(std::string("[err] ") + llama->last_error());
+    tui.redraw_all();
+    return false;
+  }
+  tui.dismiss_modal_popup();
+  model_loaded = true;
+  tui.current_model = model_name;
+  tui.append_line("[sys] Model ready: " + tui.current_model);
   LlamaMemoryInfo mem = llama->memory_info();
-  tui.kv_used    = mem.kv_used;
-  tui.kv_total   = mem.kv_total;
+  tui.append_line("[sys] " + mem.advice);
+  tui.kv_used  = mem.kv_used;
+  tui.kv_total = mem.kv_total;
   tui.vram_used  = mem.vram_used;
   tui.vram_total = mem.vram_total;
-  char stat[128];
-  std::snprintf(stat, sizeof(stat), "[sys] %.1f tok/s  (%d tokens)  KV %.1f%%",
-                (double)tui.tokens_per_sec,
-                iter->_tokens_generated,
-                (double)mem.kv_percent);
-  tui.append_line(stat);
   tui.redraw_all();
   return true;
 }
 
-//
-// File-system helpers
-//
-static std::string join_path(const std::string &a, const std::string &b) {
-  if (b.empty()) return a;
-  if (b[0] == '/') return b;
-  std::string pa = a;
-  if (!pa.empty() && pa.back() == '/') pa.pop_back();
-  std::string pb = (b.front() == '/') ? b.substr(1) : b;
-  return pa + "/" + pb;
-}
-
-static bool path_in_sandbox(const std::string &sandbox, const std::string &path) {
-  std::error_code ec;
-  auto base   = fs::canonical(sandbox, ec);  if (ec) return false;
-  auto target = fs::weakly_canonical(path, ec);
-  std::string bstr = base.string() + "/";
-  std::string tstr = target.string();
-  return tstr == base.string() || tstr.compare(0, bstr.size(), bstr) == 0;
+bool AgentState::setup_embed(const std::string &path, TuiState &tui) {  // load the embedding model used for RAG
+  tui.show_modal_popup("Loading embedding model: " + fs::path(path).filename().string());
+  tui.redraw_all();
+  embed_llama = std::make_unique<Llama>();  // replaces any previously held embedding model
+  if (!embed_llama->load_embedding_model(path)) {
+    tui.dismiss_modal_popup();
+    tui.append_line(std::string("[err] ") + embed_llama->last_error());
+    tui.redraw_all();
+    embed_llama.reset();  // a null embed_llama is what rag_tool/run_turn treat as "RAG off"
+    return false;
+  }
+  tui.dismiss_modal_popup();
+  rag_db      = std::make_unique<RagDB>();       // a new db and session replace any earlier ones
+  rag_session = std::make_unique<RagSession>();
+  tui.append_line("[sys] Embedding model ready.");
+  tui.redraw_all();
+  return true;
 }
 
-static std::string read_file(const std::string &path) {
-  std::ifstream f(path, std::ios::binary);
-  if (!f) {
-    return "ERROR: cannot open [" + path + "]";
+void AgentState::reset_conversation(const std::string &sysprompt, TuiState &tui) {  // start a new chat
+  system_prompt = sysprompt;
+  llama->reset();
+  apply_generation_params(NitroConfig{});  // NOTE(review): default-constructed config, not the caller's — confirm
+  iter = std::make_unique<LlamaIter>();  // run_turn refuses to run while iter is null
+  if (!llama->add_message(*iter, "system", system_prompt)) {  // failure is reported only; iter stays set
+    tui.append_line(std::string("[err] System prompt injection: ") + llama->last_error());
+    tui.redraw_all();
   }
-  std::ostringstream oss; oss << f.rdbuf();
-  return oss.str();
 }
 
-static bool write_file(const std::string &path, const std::string &data) {
-  fs::path p(path);
-  if (p.has_parent_path()) {
-    std::error_code ec;
-    fs::create_directories(p.parent_path(), ec);
-  }
-  std::ofstream f(path, std::ios::binary | std::ios::trunc);
-  if (!f) return false;
-  f.write(data.data(), (std::streamsize)data.size());
-  return f.good();
+float AgentState::tokens_per_sec() const {
+  // Tokens generated divided by the seconds since iter->_t_start; 0 when that is not measurable.
+  if (!iter || iter->_tokens_generated <= 0) return 0.0f;
+  using hr_clock = std::chrono::high_resolution_clock;
+  const std::chrono::duration<double> elapsed = hr_clock::now() - iter->_t_start;
+  return elapsed.count() > 0.0 ? (float)(iter->_tokens_generated / elapsed.count()) : 0.0f;
 }
 
-static std::string list_dir(const std::string &path) {
+std::string AgentState::memory_info_text() {  // multi-line report built from llama->memory_info()
+  if (!model_loaded) return "No model loaded.";
+  LlamaMemoryInfo m = llama->memory_info();
   std::ostringstream oss;
-  std::error_code ec;
-  for (const auto &e : fs::directory_iterator(path, ec)) {
-    if (ec) break;
-    std::string name = e.path().filename().string();
-    if (name.empty() || name[0] == '.') continue;
-    oss << (e.is_directory() ? "[" + name + "]" : name) << "\n";
+  oss << "KV cache  : " << m.kv_used << " / " << m.kv_total
+      << "  (" << m.kv_percent << "%)\n";
+  if (m.vram_total > 0) {  // the VRAM line is left out when no total is reported
+    oss << "VRAM      : " << (m.vram_used >> 20) << " MB / "  // >> 20: presumably bytes to MB — confirm units
+        << (m.vram_total >> 20) << " MB  (" << m.vram_percent << "%)\n";
   }
+  oss << "GPU layers: " << m.n_layers_gpu << " / " << m.n_layers_total << "\n";
+  oss << "CPU layers: " << m.n_layers_cpu << "\n";
+  oss << "Advice    : " << m.advice << "\n";
   return oss.str();
 }
 
-static const std::vector<std::string> CODE_EXTENSIONS = {
-  ".py",".c",".cpp",".h",".bas",".java",".html",".js",".ts",
-  ".json",".yaml",".toml",".sh",".go",".rs",".jsx",".tsx"
-};
-
-static std::string strip_code_fences(const std::string &filename,
-                                     const std::string &src) {
-  auto ext = fs::path(filename).extension().string();
-  bool is_code = std::any_of(CODE_EXTENSIONS.begin(), CODE_EXTENSIONS.end(),
-                             [&](const std::string &e){ return ext == e; });
-  if (!is_code) return src;
-  auto pos = src.find("```");
-  if (pos == std::string::npos) return src;
-  auto nl = src.find('\n', pos + 3);
-  if (nl == std::string::npos) return src;
-  std::string inner = src.substr(nl + 1);
-  auto end = inner.rfind("```");
-  if (end != std::string::npos) inner = inner.substr(0, end);
-  return inner;
+std::string AgentState::rag_tool(const NitroConfig &cfg, const std::string &agent_query) {
+  // Serves TOOL:RAG: the text retrieved for agent_query, or a fixed
+  // "RAG: ..." notice when retrieval is not set up or finds nothing.
+  if (!embed_llama || !rag_db || !rag_session) {
+    return "RAG: not enabled";
+  }
+  std::string hits = embed_llama->rag_retrieve(*rag_db, agent_query, cfg.rag_top_k, *rag_session);
+  if (hits.empty()) {
+    hits = "RAG: no context found";
+  }
+  return hits;
 }
 
-//
-// html_to_text — strip HTML for cleaner TOOL:CURL context
-//
-// Lightweight HTML→plain-text conversion:
-//   • Drops <head>, <script>, <style> blocks entirely.
-//   • Inserts newlines at block-level tags (p, div, br, li, h1-h6 …).
-//   • Strips all remaining tags.
-//   • Decodes common named & numeric HTML entities.
-//   • Collapses whitespace runs; caps consecutive blank lines at 2.
-//
-static std::string html_to_text(const std::string &html) {
-  std::string s = html;
+// Loads a saved RAG index into rag_db; false if RAG is not set up or the load fails.
+bool AgentState::rag_load_index(const std::string &path, TuiState &tui) {
+  const bool ready = embed_llama && rag_db;
+  const bool loaded = ready && rag_db->load(path);
+  const char *problem = ready ? "[sys] failed to load"
+                              : "[err] Load an embedding model first: /embed <path>";
 
-  // 1. Remove <head>…</head>
-  {
-    std::string lo = s;
-    std::transform(lo.begin(), lo.end(), lo.begin(), ::tolower);
-    auto p0 = lo.find("<head");
-    auto p1 = lo.find("</head>");
-    if (p0 != std::string::npos && p1 != std::string::npos)
-      s.erase(p0, p1 + 7 - p0);
+  if (!loaded) {
+    tui.append_line(problem);
+    tui.redraw_all();
   }
 
-  // 2. Remove <script>…</script> and <style>…</style>
-  for (const std::string &tag : {"script", "style"}) {
-    std::string open  = "<" + tag;
-    std::string close = "</" + tag + ">";
-    std::string lo = s;
-    std::transform(lo.begin(), lo.end(), lo.begin(), ::tolower);
-    for (;;) {
-      auto p0 = lo.find(open);
-      if (p0 == std::string::npos) break;
-      auto p1 = lo.find(close, p0);
-      if (p1 == std::string::npos) { s.erase(p0); lo.erase(p0); break; }
-      s.erase(p0, p1 + close.size() - p0);
-      lo.erase(p0, p1 + close.size() - p0);
-    }
+  return loaded;
+}
+
+bool AgentState::rag_index(const std::string &path, const NitroConfig &cfg, TuiState &tui) {  // index a file or tree, then save
+  if (!embed_llama || !rag_db) {
+    tui.append_line("[err] Load an embedding model first: /embed <path>");
+    tui.redraw_all();
+    return false;
   }
 
-  // 3. Replace block-level tags with '\n' before stripping all tags.
-  static const char *const BLOCK[] = {
-    "p","div","br","li","tr","h1","h2","h3","h4","h5","h6",
-    "article","section","header","footer","nav","main", nullptr
+  auto index_one = [&](const std::string &filepath) {  // announce one file, then index it into rag_db
+    tui.append_line("[sys]   indexing: " + filepath);
+    tui.redraw_all();
+    if (!embed_llama->rag_index(*rag_db, filepath)) {  // a failed file is reported; indexing carries on
+      tui.append_line(std::string("[err] rag_load: ") + embed_llama->last_error());  // NOTE(review): label says rag_load
+      tui.redraw_all();
+    }
   };
-  {
-    std::string out;
-    out.reserve(s.size());
-    size_t i = 0;
-    while (i < s.size()) {
-      if (s[i] != '<') { out += s[i++]; continue; }
-      auto ce = s.find('>', i);
-      if (ce == std::string::npos) { out += s[i++]; continue; }
-      std::string inner = s.substr(i + 1, ce - i - 1);
-      size_t sp = inner.find_first_of(" \t/\r\n");
-      std::string name = (sp != std::string::npos) ? inner.substr(0, sp) : inner;
-      std::transform(name.begin(), name.end(), name.begin(), ::tolower);
-      for (int k = 0; BLOCK[k]; ++k) {
-        if (name == BLOCK[k]) {
-          out += '\n'; break;
-        }
+
+  // embed_dim is taken from the embedding model and must be set before indexing
+  rag_db->embed_dim = embed_llama->get_embed_dim();
+
+  fs::path rp(path);
+  std::error_code ec;
+  if (fs::is_directory(rp, ec)) {  // a directory is walked recursively; anything else is one file
+    for (const auto &entry : fs::recursive_directory_iterator(rp, ec)) {
+      if (entry.is_regular_file()) {
+        index_one(entry.path().string());
       }
-      i = ce + 1;
     }
-    s = out;
+  } else {
+    index_one(path);
   }
 
-  // 4. Strip all remaining tags.
-  {
-    std::string out; out.reserve(s.size());
-    bool in_tag = false;
-    for (char c : s) {
-      if (c == '<')  { in_tag = true;  continue; }
-      if (c == '>')  { in_tag = false; continue; }
-      if (!in_tag)     out += c;
+  std::string save_path = join_path(cfg.sandbox, "rag-index.bin");  // fixed file name under cfg.sandbox
+  tui.append_line("[sys] saving index: " + save_path);
+  tui.redraw_all();
+  rag_db->save(save_path);  // NOTE(review): whatever save() reports is not inspected here
+
+  return true;  // true once past the guard, even if individual files failed to index
+}
+
+//
+// Tool dispatch
+//
+std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &cfg, TuiState &tui) {
+  const std::string &sandbox = cfg.sandbox;
+  const std::vector<std::string> &run_allowed = cfg.run_allowed;
+
+  std::string op, arg1, arg2;
+  auto sp1 = cmd.find(' ');
+  if (sp1 == std::string::npos) {
+    op = trim(cmd);
+  } else {
+    op = trim(cmd.substr(0, sp1));
+    std::string rest = cmd.substr(sp1 + 1);
+    rest.erase(0, rest.find_first_not_of(" \t"));
+    auto sp2 = rest.find(' ');
+    if (sp2 == std::string::npos) {
+      arg1 = rest;
+    } else {
+      arg1 = rest.substr(0, sp2);
+      arg2 = rest.substr(sp2 + 1);
     }
-    s = out;
   }
 
-  // 5. Decode common HTML entities.
-  static const std::pair<const char*, const char*> ENT[] = {
-    {"&amp;","&"},{"&lt;","<"},{"&gt;",">"},{"&quot;","\""},
-    {"&apos;","'"},{"&nbsp;"," "},{"&mdash;","—"},{"&ndash;","–"},
-    {"&hellip;","…"},{"&#39;","'"},{"&#34;","\""},
-    {nullptr,nullptr}
+  auto resolve = [&](const std::string &p) -> std::string {
+    if (p.empty() || p == ".") {
+      return sandbox;
+    }
+    if (p.substr(0, 2) == "./") {
+      return join_path(sandbox, p.substr(2));
+    }
+    if (p[0] == '/') {
+      return p;
+    }
+    return join_path(sandbox, disclose(disclose(p, '<', '>'), '[', ']'));
   };
-  for (int k = 0; ENT[k].first; ++k) {
-    std::string e = ENT[k].first, r = ENT[k].second;
-    size_t pos = 0;
-    while ((pos = s.find(e, pos)) != std::string::npos)
-      { s.replace(pos, e.size(), r); pos += r.size(); }
+
+  tui.append_line("[tool] → " + op);
+  tui.redraw_all();
+
+  if (op == "TOOL:DATE") {
+    char buf[32]; time_t t = time(nullptr);
+    strftime(buf, sizeof(buf), "%Y-%m-%d", localtime(&t));
+    return buf;
   }
-  // Numeric entities &#NNN; and &#xHHH;
-  {
-    std::string out; out.reserve(s.size());
-    size_t i = 0;
-    while (i < s.size()) {
-      if (s[i]=='&' && i+2<s.size() && s[i+1]=='#') {
-        size_t semi = s.find(';', i+2);
-        if (semi != std::string::npos && semi-i < 10) {
-          std::string num = s.substr(i+2, semi-i-2);
-          try {
-            uint32_t cp = (num[0]=='x'||num[0]=='X')
-              ? (uint32_t)std::stoul(num.substr(1),nullptr,16)
-              : (uint32_t)std::stoul(num);
-            if      (cp < 0x80)  { out += (char)cp; }
-            else if (cp < 0x800) { out += (char)(0xC0|(cp>>6)); out += (char)(0x80|(cp&0x3F)); }
-            else                 { out += (char)(0xE0|(cp>>12)); out += (char)(0x80|((cp>>6)&0x3F)); out += (char)(0x80|(cp&0x3F)); }
-            i = semi+1; continue;
-          } catch (...) {}
-        }
-      }
-      out += s[i++];
+  if (op == "TOOL:TIME") {
+    char buf[32]; time_t t = time(nullptr);
+    strftime(buf, sizeof(buf), "%H:%M:%S", localtime(&t));
+    return buf;
+  }
+  if (op == "TOOL:RND") {
+    return std::to_string((double)rand() / RAND_MAX);
+  }
+  if (op == "TOOL:RAG") {
+    return rag_tool(cfg, arg1);
+  }
+  if (op == "TOOL:LIST") {
+    std::string dir = resolve(arg1);
+    if (!path_in_sandbox(sandbox, dir)) return "ERROR: path outside sandbox";
+    return list_dir(dir);
+  }
+  if (op == "TOOL:EXISTS") {
+    std::string p = resolve(arg1);
+    if (!path_in_sandbox(sandbox, p)) return "NO";
+    return fs::exists(p) ? "YES" : "NO";
+  }
+  if (op == "TOOL:READ") {
+    std::string p = resolve(arg1);
+    if (!path_in_sandbox(sandbox, p)) return "ERROR: path outside sandbox";
+    return read_file(p);
+  }
+  if (op == "TOOL:WRITE") {
+    std::string p = resolve(arg1);
+    if (!path_in_sandbox(sandbox, p)) {
+      return "ERROR: path outside sandbox";
     }
-    s = out;
+    if (!tui.confirm_dialog(std::format("Allow model to write {}?", p))) {
+      return "ERROR: action prevented by user";
+    }
+    std::string content = disclose(strip_code_fences(arg1, arg2), '`', '`');
+    return write_file(p, content) ? "OK: written to " + arg1 : "ERROR: write failed for " + arg1;
   }
-
-  // 6. Collapse whitespace; cap blank lines at 2.
-  {
-    std::string out; out.reserve(s.size());
-    int nl_run = 0; bool last_sp = false;
-    for (char c : s) {
-      if (c == '\r') continue;
-      if (c == '\t') c = ' ';
-      if (c == '\n') { ++nl_run; last_sp=false; if (nl_run<=2) out+='\n'; continue; }
-      nl_run = 0;
-      if (c == ' ') { if (!last_sp) { out+=' '; last_sp=true; } continue; }
-      last_sp = false; out += c;
+  if (op == "TOOL:CURL") {
+    return tool_curl(arg1);
+  }
+  if (op == "TOOL:INTROSPECT") {
+    return introspect(cfg);
+  }
+  if (op == "TOOL:RUN") {
+    std::string prog = resolve(arg1);
+    if (!path_in_sandbox(sandbox, prog)) {
+      return "ERROR: path outside sandbox";
+    }
+    if (!run_allowed.empty()) {
+      std::string basename = fs::path(prog).filename().string();
+      bool permitted = std::any_of(run_allowed.begin(), run_allowed.end(),
+                                   [&](const std::string &a){ return a == basename; });
+      if (!permitted) {
+        return "ERROR: '" + basename + "' is not in the TOOL:RUN allowlist. "
+          "Use /set run_allowed <name> to permit it.";
+      }
+    } else if (!tui.confirm_dialog(std::format("Allow {} to run?", prog))) {
+      return "ERROR: prevented by user";
     }
-    size_t f = out.find_first_not_of(" \n");
-    size_t l = out.find_last_not_of(" \n");
-    s = (f == std::string::npos) ? "" : out.substr(f, l-f+1);
+    std::string command = prog + " " + arg2 + " 2>&1";
+    FILE *fp = popen(command.c_str(), "r");
+    if (!fp) {
+      return "ERROR: popen failed";
+    }
+    std::string out;
+    char buf[256];
+    while (fgets(buf, sizeof(buf), fp)) {
+      out += buf;
+    }
+    pclose(fp);
+    if (out.size() > 4096) {
+      out = out.substr(0, 4096) + "\n…(truncated)";
+    }
+    return out;
   }
-  return s;
+  return "ERROR: unknown tool: [" + op + "]";
 }
 
 //
-// TOOL:CURL
+// Agent turn
 //
-static size_t curl_write_cb(void *contents, size_t size, size_t nmemb, void *userp) {
-  std::string *buf = static_cast<std::string *>(userp);
-  size_t total = size * nmemb;
-  static constexpr size_t MAX_BODY = 32 * 1024;
-  if (buf->size() < MAX_BODY) {
-    size_t room = MAX_BODY - buf->size();
-    buf->append(static_cast<char *>(contents), std::min(total, room));
+bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cfg, TuiState &tui) {
+  if (!model_loaded) {
+    tui.append_line("[err] No model loaded. Use /model <path>");
+    tui.redraw_all();
+    return false;
   }
-  return total;
-}
-
-static std::string tool_curl(const std::string &url) {
-  if (url.empty()) return "ERROR: TOOL:CURL requires a URL argument";
-  CURL *curl = curl_easy_init();
-  if (!curl) return "ERROR: curl_easy_init failed";
-  std::string body;
-  body.reserve(4096);
-  curl_easy_setopt(curl, CURLOPT_URL,            url.c_str());
-  curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION,  curl_write_cb);
-  curl_easy_setopt(curl, CURLOPT_WRITEDATA,      &body);
-  curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1L);
-  curl_easy_setopt(curl, CURLOPT_MAXREDIRS,      5L);
-  curl_easy_setopt(curl, CURLOPT_TIMEOUT,        15L);
-  curl_easy_setopt(curl, CURLOPT_USERAGENT,      "nitro/1.0");
-  // Accept compressed responses; curl will decompress automatically.
-  curl_easy_setopt(curl, CURLOPT_ACCEPT_ENCODING, "");
-
-  CURLcode res = curl_easy_perform(curl);
-  long http_code = 0;
-  curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code);
-
-  // Query content-type before cleanup (pointer is only valid while handle lives).
-  char *ct_raw = nullptr;
-  curl_easy_getinfo(curl, CURLINFO_CONTENT_TYPE, &ct_raw);
-  std::string content_type = ct_raw ? ct_raw : "";
-  std::transform(content_type.begin(), content_type.end(),
-                 content_type.begin(), ::tolower);
-  curl_easy_cleanup(curl);
-  if (res != CURLE_OK) {
-    return std::string("ERROR: curl: ") + curl_easy_strerror(res);
+  std::string effective_message = user_message;
+  if (embed_llama && rag_db && rag_session) {
+    std::string context = embed_llama->rag_retrieve(*rag_db, user_message, cfg.rag_top_k, *rag_session);
+    if (!context.empty()) {
+      log_write("RAG: %s", context.c_str());
+      effective_message = "Context:\n" + context + "\n\nUser: " + user_message;
+    } else {
+      log_write("RAG: no context found [%s]", embed_llama->last_error());
+    }
   }
-  if (http_code >= 400) {
-    return "ERROR: HTTP " + std::to_string(http_code) + " from " + url;
+  if (!iter) {
+    tui.append_line("[err] Conversation not initialised (call /clear to reset)");
+    tui.redraw_all();
+    return false;
   }
-  if (body.empty()) {
-    return "(empty response)";
+  if (!llama->add_message(*iter, "user", effective_message)) {
+    tui.append_line(std::string("[err] add_message: ") + llama->last_error());
+    tui.redraw_all();
+    return false;
   }
+  tui.append_line("Nitro: ");
 
-  // Strip HTML tags so the model receives clean plain text.
-  bool is_html = (content_type.find("text/html") != std::string::npos)
-    || (body.size() > 5 && body.substr(0,5) == "<!DOC")
-    || (body.size() > 6 && body.substr(0,6) == "<html>");
-  if (is_html) body = html_to_text(body);
+  // think_mode starts at t_init; text is streamed line by line only once a
+  // think block has closed (t_thunk).  The spinner runs only while thinking.
+  enum {t_init, t_think, t_thunk} think_mode = t_init;
+  tui.set_thinking(false);
+  std::string buffer;
 
-  return body;
-}
+  auto invoke_tool = [&](const std::string &tool, const std::string_view template_str) -> void {
+    std::string result = process_tool(tool, cfg, tui);
+    std::string content = std::vformat(template_str, std::make_format_args(result));
+    log_write("tool: [%s] result: [%s]", tool.c_str(), result.c_str());
+    if (!llama->add_message(*iter, "tool_result", content)) {
+      tui.append_line(std::string("[err] tool result inject: ") + llama->last_error());
+      tui.redraw_all();
+    }
+    if (!iter->_has_next) {
+      tui.append_line(std::string("[err] failed to evoke tool response: ") + llama->last_error());
+      tui.redraw_all();
+    }
+  };
 
-//
-// System prompt
-//
-static std::string build_system_prompt(const std::vector<std::string> &knowledge_files,
-                                       const std::string &sandbox) {
-  std::string p;
-  p += "You are Nitro, an agentic AI assistant for software development.\n"
-    "Your sandbox (project directory) is: " + sandbox + "\n\n"
-    "## Tool protocol\n"
-    " - Emit tool calls on their own new line. for example:\n\n"
-    "TOOL:LIST\n"
-    " - The host executes the tool and returns TOOL_RESULT: <value> on the next line.\n\n"
-    "Available tools:\n"
-    "  TOOL:LIST   [dir]          list files (default: sandbox root)\n"
-    "  TOOL:READ   <file>         read file contents\n"
-    "  TOOL:WRITE  <file> <text>  write text to file\n"
-    "  TOOL:EXISTS <file>         YES or NO\n"
-    "  TOOL:RUN    <prog> [args]  run program inside sandbox\n"
-    "  TOOL:DATE                  current date\n"
-    "  TOOL:TIME                  current time\n"
-    "  TOOL:RND                   random float\n"
-    "  TOOL:RAG    <query>        query the RAG index for additional context\n"
-    "  TOOL:INTROSPECT            introspect your settings, top_k etc\n"
-    "  TOOL:CURL   <url>          HTTP GET; returns response body (max 32 KB)\n\n"
-    "Rules:\n"
-    "- Never access files outside the sandbox.\n"
-    "- Only use one TOOL at a time. Never combine, always use each tool step by step\n"
-    "- Use TOOL:CURL to fetch documentation, APIs, or web content you need.\n"
-    "- Reason step-by-step inside <|think|> </|think|> (hidden from user).\n"
-    "- After each tool call, explain what you did in plain English.\n\n";
-  for (const auto &kf : knowledge_files) {
-    auto path = join_path(sandbox, kf);
-    std::ifstream f(path);
-    if (!f) {
-      continue;
+  auto start_think = [&](const std::string &tag) {
+    if (think_mode == t_init) {
+      auto pos = buffer.find(tag);
+      if (pos != std::string::npos) {
+        think_mode = t_think;
+        tui.set_thinking(true);
+        // display preceding text
+        buffer = buffer.substr(0, pos);
+      }
+    }
+  };
+
+  auto end_think = [&](const std::string &tag) {
+    if (think_mode == t_think) {
+      auto pos = buffer.find(tag);
+      if (pos != std::string::npos) {
+        think_mode = t_thunk;
+        tui.set_thinking(false);
+        // display remaining text
+        buffer = buffer.substr(pos + tag.length());
+      }
+    }
+  };
+
+  while (iter->_has_next) {
+    ncinput ni{};
+    notcurses_get_nblock(tui.nc, &ni);
+    if (ni.id == NCKEY_ESC) {
+      tui.set_thinking(false);
+      tui.append_line("[err] Generation cancelled by user (Escape)");
+      tui.redraw_all();
+      return false;
+    }
+    std::string tok = llama->next(*iter);
+    buffer += tok;
+    if (think_mode == t_init) {
+      start_think("<think>");
+      start_think("<|think|>");
+      start_think("<think|>");
+      start_think("<|channel>thought");
+    }
+    if (think_mode == t_think) {
+      tui.tick_spinner();
+      end_think("</think>");
+      end_think("</|think|>");
+      end_think("<think|>");
+      end_think("<channel|>");
+    }
+    if (think_mode == t_thunk) {
+      auto tool_start = buffer.find("TOOL:");
+      if (tool_start == 0) {
+        // fetch all remaining tokens
+        invoke_tool(trim(buffer + llama->all(*iter)), "TOOL_RESULT: {}");
+        buffer.clear();
+        think_mode = t_init;
+        continue;
+      }
+      // see https://ai.google.dev/gemma/docs/core/prompt-formatting-gemma4
+      tool_start = buffer.find("<|tool_call>call:");
+      if (tool_start != std::string::npos) {
+        buffer += llama->all(*iter);
+        auto pos = buffer.find_last_not_of("}<tool_call|>");
+        if (pos != std::string::npos) {
+          buffer = buffer.substr(0, pos);
+        }
+        pos = buffer.find_first_not_of("{");
+        if (pos != std::string::npos) {
+          buffer = buffer.substr(0, pos) + buffer.substr(pos + 1);
+        }
+        invoke_tool(trim(buffer), "<|tool_response>{}<tool_response|>");
+        buffer.clear();
+        think_mode = t_init;
+        continue;
+      }
+      auto pos = buffer.find('\n');
+      if (pos != std::string::npos) {
+        tui.append_token(buffer.substr(0, pos + 1));
+        buffer = buffer.substr(pos + 1);
+      }
     }
-    log_write("loaded [%s]", path.c_str());
-    std::ostringstream oss;
-    oss << f.rdbuf();
-    p += "## Knowledge: " + kf + "\n" + oss.str() + "\n\n";
   }
-  return p;
+
+  if (!buffer.empty()) {
+    tui.append_token(buffer + "\n");
+  }
+
+  tui.flush_token_acc();
+  tui.set_thinking(false);
+  tui.tokens_per_sec = tokens_per_sec();
+  LlamaMemoryInfo mem = llama->memory_info();
+  tui.kv_used    = mem.kv_used;
+  tui.kv_total   = mem.kv_total;
+  tui.vram_used  = mem.vram_used;
+  tui.vram_total = mem.vram_total;
+  char stat[128];
+  std::snprintf(stat, sizeof(stat), "[sys] %.1f tok/s  (%d tokens)  KV %.1f%%",
+                (double)tui.tokens_per_sec,
+                iter->_tokens_generated,
+                (double)mem.kv_percent);
+  tui.append_line(stat);
+  tui.redraw_all();
+  return true;
 }
 
 //
@@ -2127,6 +2118,8 @@ static void handle_slash(const std::string &input,
       tui.redraw_all();
       agent.rag_index(path, cfg, tui);
     }
+    tui.append_line("[sys] done");
+    tui.redraw_all();
     return;
   }
 
@@ -2358,8 +2351,9 @@ int main(int argc, char **argv) {
       std::string sysp = build_system_prompt(cfg.knowledge_files, cfg.sandbox);
       agent.reset_conversation(sysp, tui);
     }
-    if (!cfg.embed_path.empty())
+    if (!cfg.embed_path.empty()) {
       agent.setup_embed(cfg.embed_path, tui);
+    }
   } else {
     tui.append_line("[sys] No model specified.  Use /model to open the file picker,");
     tui.append_line("[sys] or /model <path> to load directly.");

From 802e78ee61395b95d903652d07146ed1c8d2ca40 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Thu, 28 May 2026 17:42:47 +0930
Subject: [PATCH 47/54] LLAMA: nitro, apply clion suggestions, implement
 thinking animation

---
 llama/nitro.cpp | 209 +++++++++++++++++++++++++-----------------------
 1 file changed, 108 insertions(+), 101 deletions(-)

diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index 06d8293..d1b1b2b 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -46,7 +46,6 @@
 #include <fstream>
 #include <memory>
 #include <mutex>
-#include <optional>
 #include <sstream>
 #include <string>
 #include <vector>
@@ -240,16 +239,16 @@ struct TuiState {
   void destroy();
   void resize();
   // ── draw ──────────────────────────────────────────────────────────
-  void redraw_header();
+  void redraw_header() const;
   void redraw_chat();
-  void redraw_input();
+  void redraw_input() const;
   void redraw_all();
   // ── content helpers ───────────────────────────────────────────────
   void append_line(const std::string &line);
   void append_token(const std::string &token);
   void flush_token_acc();
   // ── interaction ───────────────────────────────────────────────────
-  bool confirm_dialog(const std::string &prompt);
+  bool confirm_dialog(const std::string &prompt) const;
   // Blocking readline with history navigation, cursor, arrow-key scrolling.
   std::string readline_blocking();
   // Modal popup overlay while a long operation runs.
@@ -270,9 +269,9 @@ struct TuiState {
   // "Model File", "Embedding Model").
   // Returns the selected path, or empty string if the user cancelled.
   std::string file_picker(const std::string &start_dir,
-                          const std::string &title_hint = "File");
+                          const std::string &title_hint = "File") const;
   // Legacy alias kept for callers that used the old name.
-  std::string rag_folder_picker(const std::string &start_dir) {
+  std::string rag_folder_picker(const std::string &start_dir) const {
     return file_picker(start_dir, "RAG Folder");
   }
 };
@@ -289,16 +288,16 @@ struct AgentState {
   bool model_loaded = false;
   std::string system_prompt;
 
-  bool rag_index(const std::string &path, const NitroConfig &cfg, TuiState &tui);
-  bool rag_load_index(const std::string &path, TuiState &tui);
-  bool run_turn(const std::string &user_message, const NitroConfig &cfg, TuiState &tui);
+  bool rag_index(const std::string &path, const NitroConfig &cfg, TuiState &tui) const;
+  bool rag_load_index(const std::string &path, TuiState &tui) const;
+  bool run_turn(const std::string &user_message, const NitroConfig &cfg, TuiState &tui) const;
   bool setup_embed(const std::string &path, TuiState &tui);
   bool setup_model(const NitroConfig &cfg, TuiState &tui);
-  void apply_generation_params(const NitroConfig &cfg);
+  void apply_generation_params(const NitroConfig &cfg) const;
   void reset_conversation(const std::string &sysprompt, TuiState &tui);
-  std::string memory_info_text();
-  std::string process_tool(const std::string &cmd, const NitroConfig &cfg, TuiState &tui);
-  std::string rag_tool(const NitroConfig &cfg, const std::string &agent_query);
+  std::string memory_info_text() const;
+  std::string process_tool(const std::string &cmd, const NitroConfig &cfg, TuiState &tui) const;
+  std::string rag_tool(const NitroConfig &cfg, const std::string &agent_query) const;
   float tokens_per_sec() const;
 };
 
@@ -336,7 +335,7 @@ static void log_write(const char *fmt, ...) {
 }
 
 //
-// constant for strip_code_fences
+// file extensions treated as source code by strip_code_fences
 //
 static const std::vector<std::string> CODE_EXTENSIONS = {
   ".py",".c",".cpp",".h",".bas",".java",".html",".js",".ts",
@@ -481,22 +480,6 @@ static void load_settings(NitroConfig &cfg) {
   settings_get_float(json, "penalty_repeat", cfg.penalty_repeat);
 }
 
-// Escape a string for embedding in JSON.
-static std::string json_escape(const std::string &s) {
-  std::string out;
-  out.reserve(s.size() + 4);
-  for (char c : s) {
-    switch (c) {
-    case '"':  out += "\\\""; break;
-    case '\\': out += "\\\\"; break;
-    case '\n': out += "\\n";  break;
-    case '\t': out += "\\t";  break;
-    default:   out += c;      break;
-    }
-  }
-  return out;
-}
-
 static std::string introspect(const NitroConfig &cfg) {
   static constexpr std::string_view tmpl =
     "{{\n"
@@ -553,7 +536,7 @@ static bool save_settings(const NitroConfig &cfg) {
 // Trims whitespace from both ends of a string
 //
 static std::string trim(std::string_view str) {
-  const std::string_view whitespace = " \t\n\r\f\v";
+  constexpr std::string_view whitespace = " \t\n\r\f\v";
 
   // Find the first non-whitespace character
   const auto start = str.find_first_not_of(whitespace);
@@ -594,10 +577,6 @@ static constexpr uint32_t BG_CHAT_R = 18,  BG_CHAT_G = 22,  BG_CHAT_B = 30;
 static constexpr uint32_t BG_INP_R  = 22,  BG_INP_G  = 28,  BG_INP_B  = 38;
 static constexpr uint32_t BG_HDR_R  = 30,  BG_HDR_G  = 40,  BG_HDR_B  = 55;
 
-static inline uint64_t fg_rgb(uint32_t r, uint32_t g, uint32_t b) {
-  return NCCHANNELS_INITIALIZER(r, g, b, 0, 0, 0);
-}
-
 static inline uint64_t chat_ch(uint32_t r, uint32_t g, uint32_t b) {
   return NCCHANNELS_INITIALIZER(r, g, b, BG_CHAT_R, BG_CHAT_G, BG_CHAT_B);
 }
@@ -711,8 +690,8 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
 static std::string strip_code_fences(const std::string &filename,
                                      const std::string &src) {
   auto ext = fs::path(filename).extension().string();
-  bool is_code = std::any_of(CODE_EXTENSIONS.begin(), CODE_EXTENSIONS.end(),
-                             [&](const std::string &e){ return ext == e; });
+  bool is_code = ranges::any_of(CODE_EXTENSIONS,
+                                [&](const std::string &e){ return ext == e; });
   if (!is_code) return src;
   auto pos = src.find("```");
   if (pos == std::string::npos) return src;
@@ -728,8 +707,8 @@ static std::string strip_code_fences(const std::string &filename,
 // TOOL:CURL
 //
 static size_t curl_write_cb(void *contents, size_t size, size_t nmemb, void *userp) {
-  std::string *buf = static_cast<std::string *>(userp);
-  size_t total = size * nmemb;
+  auto *buf = static_cast<std::string *>(userp);
+  auto total = size * nmemb;
   static constexpr size_t MAX_BODY = 32 * 1024;
   if (buf->size() < MAX_BODY) {
     size_t room = MAX_BODY - buf->size();
@@ -754,7 +733,7 @@ static std::string html_to_text(const std::string &html) {
   // 1. Remove <head>…</head>
   {
     std::string lo = s;
-    std::transform(lo.begin(), lo.end(), lo.begin(), ::tolower);
+    ranges::transform(lo, lo.begin(), ::tolower);
     auto p0 = lo.find("<head");
     auto p1 = lo.find("</head>");
     if (p0 != std::string::npos && p1 != std::string::npos)
@@ -766,7 +745,7 @@ static std::string html_to_text(const std::string &html) {
     std::string open  = "<" + tag;
     std::string close = "</" + tag + ">";
     std::string lo = s;
-    std::transform(lo.begin(), lo.end(), lo.begin(), ::tolower);
+    ranges::transform(lo, lo.begin(), ::tolower);
     for (;;) {
       auto p0 = lo.find(open);
       if (p0 == std::string::npos) break;
@@ -793,7 +772,7 @@ static std::string html_to_text(const std::string &html) {
       std::string inner = s.substr(i + 1, ce - i - 1);
       size_t sp = inner.find_first_of(" \t/\r\n");
       std::string name = (sp != std::string::npos) ? inner.substr(0, sp) : inner;
-      std::transform(name.begin(), name.end(), name.begin(), ::tolower);
+      ranges::transform(name, name.begin(), ::tolower);
       for (int k = 0; BLOCK[k]; ++k) {
         if (name == BLOCK[k]) {
           out += '\n'; break;
@@ -897,8 +876,8 @@ static std::string tool_curl(const std::string &url) {
   char *ct_raw = nullptr;
   curl_easy_getinfo(curl, CURLINFO_CONTENT_TYPE, &ct_raw);
   std::string content_type = ct_raw ? ct_raw : "";
-  std::transform(content_type.begin(), content_type.end(),
-                 content_type.begin(), ::tolower);
+  ranges::transform(content_type,
+                    content_type.begin(), ::tolower);
   curl_easy_cleanup(curl);
   if (res != CURLE_OK) {
     return std::string("ERROR: curl: ") + curl_easy_strerror(res);
@@ -978,13 +957,14 @@ void TuiState::resize() {
 //
 // TuiState::redraw
 //
-void TuiState::redraw_header() {
+void TuiState::redraw_header() const {
   ncplane_erase(header);
   ncplane_set_base(header, " ", 0,
                    NCCHANNELS_INITIALIZER(BG_HDR_R, BG_HDR_G, BG_HDR_B,
                                           BG_HDR_R, BG_HDR_G, BG_HDR_B));
   float kv_pct   = kv_total   > 0 ? 100.f * (float)kv_used   / (float)kv_total   : 0.f;
   float vram_pct = vram_total  > 0 ? 100.f * (float)vram_used / (float)vram_total : 0.f;
+
   static const char *const SPIN[] = { "⣾","⣽","⣻","⢿","⡿","⣟","⣯","⣷" };
   const char *spin_str = thinking ? SPIN[spinner_frame % 8] : " ";
   char buf[512];
@@ -1036,38 +1016,61 @@ void TuiState::redraw_chat() {
   }
 }
 
-void TuiState::redraw_input() {
+void TuiState::redraw_input() const {
   ncplane_erase(inputpl);
-  ncplane_set_channels(inputpl, inp_ch(80, 120, 160));
-  std::string sep(term_cols, '-');
-  ncplane_putstr_yx(inputpl, 0, 0, sep.c_str());
-  const std::string prompt = " ❯ ";
-  const int prompt_cols = 4;
-  ncplane_set_channels(inputpl, inp_ch(100, 210, 255));
-  ncplane_putstr_yx(inputpl, 1, 0, prompt.c_str());
-  int max_w = std::max(0, term_cols - prompt_cols - 1);
-  std::string visible = input_buf;
-  int view_offset = 0;
-  if ((int)visible.size() > max_w && max_w > 0) {
-    view_offset = (int)visible.size() - max_w;
-    visible = visible.substr(view_offset);
-  }
-  int cur_in_view = std::max(0, (int)cursor_pos - view_offset);
-  cur_in_view = std::min(cur_in_view, (int)visible.size());
-  std::string before = visible.substr(0, cur_in_view);
-  std::string after  = cur_in_view < (int)visible.size()
-    ? visible.substr(cur_in_view + 1) : "";
-  char cursor_ch_val = cur_in_view < (int)visible.size()
-    ? visible[cur_in_view] : ' ';
-  ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
-  ncplane_putstr_yx(inputpl, 1, prompt_cols, before.c_str());
-  int cx = prompt_cols + cur_in_view;
-  ncplane_set_channels(inputpl, NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B, 180, 230, 255));
-  char cbuf[2] = { cursor_ch_val, '\0' };
-  ncplane_putstr_yx(inputpl, 1, cx, cbuf);
-  ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
-  if (!after.empty()) {
-    ncplane_putstr_yx(inputpl, 1, cx + 1, after.c_str());
+
+  if (thinking) {
+    static constexpr const char *ROBOT_RIGHT = "🤖➡";
+    static constexpr const char *ROBOT_LEFT  = "⬅🤖";
+    // 15 steps each way = 30 frame cycle
+    static constexpr int STEPS = 15;
+    int cycle = spinner_frame % (STEPS * 2);
+    bool going_right = (cycle < STEPS);
+    int pos = going_right ? cycle : (STEPS * 2 - 1 - cycle);
+
+    std::string sep(term_cols, '-');
+    // blank 4 cols to fit robot + arrow (each emoji is 2 cols wide)
+    for (int i = pos; i < std::min(pos + 4, term_cols); ++i) sep[i] = ' ';
+    ncplane_set_channels(inputpl, inp_ch(80, 120, 160));
+    ncplane_putstr_yx(inputpl, 0, 0, sep.c_str());
+
+    ncplane_set_channels(inputpl, NCCHANNELS_INITIALIZER(255, 220, 80, BG_INP_R, BG_INP_G, BG_INP_B));
+    ncplane_putstr_yx(inputpl, 0, pos, going_right ? ROBOT_RIGHT : ROBOT_LEFT);
+
+    ncplane_set_channels(inputpl, inp_ch(140, 140, 180));
+    ncplane_putstr_yx(inputpl, 1, 2, "thinking…");
+  } else {
+    ncplane_set_channels(inputpl, inp_ch(80, 120, 160));
+    std::string sep(term_cols, '-');
+    ncplane_putstr_yx(inputpl, 0, 0, sep.c_str());
+    const std::string prompt = " ❯ ";
+    const int prompt_cols = 4;
+    ncplane_set_channels(inputpl, inp_ch(100, 210, 255));
+    ncplane_putstr_yx(inputpl, 1, 0, prompt.c_str());
+    int max_w = std::max(0, term_cols - prompt_cols - 1);
+    std::string visible = input_buf;
+    int view_offset = 0;
+    if ((int)visible.size() > max_w && max_w > 0) {
+      view_offset = (int)visible.size() - max_w;
+      visible = visible.substr(view_offset);
+    }
+    int cur_in_view = std::max(0, (int)cursor_pos - view_offset);
+    cur_in_view = std::min(cur_in_view, (int)visible.size());
+    std::string before = visible.substr(0, cur_in_view);
+    std::string after  = cur_in_view < (int)visible.size()
+      ? visible.substr(cur_in_view + 1) : "";
+    char cursor_ch_val = cur_in_view < (int)visible.size()
+      ? visible[cur_in_view] : ' ';
+    ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
+    ncplane_putstr_yx(inputpl, 1, prompt_cols, before.c_str());
+    int cx = prompt_cols + cur_in_view;
+    ncplane_set_channels(inputpl, NCCHANNELS_INITIALIZER(BG_INP_R, BG_INP_G, BG_INP_B, 180, 230, 255));
+    char cbuf[2] = { cursor_ch_val, '\0' };
+    ncplane_putstr_yx(inputpl, 1, cx, cbuf);
+    ncplane_set_channels(inputpl, inp_ch(230, 230, 230));
+    if (!after.empty()) {
+      ncplane_putstr_yx(inputpl, 1, cx + 1, after.c_str());
+    }
   }
 }
 
@@ -1081,6 +1084,7 @@ void TuiState::redraw_all() {
 void TuiState::tick_spinner() {
   ++spinner_frame;
   redraw_header();
+  redraw_input();
   notcurses_render(nc);
 }
 
@@ -1088,6 +1092,7 @@ void TuiState::set_thinking(bool on) {
   thinking = on;
   if (!on) spinner_frame = 0;
   redraw_header();
+  redraw_input();
   notcurses_render(nc);
 }
 
@@ -1242,7 +1247,7 @@ void TuiState::dismiss_modal_popup() {
 // Returns the chosen path, or "" on cancel.
 //
 std::string TuiState::file_picker(const std::string &start_dir,
-                                  const std::string &title_hint) {
+                                  const std::string &title_hint) const {
   std::string current_dir = start_dir;
   {
     std::error_code ec;
@@ -1250,12 +1255,13 @@ std::string TuiState::file_picker(const std::string &start_dir,
     if (!ec) current_dir = canon.string();
   }
   auto load_entries = [](const std::string &dir,
-                         std::vector<std::string> &entries) {
+                         std::vector<std::string> &entries)
+  {
     entries.clear();
     std::error_code ec;
     if (fs::path(dir).has_parent_path() &&
         fs::path(dir) != fs::path(dir).root_path())
-      entries.push_back("..");
+      entries.emplace_back("..");
     std::vector<std::string> dirs, files;
     for (const auto &e : fs::directory_iterator(dir, ec)) {
       if (ec) break;
@@ -1264,8 +1270,8 @@ std::string TuiState::file_picker(const std::string &start_dir,
       if (e.is_directory()) dirs.push_back(name);
       else                  files.push_back(name);
     }
-    std::sort(dirs.begin(), dirs.end());
-    std::sort(files.begin(), files.end());
+    ranges::sort(dirs);
+    ranges::sort(files);
     for (auto &d : dirs)  entries.push_back(d + "/");
     for (auto &f : files) entries.push_back(f);
   };
@@ -1427,7 +1433,7 @@ std::string TuiState::file_picker(const std::string &start_dir,
 //
 // ─── TuiState::confirm_dialog ─────────────────────────────────────────────
 //
-bool TuiState::confirm_dialog(const std::string &prompt) {
+bool TuiState::confirm_dialog(const std::string &prompt) const {
   ncplane_erase(inputpl);
   ncplane_set_channels(inputpl, inp_ch(255, 200, 80));
   std::string msg = " " + prompt + " [y/n] ❯ ";
@@ -1446,7 +1452,7 @@ bool TuiState::confirm_dialog(const std::string &prompt) {
     notcurses_render(nc);
   }
   std::string lo = answer;
-  std::transform(lo.begin(), lo.end(), lo.begin(), ::tolower);
+  ranges::transform(lo, lo.begin(), ::tolower);
   redraw_input();
   notcurses_render(nc);
   return (lo == "y" || lo == "yes" || lo == "sure" || lo == "k");
@@ -1486,7 +1492,7 @@ std::string TuiState::readline_blocking() {
       // Entering history from a fresh prompt: save current text as draft.
       std::string hist_entry;
       if (history.up(hist_entry)) {
-        if (input_buf.size() > 0 && hist_entry != input_buf) {
+        if (!input_buf.empty() && hist_entry != input_buf) {
           // Only save draft when we first leave the bottom of history.
           // (history.reset_nav was called on entry so the first Up call
           //  always comes from the "new input" position.)
@@ -1502,8 +1508,7 @@ std::string TuiState::readline_blocking() {
 
     if (ni.id == NCKEY_DOWN) {
       std::string hist_entry;
-      bool got = history.down(hist_entry);
-      if (got) {
+      if (history.down(hist_entry)) {
         input_buf  = hist_entry;
         cursor_pos = input_buf.size();
       } else {
@@ -1581,7 +1586,7 @@ std::string TuiState::readline_blocking() {
   }
 }
 
-void AgentState::apply_generation_params(const NitroConfig &cfg) {
+void AgentState::apply_generation_params(const NitroConfig &cfg) const {
   llama->add_stop("<|turn|>");
   llama->add_stop("<|im_end|>");
   llama->set_max_tokens(cfg.n_max_tokens);
@@ -1672,7 +1677,7 @@ float AgentState::tokens_per_sec() const {
   return (float)(iter->_tokens_generated / elapsed);
 }
 
-std::string AgentState::memory_info_text() {
+std::string AgentState::memory_info_text() const {
   if (!model_loaded) return "No model loaded.";
   LlamaMemoryInfo m = llama->memory_info();
   std::ostringstream oss;
@@ -1688,7 +1693,7 @@ std::string AgentState::memory_info_text() {
   return oss.str();
 }
 
-std::string AgentState::rag_tool(const NitroConfig &cfg, const std::string &agent_query) {
+std::string AgentState::rag_tool(const NitroConfig &cfg, const std::string &agent_query) const {
   std::string result;
   if (embed_llama && rag_db && rag_session) {
     result = embed_llama->rag_retrieve(*rag_db, agent_query, cfg.rag_top_k, *rag_session);
@@ -1701,7 +1706,7 @@ std::string AgentState::rag_tool(const NitroConfig &cfg, const std::string &agen
   return result;
 }
 
-bool AgentState::rag_load_index(const std::string &path, TuiState &tui) {
+bool AgentState::rag_load_index(const std::string &path, TuiState &tui) const {
   if (!embed_llama || !rag_db) {
     tui.append_line("[err] Load an embedding model first: /embed <path>");
     tui.redraw_all();
@@ -1716,7 +1721,7 @@ bool AgentState::rag_load_index(const std::string &path, TuiState &tui) {
   return true;
 }
 
-bool AgentState::rag_index(const std::string &path, const NitroConfig &cfg, TuiState &tui) {
+bool AgentState::rag_index(const std::string &path, const NitroConfig &cfg, TuiState &tui) const {
   if (!embed_llama || !rag_db) {
     tui.append_line("[err] Load an embedding model first: /embed <path>");
     tui.redraw_all();
@@ -1758,7 +1763,7 @@ bool AgentState::rag_index(const std::string &path, const NitroConfig &cfg, TuiS
 //
 // Tool dispatch
 //
-std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &cfg, TuiState &tui) {
+std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &cfg, TuiState &tui) const {
   const std::string &sandbox = cfg.sandbox;
   const std::vector<std::string> &run_allowed = cfg.run_allowed;
 
@@ -1850,8 +1855,8 @@ std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &
     }
     if (!run_allowed.empty()) {
       std::string basename = fs::path(prog).filename().string();
-      bool permitted = std::any_of(run_allowed.begin(), run_allowed.end(),
-                                   [&](const std::string &a){ return a == basename; });
+      bool permitted = ranges::any_of(run_allowed,
+                                      [&](const std::string &a){ return a == basename; });
       if (!permitted) {
         return "ERROR: '" + basename + "' is not in the TOOL:RUN allowlist. "
           "Use /set run_allowed <name> to permit it.";
@@ -1881,7 +1886,7 @@ std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &
 //
 // Agent turn
 //
-bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cfg, TuiState &tui) {
+bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cfg, TuiState &tui) const {
   if (!model_loaded) {
     tui.append_line("[err] No model loaded. Use /model <path>");
     tui.redraw_all();
@@ -1916,6 +1921,7 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
   std::string buffer;
 
   auto invoke_tool = [&](const std::string &tool, const std::string_view template_str) -> void {
+    log_write("tool request: [%s]", tool.c_str());
     std::string result = process_tool(tool, cfg, tui);
     std::string content = std::vformat(template_str, std::make_format_args(result));
     log_write("tool: [%s] result: [%s]", tool.c_str(), result.c_str());
@@ -1994,7 +2000,7 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
         if (pos != std::string::npos) {
           buffer = buffer.substr(0, pos);
         }
-        pos = buffer.find_first_not_of("{");
+        pos = buffer.find_first_not_of('{');
         if (pos != std::string::npos) {
           buffer = buffer.substr(0, pos) + buffer.substr(pos + 1);
         }
@@ -2258,14 +2264,15 @@ int main(int argc, char **argv) {
   // ── Parse arguments (command-line overrides saved settings) ──────
   load_settings(cfg);
   auto resolve_path = [](const std::string &arg) -> std::string {
-    std::error_code ec;
     if (arg.substr(0, 2) == "~/") {
       const char *home = getenv("HOME");
       return std::string(home ? home : ".") + "/" + arg.substr(2);
     }
-    if (arg.substr(0, 2) == "./") {
-      return (fs::current_path(ec) / arg.substr(2)).string();
-    }
+    if (arg.substr(0, 2) == "./")
+      {
+        std::error_code ec;
+        return (fs::current_path(ec) / arg.substr(2)).string();
+      }
     return arg;
   };
 
@@ -2329,7 +2336,7 @@ int main(int argc, char **argv) {
 
   // ── Auto-discover knowledge files ─────────────────────────────────
   for (const char *kf : {"nitro.md", "AGENTS.md", "README.md"}) {
-    if (fs::exists(kf)) cfg.knowledge_files.push_back(kf);
+    if (fs::exists(kf)) cfg.knowledge_files.emplace_back(kf);
   }
 
   // ── Init curl globally ────────────────────────────────────────────

From 66c28757ad17c28317ae003543c22a61aeefd063 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Thu, 28 May 2026 21:24:02 +0930
Subject: [PATCH 48/54] LLAMA: nitro added mkdir tool

---
 llama/nitro.cpp | 41 +++++++++++++++++++++++++++++------------
 1 file changed, 29 insertions(+), 12 deletions(-)

diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index d1b1b2b..9b47e20 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -643,6 +643,21 @@ static bool write_file(const std::string &path, const std::string &data) {
   return f.good();
 }
 
+static bool make_dir(const std::string &path) {
+  // Ensures `path` exists as a directory, creating missing parents as needed.
+  // Returns true when the directory exists afterwards, false otherwise.
+  const fs::path p(path);
+  std::error_code ec;
+  // The error_code overload reports failure through `ec` instead of throwing,
+  // so it has to be inspected here for the failure to reach the log.
+  fs::create_directories(p, ec);
+  if (ec) {
+    log_write("mkdir failed [%s]: %s", path.c_str(), ec.message().c_str());
+    return false;
+  }
+  return fs::is_directory(p, ec);  // false if `path` is an existing non-directory
+}
+
 //
 // System prompt
 //
@@ -659,6 +674,7 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
     "  TOOL:LIST   [dir]          list files (default: sandbox root)\n"
     "  TOOL:READ   <file>         read file contents\n"
     "  TOOL:WRITE  <file> <text>  write text to file\n"
+    "  TOOL:MKDIR  <dir>          create a subfolder inside the sandbox\n"
     "  TOOL:EXISTS <file>         YES or NO\n"
     "  TOOL:RUN    <prog> [args]  run program inside sandbox\n"
     "  TOOL:DATE                  current date\n"
@@ -1768,7 +1784,7 @@ std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &
   const std::vector<std::string> &run_allowed = cfg.run_allowed;
 
   std::string op, arg1, arg2;
-  auto sp1 = cmd.find(' ');
+  auto sp1 = cmd.find_first_of(" \n");
   if (sp1 == std::string::npos) {
     op = trim(cmd);
   } else {
@@ -1839,9 +1855,16 @@ std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &
     if (!tui.confirm_dialog(std::format("Allow model to write {}?", p))) {
       return "ERROR: action prevented by user";
     }
-    std::string content = disclose(strip_code_fences(arg1, arg2), '`', '`');
+    std::string content = disclose(disclose(strip_code_fences(arg1, arg2), '`', '`'), '"', '"');
     return write_file(p, content) ? "OK: written to " + arg1 : "ERROR: write failed for " + arg1;
   }
+  if (op == "TOOL:MKDIR") {
+    std::string p = resolve(arg1);
+    if (!path_in_sandbox(sandbox, p)) {
+      return "ERROR: path outside sandbox";
+    }
+    return make_dir(p) ? "OK: created " + arg1 : "ERROR: mkdir failed for " + arg1;
+  }
   if (op == "TOOL:CURL") {
     return tool_curl(arg1);
   }
@@ -1849,22 +1872,16 @@ std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &
     return introspect(cfg);
   }
   if (op == "TOOL:RUN") {
-    std::string prog = resolve(arg1);
-    if (!path_in_sandbox(sandbox, prog)) {
-      return "ERROR: path outside sandbox";
-    }
     if (!run_allowed.empty()) {
-      std::string basename = fs::path(prog).filename().string();
-      bool permitted = ranges::any_of(run_allowed,
-                                      [&](const std::string &a){ return a == basename; });
+      bool permitted = ranges::any_of(run_allowed, [&](const std::string &a) {return a == arg1;});
       if (!permitted) {
-        return "ERROR: '" + basename + "' is not in the TOOL:RUN allowlist. "
+        return "ERROR: '" + arg1 + "' is not in the TOOL:RUN allowlist. "
           "Use /set run_allowed <name> to permit it.";
       }
-    } else if (!tui.confirm_dialog(std::format("Allow {} to run?", prog))) {
+    } else if (!tui.confirm_dialog(std::format("Allow {} {} to run?", arg1, arg2))) {
       return "ERROR: prevented by user";
     }
-    std::string command = prog + " " + arg2 + " 2>&1";
+    std::string command = arg1 + " " + arg2 + " 2>&1";
     FILE *fp = popen(command.c_str(), "r");
     if (!fp) {
       return "ERROR: popen failed";

From 638984923d7840d9e383d1b65b62f3f44941b265 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Thu, 28 May 2026 21:29:00 +0930
Subject: [PATCH 49/54] LLAMA: code cleanup

---
 llama/chunk_headers.cpp | 312 ----------------------------------------
 llama/rag_index.cpp     | 206 --------------------------
 2 files changed, 518 deletions(-)
 delete mode 100644 llama/chunk_headers.cpp
 delete mode 100644 llama/rag_index.cpp

diff --git a/llama/chunk_headers.cpp b/llama/chunk_headers.cpp
deleted file mode 100644
index 0d8eff0..0000000
--- a/llama/chunk_headers.cpp
+++ /dev/null
@@ -1,312 +0,0 @@
-/*
- * chunk_headers.cpp
- *
- * Smart chunker for C/C++ headers — keeps semantic units together:
- *   - block comment + following declaration/function
- *   - struct/enum/typedef blocks
- *   - grouped #define macros
- *   - standalone inline-commented declarations
- *
- * Output: one chunk per line in a .jsonl file:
- *   {"source":"notcurses.h","type":"function","text":"..."}
- *
- * Build: c++ -std=c++17 -o chunk_headers chunk_headers.cpp
- * Usage: ./chunk_headers notcurses/include/notcurses/notcurses.h > chunks.jsonl
- *        ./chunk_headers dir/                                    > chunks.jsonl
- */
-
-#include <iostream>
-#include <fstream>
-#include <sstream>
-#include <string>
-#include <vector>
-#include <algorithm>
-#include <filesystem>
-
-namespace fs = std::filesystem;
-
-/* ── tunables ──────────────────────────────────────────────── */
-static constexpr size_t MIN_CHUNK = 40;   /* ignore tiny fragments  */
-/* ─────────────────────────────────────────────────────────── */
-
-enum class ChunkType {
-  Function, Struct, Enum, Typedef, Defines, Other
-};
-
-static std::string type_name(ChunkType t) {
-  switch (t) {
-    case ChunkType::Function: return "function";
-    case ChunkType::Struct:   return "struct";
-    case ChunkType::Enum:     return "enum";
-    case ChunkType::Typedef:  return "typedef";
-    case ChunkType::Defines:  return "defines";
-    default:                  return "other";
-  }
-}
-
-/* ── helpers ───────────────────────────────────────────────── */
-
-static bool starts_with(const std::string &s, const std::string &prefix) {
-  return s.size() >= prefix.size() &&
-         s.compare(0, prefix.size(), prefix) == 0;
-}
-
-static bool is_blank(const std::string &s) {
-  for (char c : s) if (!isspace((unsigned char)c)) return false;
-  return true;
-}
-
-static std::string json_escape(const std::string &in) {
-  std::string out;
-  out.reserve(in.size() + 32);
-  for (unsigned char c : in) {
-    if      (c == '"')  { out += "\\\""; }
-    else if (c == '\\') { out += "\\\\"; }
-    else if (c == '\n') { out += "\\n";  }
-    else if (c == '\r') { /* skip */     }
-    else if (c == '\t') { out += "\\t";  }
-    else if (c < 0x20)  { /* skip */     }
-    else                { out += c;      }
-  }
-  return out;
-}
-
-static void emit_chunk(const std::string &source, ChunkType type,
-                       const std::string &text) {
-  if (text.size() < MIN_CHUNK) return;
-
-  /* trim trailing newlines */
-  size_t end = text.size();
-  while (end > 0 && (text[end-1] == '\n' || text[end-1] == '\r')) --end;
-  if (end < MIN_CHUNK) return;
-
-  std::cout << "{\"source\":\"" << json_escape(source)
-            << "\",\"type\":\""  << type_name(type)
-            << "\",\"text\":\""  << json_escape(text.substr(0, end))
-            << "\"}\n";
-}
-
-/* ── state machine ─────────────────────────────────────────── */
-
-enum class State {
-  Idle, BlockComment, LineComment, Declaration, Struct, Defines
-};
-
-static void process_file(const fs::path &path) {
-  std::ifstream f(path);
-  if (!f) { std::cerr << "cannot open: " << path << "\n"; return; }
-
-  const std::string source = path.filename().string();
-
-  State     state       = State::Idle;
-  std::string chunk;
-  ChunkType chunk_type  = ChunkType::Other;
-  int       brace_depth = 0;
-  int       paren_depth = 0;
-  int       define_count = 0;
-
-  auto flush = [&](ChunkType t) {
-    emit_chunk(source, t, chunk);
-    chunk.clear();
-    state       = State::Idle;
-    brace_depth = 0;
-    paren_depth = 0;
-  };
-
-  std::string line;
-  while (std::getline(f, line)) {
-    /* trim trailing CR */
-    if (!line.empty() && line.back() == '\r') line.pop_back();
-
-    /* find first non-whitespace for prefix checks */
-    size_t trim_pos = 0;
-    while (trim_pos < line.size() &&
-           (line[trim_pos] == ' ' || line[trim_pos] == '\t')) ++trim_pos;
-    const std::string trimmed = line.substr(trim_pos);
-
-    /* ── #define handling ─────────────────────────────────── */
-    if (starts_with(trimmed, "#define ")) {
-      if (state == State::BlockComment || state == State::LineComment) {
-        chunk += line + "\n";
-        state = State::Defines;
-        define_count = 1;
-      } else if (state == State::Defines) {
-        chunk += line + "\n";
-        define_count++;
-      } else {
-        if (chunk.size() >= MIN_CHUNK) emit_chunk(source, chunk_type, chunk);
-        chunk.clear();
-        chunk += line + "\n";
-        state = State::Defines;
-        define_count = 1;
-      }
-      continue;
-    }
-
-    /* non-define while in define group */
-    if (state == State::Defines) {
-      flush(ChunkType::Defines);
-      define_count = 0;
-      /* fall through to process this line normally */
-    }
-
-    /* ── block comment start ──────────────────────────────── */
-    if ((starts_with(trimmed, "/*") || starts_with(trimmed, "/**")) &&
-        state == State::Idle) {
-      if (chunk.size() >= MIN_CHUNK) emit_chunk(source, chunk_type, chunk);
-      chunk.clear();
-      chunk_type = ChunkType::Other;
-      chunk += line + "\n";
-      state = (trimmed.find("*/", 2) != std::string::npos)
-              ? State::LineComment
-              : State::BlockComment;
-      continue;
-    }
-
-    /* ── inside block comment ─────────────────────────────── */
-    if (state == State::BlockComment) {
-      chunk += line + "\n";
-      if (trimmed.find("*/") != std::string::npos)
-        state = State::LineComment;
-      continue;
-    }
-
-    /* ── // line comment ──────────────────────────────────── */
-    if (starts_with(trimmed, "//")) {
-      if (state == State::Idle) {
-        if (chunk.size() >= MIN_CHUNK) emit_chunk(source, chunk_type, chunk);
-        chunk.clear();
-        chunk += line + "\n";
-        state = State::LineComment;
-      } else if (state == State::LineComment) {
-        chunk += line + "\n";
-      }
-      continue;
-    }
-
-    /* ── blank line ───────────────────────────────────────── */
-    if (is_blank(trimmed)) {
-      if (state == State::LineComment)
-        flush(ChunkType::Other);
-      else if (state == State::Idle && chunk.size() >= MIN_CHUNK)
-        flush(chunk_type);
-      continue;
-    }
-
-    /* ── skip preprocessor noise ──────────────────────────── */
-    if (starts_with(trimmed, "#ifndef") || starts_with(trimmed, "#ifdef")  ||
-        starts_with(trimmed, "#endif")  || starts_with(trimmed, "#pragma") ||
-        starts_with(trimmed, "#include")) {
-      if (state == State::LineComment || state == State::BlockComment) {
-        chunk.clear();
-        state = State::Idle;
-      }
-      continue;
-    }
-
-    /* ── typedef struct / enum start ─────────────────────── */
-    if ((starts_with(trimmed, "typedef struct") ||
-         starts_with(trimmed, "typedef enum")   ||
-         starts_with(trimmed, "struct ")         ||
-         starts_with(trimmed, "enum "))          &&
-        (state == State::Idle || state == State::LineComment)) {
-
-      if (state == State::Idle && chunk.size() >= MIN_CHUNK)
-        emit_chunk(source, chunk_type, chunk);
-
-      /* preserve any comment already in chunk */
-      if (state == State::Idle) chunk.clear();
-
-      chunk += line + "\n";
-      chunk_type = starts_with(trimmed, "typedef") ? ChunkType::Typedef
-                 : starts_with(trimmed, "enum ")   ? ChunkType::Enum
-                                                    : ChunkType::Struct;
-      state = State::Struct;
-      for (char c : line) {
-        if (c == '{') ++brace_depth;
-        if (c == '}') --brace_depth;
-      }
-      if (brace_depth <= 0 && line.find(';') != std::string::npos)
-        flush(chunk_type);
-      continue;
-    }
-
-    /* ── inside struct/enum body ──────────────────────────── */
-    if (state == State::Struct) {
-      chunk += line + "\n";
-      for (char c : line) {
-        if (c == '{') ++brace_depth;
-        if (c == '}') --brace_depth;
-      }
-      if (brace_depth <= 0 && line.find(';') != std::string::npos)
-        flush(chunk_type);
-      continue;
-    }
-
-    /* ── function / other declaration ────────────────────── */
-    if (state == State::LineComment || state == State::Idle) {
-      if (state == State::Idle && chunk.size() >= MIN_CHUNK) {
-        emit_chunk(source, chunk_type, chunk);
-        chunk.clear();
-      }
-      chunk += line + "\n";
-      chunk_type = ChunkType::Function;
-      state = State::Declaration;
-      for (char c : line) {
-        if (c == '(') ++paren_depth;
-        if (c == ')') --paren_depth;
-      }
-      if (paren_depth <= 0 && line.find(';') != std::string::npos)
-        flush(ChunkType::Function);
-      continue;
-    }
-
-    /* ── multi-line declaration ───────────────────────────── */
-    if (state == State::Declaration) {
-      chunk += line + "\n";
-      for (char c : line) {
-        if (c == '(') ++paren_depth;
-        if (c == ')') --paren_depth;
-      }
-      if (paren_depth <= 0 && line.find(';') != std::string::npos)
-        flush(ChunkType::Function);
-      continue;
-    }
-  }
-
-  /* flush remainder */
-  if (chunk.size() >= MIN_CHUNK) emit_chunk(source, chunk_type, chunk);
-}
-
-/* ── directory walker ──────────────────────────────────────── */
-
-static void process_path(const fs::path &path) {
-  if (fs::is_directory(path)) {
-    /* sorted for deterministic output */
-    std::vector<fs::path> entries;
-    for (auto &e : fs::recursive_directory_iterator(path))
-      entries.push_back(e.path());
-    std::sort(entries.begin(), entries.end());
-    for (auto &e : entries) {
-      if (!fs::is_regular_file(e)) continue;
-      auto ext = e.extension().string();
-      if (ext == ".h" || ext == ".hpp" || ext == ".c" || ext == ".cpp")
-        process_file(e);
-    }
-  } else if (fs::is_regular_file(path)) {
-    process_file(path);
-  }
-}
-
-/* ── main ──────────────────────────────────────────────────── */
-
-int main(int argc, char **argv) {
-  if (argc < 2) {
-    std::cerr << "usage: " << argv[0]
-              << " <header.h|dir> [header2.h ...]\n";
-    return 1;
-  }
-  for (int i = 1; i < argc; i++)
-    process_path(fs::path(argv[i]));
-  return 0;
-}
diff --git a/llama/rag_index.cpp b/llama/rag_index.cpp
deleted file mode 100644
index 9f8a4fe..0000000
--- a/llama/rag_index.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * rag_index.cpp
- *
- * Reads chunks.jsonl produced by chunk_headers, embeds each chunk
- * using a GGUF embedding model via llama.h, saves a binary .db file.
- *
- * No fixed limits on chunk count or chunk length.
- *
- * Build:
- *   c++ -std=c++17 -o rag_index rag_index.cpp -lllama -lm
- *
- * Usage:
- *   ./rag_index \
- *     --model  nomic-embed-text-v1.5.Q4_K_M.gguf \
- *     --input  chunks.jsonl \
- *     --output notcurses.db
- */
-
-#include "llama-sb.h"
-
-#include <cmath>
-#include <cstdint>
-#include <cstring>
-#include <fstream>
-#include <iostream>
-#include <sstream>
-#include <stdexcept>
-#include <string>
-#include <vector>
-
-/* ── tunables ──────────────────────────────────────────────── */
-static constexpr int BATCH_SIZE = 512;
-/* ─────────────────────────────────────────────────────────── */
-
-/* ── on-disk chunk (variable-length text) ──────────────────── */
-/*
- * db header  (16 bytes):
- *   uint32  magic      = 0x52414744  "RAGD"
- *   uint32  version    = 2
- *   uint32  n_chunks
- *   uint32  embed_dim
- *
- * per chunk:
- *   uint32  text_len
- *   char[]  text          (text_len bytes, no null)
- *   uint16  source_len
- *   char[]  source        (source_len bytes, no null)
- *   uint8   type_len
- *   char[]  type          (type_len bytes, no null)
- *   float[] embedding     (embed_dim floats)
- */
-
-struct Chunk {
-  std::string         text;
-  std::string         source;
-  std::string         type;
-  std::vector<float>  embedding;
-};
-
-/* ── tiny JSON string extractor ────────────────────────────── */
-static bool json_get_string(const std::string &json,
-                            const std::string &key,
-                            std::string       &out) {
-  std::string search = "\"" + key + "\":";
-  size_t pos = json.find(search);
-  if (pos == std::string::npos) return false;
-  pos += search.size();
-  while (pos < json.size() && json[pos] == ' ') ++pos;
-  if (pos >= json.size() || json[pos] != '"') return false;
-  ++pos; /* skip opening quote */
-  out.clear();
-  while (pos < json.size()) {
-    char c = json[pos++];
-    if (c == '\\' && pos < json.size()) {
-      char e = json[pos++];
-      switch (e) {
-      case 'n':  out += '\n'; break;
-      case 't':  out += '\t'; break;
-      case '"':  out += '"';  break;
-      case '\\': out += '\\'; break;
-      default:   out += e;    break;
-      }
-    } else if (c == '"') {
-      break;
-    } else {
-      out += c;
-    }
-  }
-  return true;
-}
-
-/* ── db save ───────────────────────────────────────────────── */
-static bool save_db(const std::string        &path,
-                    const std::vector<Chunk> &chunks,
-                    int                       embed_dim) {
-  std::ofstream f(path, std::ios::binary);
-  if (!f) { std::cerr << "cannot open for write: " << path << "\n"; return false; }
-
-  auto write32 = [&](uint32_t v) { f.write((char*)&v, 4); };
-  auto write16 = [&](uint16_t v) { f.write((char*)&v, 2); };
-  auto write8  = [&](uint8_t  v) { f.write((char*)&v, 1); };
-  auto writestr = [&](const std::string &s, size_t max_len) {
-    size_t len = std::min(s.size(), max_len);
-    f.write(s.c_str(), (std::streamsize)len);
-  };
-
-  write32(0x52414744);              /* magic "RAGD" */
-  write32(2);                       /* version      */
-  write32((uint32_t)chunks.size()); /* n_chunks     */
-  write32((uint32_t)embed_dim);     /* embed_dim    */
-
-  for (const Chunk &c : chunks) {
-    write32((uint32_t)c.text.size());
-    f.write(c.text.c_str(), (std::streamsize)c.text.size());
-
-    uint16_t src_len = (uint16_t)std::min(c.source.size(), (size_t)65535);
-    write16(src_len);
-    writestr(c.source, src_len);
-
-    uint8_t type_len = (uint8_t)std::min(c.type.size(), (size_t)255);
-    write8(type_len);
-    writestr(c.type, type_len);
-
-    f.write((char*)c.embedding.data(),
-            (std::streamsize)(embed_dim * sizeof(float)));
-  }
-
-  return f.good();
-}
-
-/* ── main ──────────────────────────────────────────────────── */
-int main(int argc, char **argv) {
-  std::string model_path;
-  std::string input_path;
-  std::string output_path = "corpus.db";
-
-  for (int i = 1; i < argc; i++) {
-    if (!strcmp(argv[i], "--model")  && i+1 < argc) model_path  = argv[++i];
-    if (!strcmp(argv[i], "--input")  && i+1 < argc) input_path  = argv[++i];
-    if (!strcmp(argv[i], "--output") && i+1 < argc) output_path = argv[++i];
-  }
-
-  if (model_path.empty() || input_path.empty()) {
-    std::cerr << "usage: rag_index --model <embed.gguf> "
-      "--input <chunks.jsonl> [--output <db>]\n";
-    return 1;
-  }
-
-  /* ── load embedding model ─────────────────────────────── */
-
-  Llama llama;
-  if (!llama.load_embedding_model(model_path)) {
-    return 1;
-  }
-
-  int embed_dim = llama.get_embed_dim();
-  std::cerr << "embedding dim: " << embed_dim << "\n";
-
-  /* ── read and embed chunks ────────────────────────────── */
-  std::vector<Chunk> chunks;
-  std::ifstream fin(input_path);
-  if (!fin) {
-    std::cerr << "cannot open: " << input_path << "\n";
-    return 1;
-  }
-
-  std::string line;
-  int skipped = 0;
-
-  while (std::getline(fin, line)) {
-    if (line.empty() || line[0] != '{') {
-      continue;
-    }
-
-    Chunk c;
-    if (!json_get_string(line, "text",   c.text)   ||
-        !json_get_string(line, "source", c.source)) {
-      ++skipped;
-      continue;
-    }
-    json_get_string(line, "type", c.type);
-
-    std::cerr << "\r[" << chunks.size() << "] embedding: "
-              << c.text.substr(0, 40) << "...";
-
-    std::string text = "Instruct: Represent this API documentation for code retrieval\nQuery: " + c.text;
-    if (!llama.embed_text(text, c.embedding, embed_dim)) {
-      ++skipped;
-      continue;
-    }
-
-    chunks.push_back(std::move(c));
-  }
-  std::cerr << "\n";
-  std::cerr << "embedded " << chunks.size()
-            << " chunks (" << skipped << " skipped)\n";
-
-  /* ── save ─────────────────────────────────────────────── */
-  if (!save_db(output_path, chunks, embed_dim)) {
-    std::cerr << "failed to save db\n";
-    return 1;
-  }
-  std::cerr << "saved → " << output_path << "\n";
-
-  return 0;
-}

From 8406b7f05961a4864a7396d3086f5aba17b6c492 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Fri, 29 May 2026 19:44:49 +0930
Subject: [PATCH 50/54] LLAMA: nitro read local settings, replace disclose with
 unwrap fn

---
 llama/nitro.cpp | 116 +++++++++++++++++++++++++++++++++++++++---------
 1 file changed, 96 insertions(+), 20 deletions(-)

diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index 9b47e20..fcfa3fd 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -50,8 +50,11 @@
 #include <string>
 #include <vector>
 #include <curl/curl.h>
+#include <iostream>
+
 #include "llama-sb.h"
 #include "llama-sb-rag.h"
+
 #include <notcurses/notcurses.h>
 
 namespace fs = std::filesystem;
@@ -344,12 +347,13 @@ static const std::vector<std::string> CODE_EXTENSIONS = {
 
 //
 // Settings persistence  (~/.config/nitro/nitro.settings.json)
-//
-// A minimal hand-rolled JSON reader/writer for the flat key-value settings
-// we care about.  We deliberately avoid a full JSON library dependency.
-
 // Returns the canonical settings path: ~/.config/nitro/settings.json
+//
 static std::string settings_path() {
+  // Attempt to read settings from the current working directory first
+  if (fs::exists("settings.json")) {
+    return "settings.json";
+  }
   const char *home = getenv("HOME");
   std::string base = home ? std::string(home) : ".";
   return base + "/.config/nitro/settings.json";
@@ -362,6 +366,10 @@ static std::string history_path() {
   return base + "/.config/nitro/history.txt";
 }
 
+//
+// A minimal hand-rolled JSON reader/writer for the flat key-value settings
+// we care about.  We deliberately avoid a full JSON library dependency.
+//
 static bool json_get_string(const std::string &json,
                             const std::string &key,
                             std::string       &out) {
@@ -551,25 +559,93 @@ static std::string trim(std::string_view str) {
   return std::string(str.substr(start, end - start + 1));
 }
 
-//
-// Removes any front and back characters
-//
-static std::string disclose(const std::string &input, char c1, char c2) {
-  // Check if string has at least 2 characters
-  if (input.length() < 2) {
+/*
+ * unwrap() - Remove a matching outer "wrapper" from a string.
+ *
+ * Trims leading/trailing whitespace first, then checks (in order):
+ *
+ *  1. Same-character pairs   "..."  '...'  |...|  `...`
+ *  2. Mirror pairs           (...)  [...]  {...}
+ *  3. HTML-like tags         <tag>...</tag>
+ *  4. Plain angle brackets   <...>          (fallback if tags don't match)
+ *
+ * If none of the above apply, returns the whitespace-trimmed input unchanged.
+ *
+ * Examples:
+ *   unwrap("\"hello\"")        -> "hello"
+ *   unwrap("  [foo]  ")        -> "foo"
+ *   unwrap("<b>bold</b>")      -> "bold"
+ *   unwrap("<file>x</file>")   -> "x"
+ *   unwrap("<hello>")          -> "hello"
+ *   unwrap("plain")            -> "plain"
+ *   unwrap("")                 -> ""
+ */
+std::string unwrap(const std::string &input) {
+  if (input.empty()) {
     return input;
   }
+ 
+  size_t left = 0;
+  size_t right = input.length() - 1;
+
+  while (left <= right && std::isspace(static_cast<unsigned char>(input[left]))) {
+    left++;
+  }
+  while (left <= right && std::isspace(static_cast<unsigned char>(input[right]))) {
+    right--;
+  }
+
+  if (left > right) {
+    return "";
+  }
+
+  // Same-character pairs: "", '', ||, ``
+  // Note: [], {} are NOT same-char pairs — they belong in mirror pairs only
+  if (input[left] == input[right]) {
+    if (input[left] == '"'  || input[left] == '\'' ||
+        input[left] == '|'  || input[left] == '`') {
+      return input.substr(left + 1, right - left - 1);
+    }
+  }
+
+  // Mirror pairs: (), [], {}, but NOT <> (handled below as possible HTML tags)
+  if (input[left] != input[right]) {
+    if ((input[left] == '(' && input[right] == ')') ||
+        (input[left] == '[' && input[right] == ']') ||
+        (input[left] == '{' && input[right] == '}')) {
+      return input.substr(left + 1, right - left - 1);
+    }
+  }
+
+  // HTML-like tags: <tag>content</tag>
+  // Also handles plain <...> as a fallback at the end
+  if (input[left] == '<' && input[right] == '>') {
+    // Find end of opening tag
+    size_t openTagEnd = left + 1;
+    while (openTagEnd <= right && input[openTagEnd] != '>') openTagEnd++;
+
+    if (openTagEnd < right) {
+      std::string openTagName = input.substr(left + 1, openTagEnd - left - 1);
+
+      // Find start of closing tag (search backwards for '<')
+      size_t closeTagStart = right;
+      while (closeTagStart > openTagEnd && input[closeTagStart] != '<') closeTagStart--;
+
+      if (closeTagStart > openTagEnd && input[closeTagStart + 1] == '/') {
+        std::string closeTagName = input.substr(closeTagStart + 2, right - closeTagStart - 2);
+
+        if (!openTagName.empty() && openTagName == closeTagName) {
+          // Return content between the tags
+          return input.substr(openTagEnd + 1, closeTagStart - openTagEnd - 1);
+        }
+      }
+    }
 
-  // Check if first and last characters match the specified delimiters
-  if (input[0] == c1 && input[input.length() - 1] == c2) {
-    // Remove first and last characters
-    std::string result = input;
-    result.erase(0, 1);
-    result.erase(input.length() - 1, 1);
-    return result;
+    // Fallback: plain <...> with no matching HTML tags — unwrap the angle brackets
+    return input.substr(left + 1, right - left - 1);
   }
 
-  return input;
+  return input.substr(left, right - left + 1);
 }
 
 // ─── colour helpers ──────────────────────────────────────────────────────
@@ -1810,7 +1886,7 @@ std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &
     if (p[0] == '/') {
       return p;
     }
-    return join_path(sandbox, disclose(disclose(p, '<', '>'), '[', ']'));
+    return join_path(sandbox, unwrap(p));
   };
 
   tui.append_line("[tool] → " + op);
@@ -1855,7 +1931,7 @@ std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &
     if (!tui.confirm_dialog(std::format("Allow model to write {}?", p))) {
       return "ERROR: action prevented by user";
     }
-    std::string content = disclose(disclose(strip_code_fences(arg1, arg2), '`', '`'), '"', '"');
+    std::string content = unwrap(strip_code_fences(arg1, arg2));
     return write_file(p, content) ? "OK: written to " + arg1 : "ERROR: write failed for " + arg1;
   }
   if (op == "TOOL:MKDIR") {

From 966cda00075fa8d4fa80f35738f25cc0315a4fb0 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Sat, 30 May 2026 11:41:18 +0930
Subject: [PATCH 51/54] LLAMA: nitro - update prompt

---
 llama/nitro.cpp | 131 ++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 104 insertions(+), 27 deletions(-)

diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index fcfa3fd..3ce5629 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -79,6 +79,7 @@ struct NitroConfig {
   int   penalty_last_n = 256;
   std::vector<std::string> knowledge_files;
   int   rag_top_k      = 5;
+  bool  thinking       = true;
   // TOOL:RUN allowlist — if non-empty, only these program basenames may run.
   // Empty means "allow anything inside the sandbox" (original behaviour).
   std::vector<std::string> run_allowed;
@@ -467,6 +468,8 @@ static void load_settings(NitroConfig &cfg) {
   std::ostringstream oss; oss << f.rdbuf();
   std::string json = oss.str();
 
+  cfg.thinking = true;
+  
   // String fields
   settings_get_str(json, "model_path",  cfg.model_path);
   settings_get_str(json, "embed_path",  cfg.embed_path);
@@ -584,7 +587,7 @@ std::string unwrap(const std::string &input) {
   if (input.empty()) {
     return input;
   }
- 
+
   size_t left = 0;
   size_t right = input.length() - 1;
 
@@ -740,13 +743,46 @@ static bool make_dir(const std::string &path) {
 static std::string build_system_prompt(const std::vector<std::string> &knowledge_files,
                                        const std::string &sandbox) {
   std::string p;
-  p += "You are Nitro, an agentic AI assistant for software development.\n"
+  p +=
+    "You are Nitro, an agentic AI assistant for software development. "
+    "Proceed with caution, guided by logic and the pursuit of knowledge.\n\n"
+
     "Your sandbox (project directory) is: " + sandbox + "\n\n"
-    "## Tool protocol\n"
-    " - Emit tool calls on their own new line. for example:\n\n"
+
+    "## Core Principle\n"
+    "Always follow this loop: THINK → DECIDE → ACT → RESPOND\n\n"
+
+    "## Reasoning Protocol\n"
+    "Use <|think|> to reason BEFORE acting. Keep it concise and structured.\n"
+    "Format:\n"
+    "<|think|>\n"
+    "- What is the user asking?\n"
+    "- Do I need external data (files, tools)?\n"
+    "- What is the safest and most correct action?\n"
+    "</|think|>\n\n"
+    "Rules:\n"
+    "- Do NOT call tools inside <|think|>\n"
+    "- Do NOT include the final answer inside <|think|>\n"
+    "- Always follow <|think|> with either a tool call OR a final answer\n"
+    "- Skip <|think|> only for trivial or conversational responses\n\n"
+
+    "## Tool Protocol\n"
+    "Emit ONE tool call at a time, immediately followed by NITRO_END_TOOL.\n"
+    "Do NOT add any commentary, explanation, or text between the tool call and NITRO_END_TOOL.\n"
+    "The host executes the tool and returns NITRO_TOOL_RESULT: <value>.\n"
+    "Wait for the result before continuing.\n"
+    "After receiving NITRO_TOOL_RESULT you may explain what you did.\n\n"
+    "Examples:\n\n"
     "TOOL:LIST\n"
-    " - The host executes the tool and returns TOOL_RESULT: <value> on the next line.\n\n"
-    "Available tools:\n"
+    "NITRO_END_TOOL\n\n"
+    "TOOL:READ readme.txt\n"
+    "NITRO_END_TOOL\n\n"
+    "TOOL:WRITE index.html <!DOCTYPE html><html>...</html>\n"
+    "NITRO_END_TOOL\n\n"
+    "TOOL:RUN ./build.sh\n"
+    "NITRO_END_TOOL\n\n"
+
+    "## Available Tools\n"
     "  TOOL:LIST   [dir]          list files (default: sandbox root)\n"
     "  TOOL:READ   <file>         read file contents\n"
     "  TOOL:WRITE  <file> <text>  write text to file\n"
@@ -755,25 +791,47 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
     "  TOOL:RUN    <prog> [args]  run program inside sandbox\n"
     "  TOOL:DATE                  current date\n"
     "  TOOL:TIME                  current time\n"
-    "  TOOL:RND                   random float\n"
+    "  TOOL:RND                   random float 0..1\n"
     "  TOOL:RAG    <query>        query the RAG index for additional context\n"
-    "  TOOL:INTROSPECT            introspect your settings, top_k etc\n"
-    "  TOOL:CURL   <url>          HTTP GET; returns response body (max 32 KB)\n\n"
-    "Rules:\n"
-    "- Never access files outside the sandbox.\n"
-    "- Only use one TOOL at a time. Never combine, always use each tool step by step\n"
-    "- Use TOOL:CURL to fetch documentation, APIs, or web content you need.\n"
-    "- Reason step-by-step inside <|think|> </|think|> (hidden from user).\n"
-    "- After each tool call, explain what you did in plain English.\n\n";
+    "  TOOL:INTROSPECT            show current model settings\n"
+    "  TOOL:CURL   <url>          HTTP GET, returns response body (max 32 KB)\n"
+    "  TOOL:PERMISSION            ask user for explicit permission\n\n"
+
+    "## Tool Decision Rules\n"
+    "Use tools ONLY if:\n"
+    "- The user explicitly references files or the project, OR\n"
+    "- The answer depends on local or project data, OR\n"
+    "- The user asks for date, time, or a random number\n"
+    "Otherwise answer directly using internal knowledge.\n\n"
+
+    "## Tool Rules\n"
+    "- NITRO_END_TOOL must immediately follow the tool call — no exceptions\n"
+    "- Never add commentary before NITRO_END_TOOL\n"
+    "- Only use one tool at a time, step by step\n"
+    "- Never access files outside the sandbox\n"
+    "- Use TOOL:PERMISSION before destructive or irreversible operations\n"
+    "- Do NOT hallucinate file contents\n"
+    "- Do NOT fabricate tool outputs\n"
+    "- Do NOT assume files exist — use TOOL:EXISTS to check first\n\n"
+
+    "## File Writing Rules\n"
+    "Use TOOL:WRITE only if explicitly requested.\n"
+    "- Write complete and valid content\n"
+    "- Do not overwrite without clear intent\n"
+    "- Use TOOL:PERMISSION before overwriting an existing file\n"
+    "- Format: TOOL:WRITE <filename> <complete file content>\n\n"
+
+    "## Interaction Guidelines\n"
+    "- Be precise and efficient\n"
+    "- Ask clarifying questions if the request is ambiguous or missing parameters\n"
+    "- Prefer direct answers when no tools are needed\n"
+    "- After each tool result, explain in plain English what was done\n"
+    "- If no user request is provided, respond with a brief readiness message\n\n";
+
   for (const auto &kf : knowledge_files) {
-    auto path = join_path(sandbox, kf);
-    std::ifstream f(path);
-    if (!f) {
-      continue;
-    }
-    log_write("loaded [%s]", path.c_str());
-    std::ostringstream oss;
-    oss << f.rdbuf();
+    std::ifstream f(kf);
+    if (!f) continue;
+    std::ostringstream oss; oss << f.rdbuf();
     p += "## Knowledge: " + kf + "\n" + oss.str() + "\n\n";
   }
   return p;
@@ -1727,7 +1785,9 @@ bool AgentState::setup_model(const NitroConfig &cfg, TuiState &tui) {
   tui.kv_total = mem.kv_total;
   tui.vram_used  = mem.vram_used;
   tui.vram_total = mem.vram_total;
-  tui.redraw_all();
+
+  tui.append_line(std::string("[sys] Thinking mode: ") + (cfg.thinking ? "enabled" : "disabled"));
+  tui.redraw_all();  
   return true;
 }
 
@@ -2009,14 +2069,29 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
 
   // in_think starts false — models that don't use <think> blocks emit
   // visible text immediately.  The spinner activates only while thinking.
-  enum {t_init, t_think, t_thunk} think_mode = t_init;
+  enum {t_init, t_think, t_thunk} think_mode = (cfg.thinking ? t_init : t_thunk);
   tui.set_thinking(false);
   std::string buffer;
 
-  auto invoke_tool = [&](const std::string &tool, const std::string_view template_str) -> void {
+  auto invoke_tool = [&](const std::string &buffer, const std::string_view template_str) -> void {
+    static constexpr std::string_view END_TOOL = "\nNITRO_END_TOOL";
+    static const std::string TOOL_RESULT = "NITRO_TOOL_RESULT: ";
+
+    std::string tool;
+    const auto pos = buffer.rfind(END_TOOL);
+    if (pos != std::string::npos) {
+      tool = buffer.substr(0, pos);
+      auto endTool = buffer.substr(pos);
+      if (endTool.length() > END_TOOL.length()) {
+        log_write("ERROR: trailing delimiter: [%s]", endTool.c_str());
+      }
+    } else {
+      tool = buffer;
+    }
+
     log_write("tool request: [%s]", tool.c_str());
     std::string result = process_tool(tool, cfg, tui);
-    std::string content = std::vformat(template_str, std::make_format_args(result));
+    std::string content = TOOL_RESULT + std::vformat(template_str, std::make_format_args(result));
     log_write("tool: [%s] result: [%s]", tool.c_str(), result.c_str());
     if (!llama->add_message(*iter, "tool_result", content)) {
       tui.append_line(std::string("[err] tool result inject: ") + llama->last_error());
@@ -2386,6 +2461,8 @@ int main(int argc, char **argv) {
       cfg.n_gpu_layers = std::stoi(take_next(a.c_str()));
     } else if (a == "-l" || a == "--log") {
       log_open();
+    } else if (a == "-t" || a == "--think") {
+      cfg.thinking = false;
     } else if (a == "-h" || a == "--help") {
       std::puts("Usage: nitro [options] [project_dir]\n"
                 "\n"

From 6c212c0db548a27ebf3c634b6cb32c1b26af9bf6 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Sat, 30 May 2026 14:05:58 +0930
Subject: [PATCH 52/54] LLAMA: nitro - update thinking display

---
 llama/nitro.cpp | 67 +++++++++++++++++++++++++++++++++----------------
 1 file changed, 46 insertions(+), 21 deletions(-)

diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index 3ce5629..0a302be 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -41,6 +41,7 @@
 
 #include <algorithm>
 #include <chrono>
+#include <cmath>
 #include <ctime>
 #include <filesystem>
 #include <fstream>
@@ -469,7 +470,7 @@ static void load_settings(NitroConfig &cfg) {
   std::string json = oss.str();
 
   cfg.thinking = true;
-  
+
   // String fields
   settings_get_str(json, "model_path",  cfg.model_path);
   settings_get_str(json, "embed_path",  cfg.embed_path);
@@ -1170,23 +1171,30 @@ void TuiState::redraw_input() const {
   ncplane_erase(inputpl);
 
   if (thinking) {
-    static constexpr const char *ROBOT_RIGHT = "🤖➡";
-    static constexpr const char *ROBOT_LEFT  = "⬅🤖";
-    // 15 steps each way = 20 frame cycle
-    static constexpr int STEPS = 15;
-    int cycle = spinner_frame % (STEPS * 2);
-    bool going_right = (cycle < STEPS);
-    int pos = going_right ? cycle : (STEPS * 2 - 1 - cycle);
-
-    std::string sep(term_cols, '-');
-    // blank 4 cols to fit robot + arrow (each emoji is 2 cols wide)
-    for (int i = pos; i < std::min(pos + 4, term_cols); ++i) sep[i] = ' ';
-    ncplane_set_channels(inputpl, inp_ch(80, 120, 160));
-    ncplane_putstr_yx(inputpl, 0, 0, sep.c_str());
-
-    ncplane_set_channels(inputpl, NCCHANNELS_INITIALIZER(255, 220, 80, BG_INP_R, BG_INP_G, BG_INP_B));
-    ncplane_putstr_yx(inputpl, 0, pos, going_right ? ROBOT_RIGHT : ROBOT_LEFT);
-
+    static constexpr const char *BLOCKS[] = { "-", "~", "≈", "~", "-" };
+    static constexpr int    N_BLOCKS = 5;
+    static constexpr double FREQ     = 0.25;  // gentler wave
+    static constexpr double SPEED    = 0.15;  // slower scroll
+    static constexpr int    DELAY    = 12;    // frames before animation starts
+
+    if (spinner_frame < DELAY) {
+      // still just a plain separator during the pause
+      ncplane_set_channels(inputpl, inp_ch(80, 120, 160));
+      std::string sep(term_cols, '-');
+      ncplane_putstr_yx(inputpl, 0, 0, sep.c_str());
+    } else {
+      int frame = spinner_frame - DELAY;  // animation frame relative to start
+      for (int col = 0; col < term_cols; ++col) {
+        double phase = (col * FREQ) - (frame * SPEED);
+        int idx = (int)((std::sin(phase) + 1.0) * 0.5 * (N_BLOCKS - 1));
+        idx = std::max(0, std::min(idx, N_BLOCKS - 1));
+        // subtle brightness shift — blue-grey, not full glow
+        int brightness = 80 + idx * 20;
+        ncplane_set_channels(inputpl, NCCHANNELS_INITIALIZER(brightness, brightness + 20, brightness + 40,
+                                                             BG_INP_R, BG_INP_G, BG_INP_B));
+        ncplane_putstr_yx(inputpl, 0, col, BLOCKS[idx]);
+      }
+    }
     ncplane_set_channels(inputpl, inp_ch(140, 140, 180));
     ncplane_putstr_yx(inputpl, 1, 2, "thinking…");
   } else {
@@ -1787,7 +1795,7 @@ bool AgentState::setup_model(const NitroConfig &cfg, TuiState &tui) {
   tui.vram_total = mem.vram_total;
 
   tui.append_line(std::string("[sys] Thinking mode: ") + (cfg.thinking ? "enabled" : "disabled"));
-  tui.redraw_all();  
+  tui.redraw_all();
   return true;
 }
 
@@ -2070,6 +2078,7 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
   // in_think starts false — models that don't use <think> blocks emit
   // visible text immediately.  The spinner activates only while thinking.
   enum {t_init, t_think, t_thunk} think_mode = (cfg.thinking ? t_init : t_thunk);
+
   tui.set_thinking(false);
   std::string buffer;
 
@@ -2089,7 +2098,7 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
       tool = buffer;
     }
 
-    log_write("tool request: [%s]", tool.c_str());
+    log_write("tool request: mode:[%d] [%s]", think_mode, tool.c_str());
     std::string result = process_tool(tool, cfg, tui);
     std::string content = TOOL_RESULT + std::vformat(template_str, std::make_format_args(result));
     log_write("tool: [%s] result: [%s]", tool.c_str(), result.c_str());
@@ -2137,7 +2146,23 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
       return false;
     }
     std::string tok = llama->next(*iter);
-    buffer += tok;
+    if (tok == "<") {
+      // fetch the complete tag
+      std::string tag = tok;
+      while (iter->_has_next && tag.find(">") == std::string::npos) {
+        tag += llama->next(*iter);
+      }
+      if (tag == "<|think|>") {
+        think_mode = t_think;
+        tui.set_thinking(true);
+        continue;
+      } else {
+        buffer += tag;
+      }
+    } else {
+      buffer += tok;
+    }
+
     if (think_mode == t_init) {
       start_think("<think>");
       start_think("<|think|>");

From 0a38fe4294707d3ed390f6949eceb1ec67174bf5 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Mon, 1 Jun 2026 20:07:20 +0930
Subject: [PATCH 53/54] LLAMA: nitro - now displays thinking text

- fixed exiting on false positive
---
 llama/llama-sb.cpp |  78 ++++----------
 llama/llama-sb.h   |   2 +-
 llama/llama.cpp    |   2 +-
 llama/nitro.cpp    | 247 +++++++++++++++++++++++++++++++++------------
 4 files changed, 206 insertions(+), 123 deletions(-)

diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index 83e881f..c28b189 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -14,7 +14,7 @@
 #include "llama.h"
 #include "llama-sb.h"
 
-constexpr int MAX_REPEAT = 5;
+constexpr int MAX_REPEAT = 50;
 
 static bool read_vram(size_t &used, size_t &total) {
   size_t free = 0;
@@ -391,9 +391,18 @@ LlamaMemoryInfo Llama::memory_info() {
     info.vram_percent = 100.0f * info.vram_used / info.vram_total;
   }
 
+  info.model_native_max_ctx = llama_model_n_ctx_train(_model);
+
   // Advice
   ostringstream advice;
 
+  // Check structural limits & model configuration quirks
+  if (info.kv_total > info.model_native_max_ctx) {
+    advice << "WARNING: Configured context size (" << info.kv_total
+           << ") exceeds model native training length (" << info.model_native_max_ctx
+           << "). Logic flaws or repetition bugs will occur unless RoPE scaling options are enabled. ";
+  }
+
   if (n_gpu_layers < info.n_layers_total) {
     advice << "Only " << n_gpu_layers << "/" << info.n_layers_total
            << " layers on GPU - increase n_gpu_layers if VRAM allows. ";
@@ -519,49 +528,6 @@ bool Llama::configure_sampler() {
   return true;
 }
 
-bool Llama::ends_with_sentence_boundary(const string &text) {
-  if (text.empty()) {
-    return false;
-  }
-
-  // Get last few characters (in case of whitespace after punctuation)
-  size_t check_len = std::min(text.length(), (size_t)5);
-  std::string ending = text.substr(text.length() - check_len);
-
-  // Check for various sentence endings
-  // Period followed by space or end
-  if (ending.find(". ") != std::string::npos ||
-      ending.back() == '.') {
-    return true;
-  }
-
-  // Exclamation mark
-  if (ending.find("! ") != std::string::npos ||
-      ending.back() == '!') {
-    return true;
-  }
-
-  // Question mark
-  if (ending.find("? ") != std::string::npos ||
-      ending.back() == '?') {
-    return true;
-  }
-
-  // Newline (paragraph break)
-  if (ending.find('\n') != std::string::npos) {
-    return true;
-  }
-
-  // Quote followed by period: "something."
-  if (ending.find(".\"") != std::string::npos ||
-      ending.find("!\"") != std::string::npos ||
-      ending.find("?\"") != std::string::npos) {
-    return true;
-  }
-
-  return false;
-}
-
 // Makes space in the context for n_tokens by removing old tokens if necessary
 // Returns true if successful, false if impossible to make space
 //
@@ -641,23 +607,23 @@ string Llama::token_to_string(LlamaIter &iter, llama_token tok) {
   char buf[512];
   int n = llama_token_to_piece(_vocab, tok, buf, sizeof(buf), 0, false);
   if (n > 0) {
-    // detect repetition
-    if (iter._last_word == buf) {
-      if (++iter._repetition_count == MAX_REPEAT) {
-        iter._has_next = false;
+    // detect repetition - only on non-whitespace tokens, otherwise
+    // spaces/newlines trigger false positives almost immediately.
+    string piece(buf, n);
+    bool is_trivial = piece.find_first_not_of(" \t\n\r") == string::npos;
+    if (!is_trivial) {
+      if (iter._last_word == piece) {
+        if (++iter._repetition_count >= MAX_REPEAT) {
+          iter._has_next = false;
+        }
+      } else {
+        iter._repetition_count = 0;
+        iter._last_word = piece;
       }
-    } else {
-      iter._repetition_count = 0;
-      iter._last_word = buf;
     }
 
     result.append(buf, n);
 
-    // detect end of max-tokens
-    if (++iter._tokens_generated > _max_tokens && ends_with_sentence_boundary(result)) {
-      iter._has_next = false;
-    }
-
     // detect stop words
     if (iter._has_next) {
       for (const auto &stop : _stop_sequences) {
diff --git a/llama/llama-sb.h b/llama/llama-sb.h
index 02e2359..b01ed2e 100644
--- a/llama/llama-sb.h
+++ b/llama/llama-sb.h
@@ -33,6 +33,7 @@ struct LlamaMemoryInfo {
   int     n_layers_total; // total model layers
   int     n_layers_gpu;   // layers offloaded to GPU
   int     n_layers_cpu;   // layers on CPU
+  int     model_native_max_ctx;
 
   // Advice
   string  advice;
@@ -117,7 +118,6 @@ struct Llama {
   bool batch_decode_tokens(vector<llama_token> &tokens);
   bool configure_sampler();
   void dirty() {_sampler_dirty = true; }
-  bool ends_with_sentence_boundary(const string &out);
   bool make_space_for_tokens(int n_tokens);
   vector<llama_token> tokenize(const string &prompt);
   string token_to_string(LlamaIter &iter, llama_token tok);
diff --git a/llama/llama.cpp b/llama/llama.cpp
index 4d8cc0c..d749821 160000
--- a/llama/llama.cpp
+++ b/llama/llama.cpp
@@ -1 +1 @@
-Subproject commit 4d8cc0c56ffba3f8b7fdb0130627fed2a6f71958
+Subproject commit d749821db3bd587932d1ed57d43626cd552c9909
diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index 0a302be..21d6cef 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -40,18 +40,21 @@
 //
 
 #include <algorithm>
+#include <array>
 #include <chrono>
 #include <cmath>
+#include <cstddef>
 #include <ctime>
+#include <curl/curl.h>
 #include <filesystem>
 #include <fstream>
+#include <iomanip>
 #include <memory>
 #include <mutex>
+#include <random>
 #include <sstream>
 #include <string>
 #include <vector>
-#include <curl/curl.h>
-#include <iostream>
 
 #include "llama-sb.h"
 #include "llama-sb-rag.h"
@@ -67,6 +70,7 @@ struct NitroConfig {
   std::string model_path;
   std::string embed_path;
   std::string sandbox;
+  std::string agent_id;
   int   n_ctx          = 65536;
   int   n_batch        = 512;
   int   n_gpu_layers   = 32;
@@ -81,6 +85,7 @@ struct NitroConfig {
   std::vector<std::string> knowledge_files;
   int   rag_top_k      = 5;
   bool  thinking       = true;
+  bool  permission_prompt = false;
   // TOOL:RUN allowlist — if non-empty, only these program basenames may run.
   // Empty means "allow anything inside the sandbox" (original behaviour).
   std::vector<std::string> run_allowed;
@@ -105,11 +110,11 @@ class InputHistory {
     if (input.empty()) return;
     if (!history_stack.empty() && history_stack.back() == input) {
       // Don't push duplicate of last entry; just reset nav position.
-      current_index = (int)history_stack.size();
+      current_index = static_cast<int>(history_stack.size());
       return;
     }
     history_stack.push_back(input);
-    current_index = (int)history_stack.size();
+    current_index = static_cast<int>(history_stack.size());
   }
 
   /**
@@ -132,8 +137,8 @@ class InputHistory {
   bool down(std::string &out) {
     if (history_stack.empty()) return false;
     ++current_index;
-    if (current_index >= (int)history_stack.size()) {
-      current_index = (int)history_stack.size();
+    if (current_index >= static_cast<int>(history_stack.size())) {
+      current_index = static_cast<int>(history_stack.size());
       out.clear();
       return false; // signal: restore blank input
     }
@@ -143,7 +148,7 @@ class InputHistory {
 
   /** Reset navigation position without modifying the stack. */
   void reset_nav() {
-    current_index = (int)history_stack.size();
+    current_index = static_cast<int>(history_stack.size());
   }
 
   /**
@@ -157,7 +162,7 @@ class InputHistory {
     while (std::getline(f, line)) {
       if (!line.empty()) history_stack.push_back(line);
     }
-    current_index = (int)history_stack.size();
+    current_index = static_cast<int>(history_stack.size());
   }
 
   /**
@@ -174,8 +179,8 @@ class InputHistory {
     if (!f) return;
 
     static constexpr int MAX_PERSIST = 500;
-    int start = std::max(0, (int)history_stack.size() - MAX_PERSIST);
-    for (int i = start; i < (int)history_stack.size(); ++i) {
+    int start = std::max(0, static_cast<int>(history_stack.size()) - MAX_PERSIST);
+    for (int i = start; i < static_cast<int>(history_stack.size()); ++i) {
       // Escape embedded newlines so each entry stays on one line.
       for (char c : history_stack[i]) {
         if (c == '\n') f << "\\n";
@@ -325,7 +330,9 @@ static void log_close() {
 
 static void log_write(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
 static void log_write(const char *fmt, ...) {
-  if (!g_logfile) return;
+  if (!g_logfile) {
+    return;
+  }
   // timestamp
   time_t t = time(nullptr);
   char ts[32];
@@ -336,9 +343,86 @@ static void log_write(const char *fmt, ...) {
   vfprintf(g_logfile, fmt, ap);
   va_end(ap);
   fputc('\n', g_logfile);
-  fflush(g_logfile);  // flush immediately so tail -f works
+  // flush immediately so tail -f works
+  fflush(g_logfile);
+}
+
+//
+// Agent uniqueId
+//
+// Standard Base64 (RFC 4648 alphabet, '=' padding) of the bytes in data.
+// Bytes are read as unsigned char so values >= 0x80 cannot sign-extend.
+inline std::string encode_base64(const std::vector<char>& data) {
+  static const char base64_chars[] =
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
+  const auto byte_at = [&](size_t idx) -> uint32_t {
+    return idx < data.size() ? static_cast<unsigned char>(data[idx]) : 0u;
+  };
+
+  std::string encoded;
+  encoded.reserve((data.size() + 2) / 3 * 4);
+  for (size_t i = 0; i < data.size(); i += 3) {
+    // Pack up to three bytes into 24 bits; absent bytes count as zero.
+    const uint32_t val = (byte_at(i) << 16) | (byte_at(i + 1) << 8) | byte_at(i + 2);
+    encoded.push_back(base64_chars[(val >> 18) & 0x3F]);
+    encoded.push_back(base64_chars[(val >> 12) & 0x3F]);
+    encoded.push_back((i + 1 < data.size()) ? base64_chars[(val >> 6) & 0x3F] : '=');
+    encoded.push_back((i + 2 < data.size()) ? base64_chars[val & 0x3F] : '=');
+  }
+  return encoded;
 }
 
+// Process-wide session identifier for the agent. The class only groups
+// the static accessor below; it holds no per-instance state and is never
+// required to be instantiated.
+class AgentSessionId {
+  public:
+  /**
+   * Returns the identifier of this process's agent session.
+   *
+   * The id is generated on the first call; every later call returns the
+   * same value. It is the Base64 encoding of 12 bytes:
+   *   - bytes 0..5:  low 48 bits of the wall-clock time in nanoseconds
+   *   - bytes 6..11: 48 random bits
+   * 12 bytes encode to a 16 character string with no '=' padding.
+   *
+   * @return the session id (identical for the lifetime of the process)
+   */
+  static std::string uniqueId() {
+    // A function-local static initialised from a lambda is constructed
+    // exactly once, even if several threads race on the first call.
+    static const std::string s_id = [] {
+      // 1. Timestamp. system_clock is used rather than steady_clock:
+      //    the steady_clock epoch is unspecified (often boot time), so two
+      //    runs started at the same uptime would share a timestamp.
+      const auto now = std::chrono::system_clock::now();
+      const auto nanos = static_cast<uint64_t>(
+        std::chrono::duration_cast<std::chrono::nanoseconds>(
+          now.time_since_epoch()).count());
+
+      // 2. Randomness: a single 64-bit draw; only the low 48 bits are kept.
+      std::random_device rd;
+      std::mt19937_64 rng(rd());
+      std::uniform_int_distribution<uint64_t> dist(0, UINT64_MAX);
+      const uint64_t random = dist(rng);
+
+      // 3. Serialise both 48-bit halves, most significant byte first,
+      //    into a 96-bit (12 byte) buffer.
+      std::vector<char> data;
+      data.reserve(12); // 96 bits = 12 bytes
+      for (const uint64_t half : {nanos, random}) {
+        for (int shift = 40; shift >= 0; shift -= 8) {
+          data.push_back(static_cast<char>((half >> shift) & 0xFF));
+        }
+      }
+
+      // 4. Encode to Base64
+      return encode_base64(data);
+    }();
+    return s_id;
+  }
+};
+
 //
 // handling for strip_code_fences
 //
@@ -470,6 +554,7 @@ static void load_settings(NitroConfig &cfg) {
   std::string json = oss.str();
 
   cfg.thinking = true;
+  cfg.agent_id = AgentSessionId::uniqueId();
 
   // String fields
   settings_get_str(json, "model_path",  cfg.model_path);
@@ -741,14 +826,13 @@ static bool make_dir(const std::string &path) {
 //
 // System prompt
 //
-static std::string build_system_prompt(const std::vector<std::string> &knowledge_files,
-                                       const std::string &sandbox) {
+static std::string build_system_prompt(NitroConfig &cfg) {
   std::string p;
   p +=
     "You are Nitro, an agentic AI assistant for software development. "
     "Proceed with caution, guided by logic and the pursuit of knowledge.\n\n"
 
-    "Your sandbox (project directory) is: " + sandbox + "\n\n"
+    "Your sandbox (project directory) is: " + cfg.sandbox + "\n\n"
 
     "## Core Principle\n"
     "Always follow this loop: THINK → DECIDE → ACT → RESPOND\n\n"
@@ -794,6 +878,7 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
     "  TOOL:TIME                  current time\n"
     "  TOOL:RND                   random float 0..1\n"
     "  TOOL:RAG    <query>        query the RAG index for additional context\n"
+    "  TOOL:ASK    <query>        ask the user for clarification or additional context\n"
     "  TOOL:INTROSPECT            show current model settings\n"
     "  TOOL:CURL   <url>          HTTP GET, returns response body (max 32 KB)\n"
     "  TOOL:PERMISSION            ask user for explicit permission\n\n"
@@ -829,7 +914,7 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
     "- After each tool result, explain in plain English what was done\n"
     "- If no user request is provided, respond with a brief readiness message\n\n";
 
-  for (const auto &kf : knowledge_files) {
+  for (const auto &kf : cfg.knowledge_files) {
     std::ifstream f(kf);
     if (!f) continue;
     std::ostringstream oss; oss << f.rdbuf();
@@ -841,16 +926,23 @@ static std::string build_system_prompt(const std::vector<std::string> &knowledge
 static std::string strip_code_fences(const std::string &filename,
                                      const std::string &src) {
   auto ext = fs::path(filename).extension().string();
-  bool is_code = ranges::any_of(CODE_EXTENSIONS,
-                                [&](const std::string &e){ return ext == e; });
-  if (!is_code) return src;
+  bool is_code = ranges::any_of(CODE_EXTENSIONS, [&](const std::string &e){ return ext == e; });
+  if (!is_code) {
+    return unwrap(src);
+  }
   auto pos = src.find("```");
-  if (pos == std::string::npos) return src;
+  if (pos == std::string::npos) {
+    return unwrap(src);
+  }
   auto nl = src.find('\n', pos + 3);
-  if (nl == std::string::npos) return src;
+  if (nl == std::string::npos) {
+    return unwrap(src);
+  }
   std::string inner = src.substr(nl + 1);
   auto end = inner.rfind("```");
-  if (end != std::string::npos) inner = inner.substr(0, end);
+  if (end != std::string::npos) {
+    inner = inner.substr(0, end);
+  }
   return inner;
 }
 
@@ -1097,11 +1189,11 @@ void TuiState::destroy() {
 
 void TuiState::resize() {
   notcurses_term_dim_yx(nc, (unsigned *)&term_rows, (unsigned *)&term_cols);
-  ncplane_resize_simple(header,  1,                       (unsigned)term_cols);
+  ncplane_resize_simple(header, 1, (unsigned)term_cols);
   int cr = std::max(1, term_rows - 3);
-  ncplane_resize_simple(chatpl,  (unsigned)cr,            (unsigned)term_cols);
+  ncplane_resize_simple(chatpl, (unsigned)cr, (unsigned)term_cols);
   ncplane_move_yx(inputpl, term_rows - 2, 0);
-  ncplane_resize_simple(inputpl, 2,                       (unsigned)term_cols);
+  ncplane_resize_simple(inputpl, 2, (unsigned)term_cols);
   redraw_all();
 }
 
@@ -1133,8 +1225,8 @@ void TuiState::redraw_chat() {
   unsigned rows, cols;
   ncplane_dim_yx(chatpl, &rows, &cols);
   std::lock_guard<std::mutex> lk(lines_mutex);
-  int total   = (int)chat_lines.size();
-  int visible = (int)rows;
+  int total   = static_cast<int>(chat_lines.size());
+  int visible = static_cast<int>(rows);
   int start   = std::max(0, total - visible - scroll_offset);
   int end     = std::min(total, start + visible);
   for (int i = start, row = 0; i < end; ++i, ++row) {
@@ -1154,10 +1246,11 @@ void TuiState::redraw_chat() {
     }
     else if (line.rfind("You: ",   0) == 0) ch = chat_ch(100, 200, 255);
     else if (line.rfind("Nitro: ", 0) == 0) ch = chat_ch(180, 255, 180);
-    else if (line.rfind("[tool]",  0) == 0) ch = chat_ch(255, 180,  80);
-    else if (line.rfind("[err]",   0) == 0) ch = chat_ch(255,  80,  80);
+    else if (line.rfind("[🔧]",  0) == 0) ch = chat_ch(255, 180,  80);
+    else if (line.rfind("[⚠]",   0) == 0) ch = chat_ch(255,  80,  80);
     else if (line.rfind("[sys]",   0) == 0) ch = chat_ch(140, 140, 200);
-    else                                     ch = chat_ch(210, 210, 210);
+    else if (line.rfind("[🤔]",    0) == 0) ch = chat_ch(140, 140, 200);
+    else                                    ch = chat_ch(210, 210, 210);
     ncplane_set_channels(chatpl, ch);
     // Strip the [logo_N] prefix before rendering.
     std::string display = (line.rfind("[logo_", 0) == 0 && line.size() > 8)
@@ -1186,7 +1279,7 @@ void TuiState::redraw_input() const {
       int frame = spinner_frame - DELAY;  // animation frame relative to start
       for (int col = 0; col < term_cols; ++col) {
         double phase = (col * FREQ) - (frame * SPEED);
-        int idx = (int)((std::sin(phase) + 1.0) * 0.5 * (N_BLOCKS - 1));
+        int idx = static_cast<int>(((std::sin(phase) + 1.0) * 0.5 * (N_BLOCKS - 1)));
         idx = std::max(0, std::min(idx, N_BLOCKS - 1));
         // subtle brightness shift — blue-grey, not full glow
         int brightness = 80 + idx * 20;
@@ -1208,11 +1301,11 @@ void TuiState::redraw_input() const {
     int max_w = std::max(0, term_cols - prompt_cols - 1);
     std::string visible = input_buf;
     int view_offset = 0;
-    if ((int)visible.size() > max_w && max_w > 0) {
-      view_offset = (int)visible.size() - max_w;
+    if (visible.size() > max_w && max_w > 0) {
+      view_offset = static_cast<int>(visible.size() - max_w);
       visible = visible.substr(view_offset);
     }
-    int cur_in_view = std::max(0, (int)cursor_pos - view_offset);
+    int cur_in_view = std::max(0, static_cast<int>(cursor_pos - view_offset));
     cur_in_view = std::min(cur_in_view, (int)visible.size());
     std::string before = visible.substr(0, cur_in_view);
     std::string after  = cur_in_view < (int)visible.size()
@@ -1779,7 +1872,7 @@ bool AgentState::setup_model(const NitroConfig &cfg, TuiState &tui) {
   if (!llama->load_model(cfg.model_path, cfg.n_ctx, cfg.n_batch,
                          cfg.n_gpu_layers, cfg.log_level)) {
     tui.dismiss_modal_popup();
-    tui.append_line(std::string("[err] ") + llama->last_error());
+    tui.append_line(std::string("[⚠] ") + llama->last_error());
     tui.redraw_all();
     return false;
   }
@@ -1805,7 +1898,7 @@ bool AgentState::setup_embed(const std::string &path, TuiState &tui) {
   embed_llama = std::make_unique<Llama>();
   if (!embed_llama->load_embedding_model(path)) {
     tui.dismiss_modal_popup();
-    tui.append_line(std::string("[err] ") + embed_llama->last_error());
+    tui.append_line(std::string("[⚠] ") + embed_llama->last_error());
     tui.redraw_all();
     embed_llama.reset();
     return false;
@@ -1824,7 +1917,7 @@ void AgentState::reset_conversation(const std::string &sysprompt, TuiState &tui)
   apply_generation_params(NitroConfig{});
   iter = std::make_unique<LlamaIter>();
   if (!llama->add_message(*iter, "system", system_prompt)) {
-    tui.append_line(std::string("[err] System prompt injection: ") + llama->last_error());
+    tui.append_line(std::string("[⚠] System prompt injection: ") + llama->last_error());
     tui.redraw_all();
   }
 }
@@ -1868,7 +1961,7 @@ std::string AgentState::rag_tool(const NitroConfig &cfg, const std::string &agen
 
 bool AgentState::rag_load_index(const std::string &path, TuiState &tui) const {
   if (!embed_llama || !rag_db) {
-    tui.append_line("[err] Load an embedding model first: /embed <path>");
+    tui.append_line("[⚠] Load an embedding model first: /embed <path>");
     tui.redraw_all();
     return false;
   }
@@ -1883,7 +1976,7 @@ bool AgentState::rag_load_index(const std::string &path, TuiState &tui) const {
 
 bool AgentState::rag_index(const std::string &path, const NitroConfig &cfg, TuiState &tui) const {
   if (!embed_llama || !rag_db) {
-    tui.append_line("[err] Load an embedding model first: /embed <path>");
+    tui.append_line("[⚠] Load an embedding model first: /embed <path>");
     tui.redraw_all();
     return false;
   }
@@ -1892,7 +1985,7 @@ bool AgentState::rag_index(const std::string &path, const NitroConfig &cfg, TuiS
     tui.append_line("[sys]   indexing: " + filepath);
     tui.redraw_all();
     if (!embed_llama->rag_index(*rag_db, filepath)) {
-      tui.append_line(std::string("[err] rag_load: ") + embed_llama->last_error());
+      tui.append_line(std::string("[⚠] rag_load: ") + embed_llama->last_error());
       tui.redraw_all();
     }
   };
@@ -1957,64 +2050,81 @@ std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &
     return join_path(sandbox, unwrap(p));
   };
 
-  tui.append_line("[tool] → " + op);
-  tui.redraw_all();
+  auto show_tool = [&](const std::string &tool) -> void {
+    tui.append_line("[🔧] → " + tool);
+    tui.redraw_all();
+  };
 
   if (op == "TOOL:DATE") {
+    show_tool(op);
     char buf[32]; time_t t = time(nullptr);
     strftime(buf, sizeof(buf), "%Y-%m-%d", localtime(&t));
     return buf;
   }
   if (op == "TOOL:TIME") {
+    show_tool(op);
     char buf[32]; time_t t = time(nullptr);
     strftime(buf, sizeof(buf), "%H:%M:%S", localtime(&t));
     return buf;
   }
   if (op == "TOOL:RND") {
+    show_tool(op);
     return std::to_string((double)rand() / RAND_MAX);
   }
   if (op == "TOOL:RAG") {
+    show_tool(op);
     return rag_tool(cfg, arg1);
   }
   if (op == "TOOL:LIST") {
     std::string dir = resolve(arg1);
+    show_tool("listing: " + dir);
     if (!path_in_sandbox(sandbox, dir)) return "ERROR: path outside sandbox";
     return list_dir(dir);
   }
   if (op == "TOOL:EXISTS") {
     std::string p = resolve(arg1);
+    show_tool("checking: " + p);
     if (!path_in_sandbox(sandbox, p)) return "NO";
     return fs::exists(p) ? "YES" : "NO";
   }
   if (op == "TOOL:READ") {
+    show_tool("reading: " + arg1);
     std::string p = resolve(arg1);
     if (!path_in_sandbox(sandbox, p)) return "ERROR: path outside sandbox";
     return read_file(p);
   }
   if (op == "TOOL:WRITE") {
+    show_tool("writing: " + arg1);
     std::string p = resolve(arg1);
     if (!path_in_sandbox(sandbox, p)) {
       return "ERROR: path outside sandbox";
     }
-    if (!tui.confirm_dialog(std::format("Allow model to write {}?", p))) {
+    if (cfg.permission_prompt && !tui.confirm_dialog(std::format("Allow model to write {}?", p))) {
       return "ERROR: action prevented by user";
     }
-    std::string content = unwrap(strip_code_fences(arg1, arg2));
+    std::string content = strip_code_fences(arg1, arg2);
     return write_file(p, content) ? "OK: written to " + arg1 : "ERROR: write failed for " + arg1;
   }
   if (op == "TOOL:MKDIR") {
     std::string p = resolve(arg1);
+    show_tool("mkdir: " + arg1);
     if (!path_in_sandbox(sandbox, p)) {
       return "ERROR: path outside sandbox";
     }
     return make_dir(p) ? "OK: created " + arg1 : "ERROR: mkdir failed for " + arg1;
   }
   if (op == "TOOL:CURL") {
+    show_tool("curl: " + arg1);
     return tool_curl(arg1);
   }
   if (op == "TOOL:INTROSPECT") {
+    show_tool("introspecting: " + arg1);
     return introspect(cfg);
   }
+  if (op == "TOOL:ASK") {
+    show_tool("asking: " + arg1);
+    return tui.readline_blocking();
+  }
   if (op == "TOOL:RUN") {
     if (!run_allowed.empty()) {
       bool permitted = ranges::any_of(run_allowed, [&](const std::string &a) {return a == arg1;});
@@ -2022,10 +2132,11 @@ std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &
         return "ERROR: '" + arg1 + "' is not in the TOOL:RUN allowlist. "
           "Use /set run_allowed <name> to permit it.";
       }
-    } else if (!tui.confirm_dialog(std::format("Allow {} {} to run?", arg1, arg2))) {
+    } else if (cfg.permission_prompt && !tui.confirm_dialog(std::format("Allow {} {} to run?", arg1, arg2))) {
       return "ERROR: prevented by user";
     }
     std::string command = arg1 + " " + arg2 + " 2>&1";
+    show_tool("running: " + command);
     FILE *fp = popen(command.c_str(), "r");
     if (!fp) {
       return "ERROR: popen failed";
@@ -2049,7 +2160,7 @@ std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &
 //
 bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cfg, TuiState &tui) const {
   if (!model_loaded) {
-    tui.append_line("[err] No model loaded. Use /model <path>");
+    tui.append_line("[⚠] No model loaded. Use /model <path>");
     tui.redraw_all();
     return false;
   }
@@ -2064,12 +2175,12 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
     }
   }
   if (!iter) {
-    tui.append_line("[err] Conversation not initialised (call /clear to reset)");
+    tui.append_line("[⚠] Conversation not initialised (call /clear to reset)");
     tui.redraw_all();
     return false;
   }
   if (!llama->add_message(*iter, "user", effective_message)) {
-    tui.append_line(std::string("[err] add_message: ") + llama->last_error());
+    tui.append_line(std::string("[⚠] add_message: ") + llama->last_error());
     tui.redraw_all();
     return false;
   }
@@ -2079,7 +2190,7 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
   // visible text immediately.  The spinner activates only while thinking.
   enum {t_init, t_think, t_thunk} think_mode = (cfg.thinking ? t_init : t_thunk);
 
-  tui.set_thinking(false);
+  tui.set_thinking(true);
   std::string buffer;
 
   auto invoke_tool = [&](const std::string &buffer, const std::string_view template_str) -> void {
@@ -2102,14 +2213,14 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
     std::string result = process_tool(tool, cfg, tui);
     std::string content = TOOL_RESULT + std::vformat(template_str, std::make_format_args(result));
     log_write("tool: [%s] result: [%s]", tool.c_str(), result.c_str());
+
     if (!llama->add_message(*iter, "tool_result", content)) {
-      tui.append_line(std::string("[err] tool result inject: ") + llama->last_error());
-      tui.redraw_all();
+      tui.append_line(std::string("[⚠] tool result inject: ") + llama->last_error());
     }
     if (!iter->_has_next) {
-      tui.append_line(std::string("[err] failed to evoke tool response: ") + llama->last_error());
-      tui.redraw_all();
+      tui.append_line(std::string("[⚠] failed to evoke tool response: ") + llama->last_error());
     }
+    tui.redraw_all();
   };
 
   auto start_think = [&](const std::string &tag) {
@@ -2117,7 +2228,6 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
       auto pos = buffer.find(tag);
       if (pos != std::string::npos) {
         think_mode = t_think;
-        tui.set_thinking(true);
         // display prededing text
         buffer = buffer.substr(0, pos);
       }
@@ -2129,7 +2239,6 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
       auto pos = buffer.find(tag);
       if (pos != std::string::npos) {
         think_mode = t_thunk;
-        tui.set_thinking(false);
         // display remaining text
         buffer = buffer.substr(pos + tag.length());
       }
@@ -2141,9 +2250,9 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
     notcurses_get_nblock(tui.nc, &ni);
     if (ni.id == NCKEY_ESC) {
       tui.set_thinking(false);
-      tui.append_line("[err] Generation cancelled by user (Escape)");
+      tui.append_line("[⚠] Generation cancelled by user (Escape)");
       tui.redraw_all();
-      return false;
+      break;
     }
     std::string tok = llama->next(*iter);
     if (tok == "<") {
@@ -2154,7 +2263,6 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
       }
       if (tag == "<|think|>") {
         think_mode = t_think;
-        tui.set_thinking(true);
         continue;
       } else {
         buffer += tag;
@@ -2173,6 +2281,7 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
       tui.tick_spinner();
       end_think("</think>");
       end_think("</|think|>");
+      end_think("</|think>");
       end_think("<think|>");
       end_think("<channel|>");
     }
@@ -2207,6 +2316,12 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
         tui.append_token(buffer.substr(0, pos + 1));
         buffer = buffer.substr(pos + 1);
       }
+    } else {
+      auto pos = buffer.find('\n');
+      if (pos != std::string::npos) {
+        tui.append_token("[🤔] " + buffer.substr(0, pos + 1));
+        buffer = buffer.substr(pos + 1);
+      }
     }
   }
 
@@ -2267,7 +2382,7 @@ static void handle_slash(const std::string &input,
     }
     cfg.model_path = rest;
     if (agent.setup_model(cfg, tui)) {
-      std::string sysp = build_system_prompt(cfg.knowledge_files, cfg.sandbox);
+      std::string sysp = build_system_prompt(cfg);
       agent.reset_conversation(sysp, tui);
       save_settings(cfg);
     }
@@ -2333,7 +2448,7 @@ static void handle_slash(const std::string &input,
   if (verb == "/clear") {
     { std::lock_guard<std::mutex> lk(tui.lines_mutex);
       tui.chat_lines.clear(); }
-    std::string sysp = build_system_prompt(cfg.knowledge_files, cfg.sandbox);
+    std::string sysp = build_system_prompt(cfg);
     agent.reset_conversation(sysp, tui);
     tui.append_line("[sys] Conversation cleared.");
     tui.redraw_all();
@@ -2368,7 +2483,7 @@ static void handle_slash(const std::string &input,
     val.erase(0, val.find_first_not_of(" \t"));
 
     if (key.empty() || val.empty()) {
-      tui.append_line("[err] Usage: /set <key> <value>");
+      tui.append_line("[⚠] Usage: /set <key> <value>");
       tui.redraw_all(); return;
     }
 
@@ -2406,11 +2521,11 @@ static void handle_slash(const std::string &input,
           tui.append_line("[sys] run_allowed: " + list);
         }
       } else {
-        tui.append_line("[err] Unknown key '" + key + "'.  Try /help for list.");
+        tui.append_line("[⚠] Unknown key '" + key + "'.  Try /help for list.");
         ok = false;
       }
     } catch (const std::exception &ex) {
-      tui.append_line(std::string("[err] /set: ") + ex.what());
+      tui.append_line(std::string("[⚠] /set: ") + ex.what());
       ok = false;
     }
 
@@ -2425,7 +2540,7 @@ static void handle_slash(const std::string &input,
     return;
   }
 
-  tui.append_line("[err] Unknown command: " + verb + "  (try /help)");
+  tui.append_line("[⚠] Unknown command: " + verb + "  (try /help)");
   tui.redraw_all();
 }
 
@@ -2488,6 +2603,8 @@ int main(int argc, char **argv) {
       log_open();
     } else if (a == "-t" || a == "--think") {
       cfg.thinking = false;
+    } else if (a == "-p" || a == "--prompt-permission") {
+      cfg.permission_prompt = true;
     } else if (a == "-h" || a == "--help") {
       std::puts("Usage: nitro [options] [project_dir]\n"
                 "\n"
@@ -2550,7 +2667,7 @@ int main(int argc, char **argv) {
   AgentState agent;
   if (!cfg.model_path.empty()) {
     if (agent.setup_model(cfg, tui)) {
-      std::string sysp = build_system_prompt(cfg.knowledge_files, cfg.sandbox);
+      std::string sysp = build_system_prompt(cfg);
       agent.reset_conversation(sysp, tui);
     }
     if (!cfg.embed_path.empty()) {

From a8183844687f619ee0780d2f315cc22686ff33c5 Mon Sep 17 00:00:00 2001
From: Chris Warren-Smith <chrisws@redsilver>
Date: Tue, 2 Jun 2026 17:40:56 +0930
Subject: [PATCH 54/54] LLAMA: nitro - huge max-tokens for generating large
 files. update icons

---
 llama/llama-sb.cpp |   5 ++
 llama/nitro.cpp    | 211 ++++++++++++++++++++++++---------------------
 2 files changed, 118 insertions(+), 98 deletions(-)

diff --git a/llama/llama-sb.cpp b/llama/llama-sb.cpp
index c28b189..0c26712 100644
--- a/llama/llama-sb.cpp
+++ b/llama/llama-sb.cpp
@@ -624,6 +624,11 @@ string Llama::token_to_string(LlamaIter &iter, llama_token tok) {
 
     result.append(buf, n);
 
+    // detect end of max-tokens
+    if (++iter._tokens_generated > _max_tokens) {
+      iter._has_next = false;
+    }
+
     // detect stop words
     if (iter._has_next) {
       for (const auto &stop : _stop_sequences) {
diff --git a/llama/nitro.cpp b/llama/nitro.cpp
index 21d6cef..5a5f45a 100644
--- a/llama/nitro.cpp
+++ b/llama/nitro.cpp
@@ -74,7 +74,6 @@ struct NitroConfig {
   int   n_ctx          = 65536;
   int   n_batch        = 512;
   int   n_gpu_layers   = 32;
-  int   n_max_tokens   = 4096;
   int   log_level      = GGML_LOG_LEVEL_CONT;
   float temperature    = 0.6f;
   float top_p          = 0.95f;
@@ -565,7 +564,6 @@ static void load_settings(NitroConfig &cfg) {
   settings_get_int(json, "n_ctx",          cfg.n_ctx);
   settings_get_int(json, "n_batch",        cfg.n_batch);
   settings_get_int(json, "n_gpu_layers",   cfg.n_gpu_layers);
-  settings_get_int(json, "n_max_tokens",   cfg.n_max_tokens);
   settings_get_int(json, "top_k",          cfg.top_k);
   settings_get_int(json, "penalty_last_n", cfg.penalty_last_n);
   settings_get_int(json, "rag_top_k",      cfg.rag_top_k);
@@ -577,6 +575,14 @@ static void load_settings(NitroConfig &cfg) {
   settings_get_float(json, "penalty_repeat", cfg.penalty_repeat);
 }
 
+// Chat-line prefixes (icons). Declared const rather than constexpr because a
+// constexpr std::string with static storage only compiles where the literal
+// fits the library's small-string buffer, which is not portable.
+static const std::string ICON_ERR   = " ⚡ ▏";
+static const std::string ICON_THINK = " 🤔 ▏";
+static const std::string ICON_TOOL  = " 🔧 ▏";
+static const std::string ICON_SYS   = " 🤖 ▏";
+
 static std::string introspect(const NitroConfig &cfg) {
   static constexpr std::string_view tmpl =
     "{{\n"
@@ -586,7 +592,6 @@ static std::string introspect(const NitroConfig &cfg) {
     "  \"n_ctx\":          {},\n"
     "  \"n_batch\":        {},\n"
     "  \"n_gpu_layers\":   {},\n"
-    "  \"n_max_tokens\":   {},\n"
     "  \"temperature\":    {},\n"
     "  \"top_p\":          {},\n"
     "  \"min_p\":          {},\n"
@@ -602,7 +607,6 @@ static std::string introspect(const NitroConfig &cfg) {
                      cfg.n_ctx,
                      cfg.n_batch,
                      cfg.n_gpu_layers,
-                     cfg.n_max_tokens,
                      cfg.temperature,
                      cfg.top_p,
                      cfg.min_p,
@@ -932,11 +936,11 @@ static std::string strip_code_fences(const std::string &filename,
   }
   auto pos = src.find("```");
   if (pos == std::string::npos) {
-    return unwrap(src);
+    return src;
   }
   auto nl = src.find('\n', pos + 3);
   if (nl == std::string::npos) {
-    return unwrap(src);
+    return src;
   }
   std::string inner = src.substr(nl + 1);
   auto end = inner.rfind("```");
@@ -1244,13 +1248,13 @@ void TuiState::redraw_chat() {
       int gi = std::max(0, std::min(logo_row, 6));
       ch = chat_ch(GRAD_R[gi], GRAD_G[gi], GRAD_B[gi]);
     }
-    else if (line.rfind("You: ",   0) == 0) ch = chat_ch(100, 200, 255);
-    else if (line.rfind("Nitro: ", 0) == 0) ch = chat_ch(180, 255, 180);
-    else if (line.rfind("[🔧]",  0) == 0) ch = chat_ch(255, 180,  80);
-    else if (line.rfind("[⚠]",   0) == 0) ch = chat_ch(255,  80,  80);
-    else if (line.rfind("[sys]",   0) == 0) ch = chat_ch(140, 140, 200);
-    else if (line.rfind("[🤔]",    0) == 0) ch = chat_ch(140, 140, 200);
-    else                                    ch = chat_ch(210, 210, 210);
+    else if (line.rfind("You: ",   0) == 0)  ch = chat_ch(100, 200, 255);
+    else if (line.rfind("Nitro: ", 0) == 0)  ch = chat_ch(180, 255, 180);
+    else if (line.rfind(ICON_SYS,  0) == 0)  ch = chat_ch(140, 140, 200);
+    else if (line.rfind(ICON_TOOL, 0) == 0)  ch = chat_ch(255, 180,  80);
+    else if (line.rfind(ICON_ERR,  0) == 0)  ch = chat_ch(255,  80,  80);
+    else if (line.rfind(ICON_THINK, 0) == 0) ch = chat_ch(140, 140, 200);
+    else                                     ch = chat_ch(210, 210, 210);
     ncplane_set_channels(chatpl, ch);
     // Strip the [logo_N] prefix before rendering.
     std::string display = (line.rfind("[logo_", 0) == 0 && line.size() > 8)
@@ -1453,20 +1457,20 @@ void TuiState::show_modal_popup(const std::string &message) {
 }
 
 void TuiState::show_help() {
-  append_line("[sys] Commands:");
-  append_line("[sys]   /model  [path]           load a GGUF model (picker if no path)");
-  append_line("[sys]   /embed  [path]           load an embedding model (picker if no path)");
-  append_line("[sys]   /rag    [path]           index file or directory (picker if no path)");
-  append_line("[sys]   /memory                  KV / VRAM / layer stats");
-  append_line("[sys]   /clear                   reset conversation");
-  append_line("[sys]   /settings                show current settings");
-  append_line("[sys]   /set    <key> <value>    change a setting live");
-  append_line("[sys]   /help                    this message");
-  append_line("[sys]   exit / quit              exit Nitro");
-  append_line("[sys] Settable keys (via /set):");
-  append_line("[sys]   temperature  top_p  top_k  min_p  penalty_repeat");
-  append_line("[sys]   n_max_tokens  penalty_last_n  rag_top_k  n_gpu_layers");
-  append_line("[sys]   run_allowed  (comma-separated list, e.g. python3,make)");
+  append_line(ICON_SYS + "Commands:");  // ICON_SYS prefix makes redraw_chat() colour each row as a system line
+  append_line(ICON_SYS + "  /model  [path]           load a GGUF model (picker if no path)");
+  append_line(ICON_SYS + "  /embed  [path]           load an embedding model (picker if no path)");
+  append_line(ICON_SYS + "  /rag    [path]           index file or directory (picker if no path)");
+  append_line(ICON_SYS + "  /memory                  KV / VRAM / layer stats");
+  append_line(ICON_SYS + "  /clear                   reset conversation");
+  append_line(ICON_SYS + "  /settings                show current settings");
+  append_line(ICON_SYS + "  /set    <key> <value>    change a setting live");
+  append_line(ICON_SYS + "  /help                    this message");
+  append_line(ICON_SYS + "  exit / quit              exit Nitro");
+  append_line(ICON_SYS + "Settable keys (via /set):");  // keep this list in sync with the /set branch of handle_slash()
+  append_line(ICON_SYS + "  temperature  top_p  top_k  min_p  penalty_repeat");
+  append_line(ICON_SYS + "  penalty_last_n  rag_top_k  n_gpu_layers");
+  append_line(ICON_SYS + "  run_allowed  (comma-separated list, e.g. python3,make)");
   redraw_all();
 }
 
@@ -1840,7 +1844,7 @@ std::string TuiState::readline_blocking() {
 void AgentState::apply_generation_params(const NitroConfig &cfg) const {
   llama->add_stop("<|turn|>");
   llama->add_stop("<|im_end|>");
-  llama->set_max_tokens(cfg.n_max_tokens);
+  llama->set_max_tokens(512000);
   llama->set_temperature(cfg.temperature);
   llama->set_top_k(cfg.top_k);
   llama->set_top_p(cfg.top_p);
@@ -1855,7 +1859,7 @@ void AgentState::apply_generation_params(const NitroConfig &cfg) const {
 //
 bool AgentState::setup_model(const NitroConfig &cfg, TuiState &tui) {
   if (cfg.model_path.empty()) {
-    tui.append_line("[sys] No model loaded.  Use /model <path> to load a GGUF.");
+    tui.append_line(ICON_SYS + "No model loaded.  Use /model <path> to load a GGUF.");
     tui.redraw_all();
     return false;
   }
@@ -1872,22 +1876,21 @@ bool AgentState::setup_model(const NitroConfig &cfg, TuiState &tui) {
   if (!llama->load_model(cfg.model_path, cfg.n_ctx, cfg.n_batch,
                          cfg.n_gpu_layers, cfg.log_level)) {
     tui.dismiss_modal_popup();
-    tui.append_line(std::string("[⚠] ") + llama->last_error());
+    tui.append_line(ICON_ERR + llama->last_error());
     tui.redraw_all();
     return false;
   }
   tui.dismiss_modal_popup();
   model_loaded = true;
   tui.current_model = model_name;
-  tui.append_line("[sys] Model ready: " + tui.current_model);
+  tui.append_line(ICON_SYS + "Model ready: " + tui.current_model);
   LlamaMemoryInfo mem = llama->memory_info();
-  tui.append_line("[sys] " + mem.advice);
+  tui.append_line(ICON_SYS + "" + mem.advice);
   tui.kv_used  = mem.kv_used;
   tui.kv_total = mem.kv_total;
   tui.vram_used  = mem.vram_used;
   tui.vram_total = mem.vram_total;
-
-  tui.append_line(std::string("[sys] Thinking mode: ") + (cfg.thinking ? "enabled" : "disabled"));
+  tui.append_line(ICON_SYS + "Thinking mode: " + (cfg.thinking ? "enabled" : "disabled"));
   tui.redraw_all();
   return true;
 }
@@ -1898,7 +1901,7 @@ bool AgentState::setup_embed(const std::string &path, TuiState &tui) {
   embed_llama = std::make_unique<Llama>();
   if (!embed_llama->load_embedding_model(path)) {
     tui.dismiss_modal_popup();
-    tui.append_line(std::string("[⚠] ") + embed_llama->last_error());
+    tui.append_line(ICON_ERR + embed_llama->last_error());
     tui.redraw_all();
     embed_llama.reset();
     return false;
@@ -1906,7 +1909,7 @@ bool AgentState::setup_embed(const std::string &path, TuiState &tui) {
   tui.dismiss_modal_popup();
   rag_db      = std::make_unique<RagDB>();
   rag_session = std::make_unique<RagSession>();
-  tui.append_line("[sys] Embedding model ready.");
+  tui.append_line(ICON_SYS + "Embedding model ready.");
   tui.redraw_all();
   return true;
 }
@@ -1917,7 +1920,7 @@ void AgentState::reset_conversation(const std::string &sysprompt, TuiState &tui)
   apply_generation_params(NitroConfig{});
   iter = std::make_unique<LlamaIter>();
   if (!llama->add_message(*iter, "system", system_prompt)) {
-    tui.append_line(std::string("[⚠] System prompt injection: ") + llama->last_error());
+    tui.append_line(ICON_ERR + "System prompt injection: " + llama->last_error());
     tui.redraw_all();
   }
 }
@@ -1961,13 +1964,13 @@ std::string AgentState::rag_tool(const NitroConfig &cfg, const std::string &agen
 
 bool AgentState::rag_load_index(const std::string &path, TuiState &tui) const {
   if (!embed_llama || !rag_db) {
-    tui.append_line("[⚠] Load an embedding model first: /embed <path>");
+    tui.append_line(ICON_ERR + "Load an embedding model first: /embed <path>");
     tui.redraw_all();
     return false;
   }
 
   if (!rag_db->load(path)) {
-    tui.append_line("[sys] failed to load");
+    tui.append_line(ICON_SYS + "failed to load");
     tui.redraw_all();
   }
 
@@ -1976,16 +1979,16 @@ bool AgentState::rag_load_index(const std::string &path, TuiState &tui) const {
 
 bool AgentState::rag_index(const std::string &path, const NitroConfig &cfg, TuiState &tui) const {
   if (!embed_llama || !rag_db) {
-    tui.append_line("[⚠] Load an embedding model first: /embed <path>");
+    tui.append_line(ICON_ERR + "Load an embedding model first: /embed <path>");
     tui.redraw_all();
     return false;
   }
 
   auto index_one = [&](const std::string &filepath) {
-    tui.append_line("[sys]   indexing: " + filepath);
+    tui.append_line(ICON_SYS + "  indexing: " + filepath);
     tui.redraw_all();
     if (!embed_llama->rag_index(*rag_db, filepath)) {
-      tui.append_line(std::string("[⚠] rag_load: ") + embed_llama->last_error());
+      tui.append_line(ICON_ERR + "rag_load: " + embed_llama->last_error());
       tui.redraw_all();
     }
   };
@@ -2006,7 +2009,7 @@ bool AgentState::rag_index(const std::string &path, const NitroConfig &cfg, TuiS
   }
 
   std::string save_path = join_path(cfg.sandbox, "rag-index.bin");
-  tui.append_line("[sys] saving index: " + save_path);
+  tui.append_line(ICON_SYS + "saving index: " + save_path);
   tui.redraw_all();
   rag_db->save(save_path);
 
@@ -2051,7 +2054,7 @@ std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &
   };
 
   auto show_tool = [&](const std::string &tool) -> void {
-    tui.append_line("[🔧] → " + tool);
+    tui.append_line(ICON_TOOL + "→ " + tool);
     tui.redraw_all();
   };
 
@@ -2160,7 +2163,7 @@ std::string AgentState::process_tool(const std::string &cmd, const NitroConfig &
 //
 bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cfg, TuiState &tui) const {
   if (!model_loaded) {
-    tui.append_line("[⚠] No model loaded. Use /model <path>");
+    tui.append_line(ICON_ERR + "No model loaded. Use /model <path>");
     tui.redraw_all();
     return false;
   }
@@ -2175,12 +2178,12 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
     }
   }
   if (!iter) {
-    tui.append_line("[⚠] Conversation not initialised (call /clear to reset)");
+    tui.append_line(ICON_ERR + "Conversation not initialised (call /clear to reset)");
     tui.redraw_all();
     return false;
   }
   if (!llama->add_message(*iter, "user", effective_message)) {
-    tui.append_line(std::string("[⚠] add_message: ") + llama->last_error());
+    tui.append_line(ICON_ERR + "add_message: " + llama->last_error());
     tui.redraw_all();
     return false;
   }
@@ -2215,15 +2218,15 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
     log_write("tool: [%s] result: [%s]", tool.c_str(), result.c_str());
 
     if (!llama->add_message(*iter, "tool_result", content)) {
-      tui.append_line(std::string("[⚠] tool result inject: ") + llama->last_error());
+      tui.append_line(ICON_ERR + "tool result inject: " + llama->last_error());
     }
     if (!iter->_has_next) {
-      tui.append_line(std::string("[⚠] failed to evoke tool response: ") + llama->last_error());
+      tui.append_line(ICON_ERR + "failed to evoke tool response: " + llama->last_error());
     }
     tui.redraw_all();
   };
 
-  auto start_think = [&](const std::string &tag) {
+  auto start_think = [&](const std::string &tag) -> void {
     if (think_mode == t_init) {
       auto pos = buffer.find(tag);
       if (pos != std::string::npos) {
@@ -2234,7 +2237,7 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
     }
   };
 
-  auto end_think = [&](const std::string &tag) {
+  auto end_think = [&](const std::string &tag) -> void {
     if (think_mode == t_think) {
       auto pos = buffer.find(tag);
       if (pos != std::string::npos) {
@@ -2245,15 +2248,26 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
     }
   };
 
-  while (iter->_has_next) {
+  // Non-blocking Escape poll. Latches, so a cancel seen inside fetch_all() also ends the outer loop.
+  auto is_escape = [&, cancelled = false]() mutable -> bool {
     ncinput ni{};
     notcurses_get_nblock(tui.nc, &ni);
     if (ni.id == NCKEY_ESC) {
       tui.set_thinking(false);
-      tui.append_line("[⚠] Generation cancelled by user (Escape)");
+      tui.append_line(ICON_ERR + "Generation cancelled by user (Escape)");
       tui.redraw_all();
-      break;
     }
+    return (cancelled = cancelled || ni.id == NCKEY_ESC);
+  };
+  auto fetch_all = [&]() -> void {  // drain the remaining tokens of the reply into buffer
+    while (iter->_has_next && !is_escape()) {  // stops early when the user presses Escape
+      std::string tok = llama->next(*iter);
+      buffer += tok;
+      tui.tick_spinner();  // advance the spinner once per fetched token
+    }
+  };
+
+  while (iter->_has_next && !is_escape()) {
     std::string tok = llama->next(*iter);
     if (tok == "<") {
       // fetch the complete tag
@@ -2270,7 +2284,6 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
     } else {
       buffer += tok;
     }
-
     if (think_mode == t_init) {
       start_think("<think>");
       start_think("<|think|>");
@@ -2288,16 +2301,16 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
     if (think_mode == t_thunk) {
       auto tool_start = buffer.find("TOOL:");
       if (tool_start == 0) {
-        // fetch all remaining tokens
-        invoke_tool(trim(buffer + llama->all(*iter)), "TOOL_RESULT: {}");
+        fetch_all();
+        invoke_tool(trim(buffer), "TOOL_RESULT: {}");
         buffer.clear();
         think_mode = t_init;
         continue;
       }
-      // see https://ai.google.dev/gemma/docs/core/prompt-formatting-gemma4
       tool_start = buffer.find("<|tool_call>call:");
       if (tool_start != std::string::npos) {
-        buffer += llama->all(*iter);
+        // see https://ai.google.dev/gemma/docs/core/prompt-formatting-gemma4
+        fetch_all();
         auto pos = buffer.find_last_not_of("}<tool_call|>");
         if (pos != std::string::npos) {
           buffer = buffer.substr(0, pos);
@@ -2319,7 +2332,10 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
     } else {
       auto pos = buffer.find('\n');
       if (pos != std::string::npos) {
-        tui.append_token("[🤔] " + buffer.substr(0, pos + 1));
+        auto thought = buffer.substr(0, pos + 1);
+        if (thought.length() > 1) {
+          tui.append_token(ICON_THINK + thought);
+        }
         buffer = buffer.substr(pos + 1);
       }
     }
@@ -2338,7 +2354,8 @@ bool AgentState::run_turn(const std::string &user_message, const NitroConfig &cf
   tui.vram_used  = mem.vram_used;
   tui.vram_total = mem.vram_total;
   char stat[128];
-  std::snprintf(stat, sizeof(stat), "[sys] %.1f tok/s  (%d tokens)  KV %.1f%%",
+  // Keep the format a literal (compiler-checked, safe if the icon ever contains '%'); icon goes via %s.
+  std::snprintf(stat, sizeof(stat), "%s%.1f tok/s  (%d tokens)  KV %.1f%%", ICON_SYS.c_str(),
                 (double)tui.tokens_per_sec,
                 iter->_tokens_generated,
                 (double)mem.kv_percent);
@@ -2371,11 +2388,11 @@ static void handle_slash(const std::string &input,
   // GGUF.  The picker starts in the current sandbox directory.
   if (verb == "/model") {
     if (rest.empty()) {
-      tui.append_line("[sys] Opening model picker…");
+      tui.append_line(ICON_SYS + "Opening model picker…");
       tui.redraw_all();
       rest = tui.file_picker(cfg.sandbox, "Model File");
       if (rest.empty()) {
-        tui.append_line("[sys] /model cancelled.");
+        tui.append_line(ICON_SYS + "/model cancelled.");
         tui.redraw_all();
         return;
       }
@@ -2395,11 +2412,11 @@ static void handle_slash(const std::string &input,
   // embedding GGUF.
   if (verb == "/embed") {
     if (rest.empty()) {
-      tui.append_line("[sys] Opening embedding model picker…");
+      tui.append_line(ICON_SYS + "Opening embedding model picker…");
       tui.redraw_all();
       rest = tui.file_picker(cfg.sandbox, "Embedding Model");
       if (rest.empty()) {
-        tui.append_line("[sys] /embed cancelled.");
+        tui.append_line(ICON_SYS + "/embed cancelled.");
         tui.redraw_all();
         return;
       }
@@ -2418,21 +2435,21 @@ static void handle_slash(const std::string &input,
       // Launch the interactive folder picker starting from the sandbox.
       path = tui.rag_folder_picker(cfg.sandbox);
       if (path.empty()) {
-        tui.append_line("[sys] RAG indexing cancelled.");
+        tui.append_line(ICON_SYS + "RAG indexing cancelled.");
         tui.redraw_all();
         return;
       }
     }
     if (path.find_last_not_of(".bin") != std::string::npos) {
-      tui.append_line("[sys] Loading index: " + path);
+      tui.append_line(ICON_SYS + "Loading index: " + path);
       tui.redraw_all();
       agent.rag_load_index(path, tui);
     } else {
-      tui.append_line("[sys] Indexing: " + path);
+      tui.append_line(ICON_SYS + "Indexing: " + path);
       tui.redraw_all();
       agent.rag_index(path, cfg, tui);
     }
-    tui.append_line("[sys] done");
+    tui.append_line(ICON_SYS + "done");
     tui.redraw_all();
     return;
   }
@@ -2440,7 +2457,7 @@ static void handle_slash(const std::string &input,
   if (verb == "/memory") {
     std::istringstream iss(agent.memory_info_text());
     std::string line;
-    while (std::getline(iss, line)) tui.append_line("[sys] " + line);
+    while (std::getline(iss, line)) tui.append_line(ICON_SYS + "" + line);
     tui.redraw_all();
     return;
   }
@@ -2450,25 +2467,24 @@ static void handle_slash(const std::string &input,
       tui.chat_lines.clear(); }
     std::string sysp = build_system_prompt(cfg);
     agent.reset_conversation(sysp, tui);
-    tui.append_line("[sys] Conversation cleared.");
+    tui.append_line(ICON_SYS + "Conversation cleared.");
     tui.redraw_all();
     return;
   }
 
   if (verb == "/settings") {
-    tui.append_line("[sys] Current settings:");
-    tui.append_line("[sys]   model_path    : " + cfg.model_path);
-    tui.append_line("[sys]   embed_path    : " + cfg.embed_path);
-    tui.append_line("[sys]   sandbox       : " + cfg.sandbox);
-    tui.append_line("[sys]   n_ctx         : " + std::to_string(cfg.n_ctx));
-    tui.append_line("[sys]   n_gpu_layers  : " + std::to_string(cfg.n_gpu_layers));
-    tui.append_line("[sys]   n_max_tokens  : " + std::to_string(cfg.n_max_tokens));
-    tui.append_line("[sys]   temperature   : " + std::to_string(cfg.temperature));
-    tui.append_line("[sys]   top_p         : " + std::to_string(cfg.top_p));
-    tui.append_line("[sys]   top_k         : " + std::to_string(cfg.top_k));
-    tui.append_line("[sys]   penalty_repeat: " + std::to_string(cfg.penalty_repeat));
-    tui.append_line("[sys]   rag_top_k     : " + std::to_string(cfg.rag_top_k));
-    tui.append_line("[sys]   saved to      : " + settings_path());
+    tui.append_line(ICON_SYS + "Current settings:");
+    tui.append_line(ICON_SYS + "  model_path    : " + cfg.model_path);
+    tui.append_line(ICON_SYS + "  embed_path    : " + cfg.embed_path);
+    tui.append_line(ICON_SYS + "  sandbox       : " + cfg.sandbox);
+    tui.append_line(ICON_SYS + "  n_ctx         : " + std::to_string(cfg.n_ctx));
+    tui.append_line(ICON_SYS + "  n_gpu_layers  : " + std::to_string(cfg.n_gpu_layers));
+    tui.append_line(ICON_SYS + "  temperature   : " + std::to_string(cfg.temperature));
+    tui.append_line(ICON_SYS + "  top_p         : " + std::to_string(cfg.top_p));
+    tui.append_line(ICON_SYS + "  top_k         : " + std::to_string(cfg.top_k));
+    tui.append_line(ICON_SYS + "  penalty_repeat: " + std::to_string(cfg.penalty_repeat));
+    tui.append_line(ICON_SYS + "  rag_top_k     : " + std::to_string(cfg.rag_top_k));
+    tui.append_line(ICON_SYS + "  saved to      : " + settings_path());
     tui.redraw_all();
     return;
   }
@@ -2483,7 +2499,7 @@ static void handle_slash(const std::string &input,
     val.erase(0, val.find_first_not_of(" \t"));
 
     if (key.empty() || val.empty()) {
-      tui.append_line("[⚠] Usage: /set <key> <value>");
+      tui.append_line(ICON_ERR + "Usage: /set <key> <value>");
       tui.redraw_all(); return;
     }
 
@@ -2496,11 +2512,10 @@ static void handle_slash(const std::string &input,
       else if (key == "top_k")     { cfg.top_k          = std::stoi(val); needs_reparam = true; }
       else if (key == "penalty_repeat") { cfg.penalty_repeat = std::stof(val); needs_reparam = true; }
       else if (key == "penalty_last_n") { cfg.penalty_last_n = std::stoi(val); needs_reparam = true; }
-      else if (key == "n_max_tokens")   { cfg.n_max_tokens   = std::stoi(val); needs_reparam = true; }
       else if (key == "rag_top_k")      { cfg.rag_top_k      = std::stoi(val); }
       else if (key == "n_gpu_layers")   {
         cfg.n_gpu_layers = std::stoi(val);
-        tui.append_line("[sys] n_gpu_layers will take effect on next /model load.");
+        tui.append_line(ICON_SYS + "n_gpu_layers will take effect on next /model load.");
       } else if (key == "run_allowed") {
         // Accept a comma-separated list of basenames, or "none" to clear.
         cfg.run_allowed.clear();
@@ -2514,18 +2529,18 @@ static void handle_slash(const std::string &input,
           }
         }
         if (cfg.run_allowed.empty()) {
-          tui.append_line("[sys] run_allowed cleared — all sandbox programs permitted.");
+          tui.append_line(ICON_SYS + "run_allowed cleared — all sandbox programs permitted.");
         } else {
           std::string list;
           for (const auto &e : cfg.run_allowed) list += e + " ";
-          tui.append_line("[sys] run_allowed: " + list);
+          tui.append_line(ICON_SYS + "run_allowed: " + list);
         }
       } else {
-        tui.append_line("[⚠] Unknown key '" + key + "'.  Try /help for list.");
+        tui.append_line(ICON_ERR + "Unknown key '" + key + "'.  Try /help for list.");
         ok = false;
       }
     } catch (const std::exception &ex) {
-      tui.append_line(std::string("[⚠] /set: ") + ex.what());
+      tui.append_line(ICON_ERR + "/set: " + ex.what());
       ok = false;
     }
 
@@ -2534,13 +2549,13 @@ static void handle_slash(const std::string &input,
         agent.apply_generation_params(cfg);
       }
       save_settings(cfg);
-      tui.append_line("[sys] " + key + " = " + val);
+      tui.append_line(ICON_SYS + "" + key + " = " + val);
     }
     tui.redraw_all();
     return;
   }
 
-  tui.append_line("[⚠] Unknown command: " + verb + "  (try /help)");
+  tui.append_line(ICON_ERR + "Unknown command: " + verb + "  (try /help)");
   tui.redraw_all();
 }
 
@@ -2557,8 +2572,8 @@ static void welcome(TuiState &tui, const std::string &sandbox) {
   tui.append_line("[logo_5]  ╚═╝  ╚═══╝╚═╝   ╚═╝   ╚═╝  ╚═╝ ╚═════╝ ");
   tui.append_line("[logo_6]  ─────────── agentic LLM shell v1.0 ──────────────");
   tui.append_line("");
-  tui.append_line("[sys]  Sandbox : " + sandbox);
-  tui.append_line("[sys]  /help for commands  ·  exit to quit");
+  tui.append_line(ICON_SYS + " Sandbox : " + sandbox);
+  tui.append_line(ICON_SYS + " /help for commands  ·  exit to quit");
   tui.append_line("");
   tui.redraw_all();
 }
@@ -2674,9 +2689,9 @@ int main(int argc, char **argv) {
       agent.setup_embed(cfg.embed_path, tui);
     }
   } else {
-    tui.append_line("[sys] No model specified.  Use /model to open the file picker,");
-    tui.append_line("[sys] or /model <path> to load directly.");
-    tui.append_line("[sys] Example: /model ~/models/qwen2.5-7b-q4_k_m.gguf");
+    tui.append_line(ICON_SYS + "No model specified.  Use /model to open the file picker,");
+    tui.append_line(ICON_SYS + "or /model <path> to load directly.");
+    tui.append_line(ICON_SYS + "Example: /model ~/models/qwen2.5-7b-q4_k_m.gguf");
     tui.redraw_all();
   }