From cbfc0bcd5bdefe6a4f061bf857709bdb1d6a8735 Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Fri, 15 May 2026 06:59:11 +0000
Subject: [PATCH 01/12] feat(cache): add GetFileByPath lookup

---
 internal/cache/lookup.go      | 11 ++++++++
 internal/cache/lookup_test.go | 49 +++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)
 create mode 100644 internal/cache/lookup.go
 create mode 100644 internal/cache/lookup_test.go

diff --git a/internal/cache/lookup.go b/internal/cache/lookup.go
new file mode 100644
index 00000000..3466fd10
--- /dev/null
+++ b/internal/cache/lookup.go
@@ -0,0 +1,11 @@
+package cache
+
+// GetFileByPath returns the cached (content_hash, parsed_at) for the given
+// file path. Returns ok=false when no row exists for that path.
+func (c *Cache) GetFileByPath(path string) (hash, parsedAt string, ok bool) {
+	row := c.db.QueryRow(`SELECT content_hash, parsed_at FROM files WHERE path = ? LIMIT 1`, path)
+	if err := row.Scan(&hash, &parsedAt); err != nil {
+		return "", "", false
+	}
+	return hash, parsedAt, true
+}
diff --git a/internal/cache/lookup_test.go b/internal/cache/lookup_test.go
new file mode 100644
index 00000000..64e7967c
--- /dev/null
+++ b/internal/cache/lookup_test.go
@@ -0,0 +1,49 @@
+package cache
+
+import (
+	"path/filepath"
+	"testing"
+
+	"github.com/randomcodespace/codeiq/internal/model"
+)
+
+func TestGetFileByPathReturnsHashWhenPresent(t *testing.T) {
+	c, err := Open(filepath.Join(t.TempDir(), "c.sqlite"))
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	defer c.Close()
+
+	e := &Entry{
+		ContentHash: "abc123",
+		Path:        "foo/bar.go",
+		Language:    "go",
+		ParsedAt:    "2026-05-15T00:00:00Z",
+		Nodes:       []*model.CodeNode{model.NewCodeNode("n1", model.NodeClass, "Bar")},
+	}
+	if err := c.Put(e); err != nil {
+		t.Fatalf("put: %v", err)
+	}
+
+	hash, parsedAt, ok := c.GetFileByPath("foo/bar.go")
+	if !ok {
+		t.Fatal("ok = false, want true")
+	}
+	if hash != "abc123" {
+		t.Fatalf("hash = %q, want %q", hash, "abc123")
+	}
+	if parsedAt != "2026-05-15T00:00:00Z" {
+		t.Fatalf("parsedAt = %q, want %q", parsedAt, "2026-05-15T00:00:00Z")
+	}
+}
+
+func TestGetFileByPathReturnsFalseWhenAbsent(t *testing.T) {
+	c, err := Open(filepath.Join(t.TempDir(), "c.sqlite"))
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	defer c.Close()
+	if _, _, ok := c.GetFileByPath("does/not/exist"); ok {
+		t.Fatal("ok = true, want false for missing path")
+	}
+}

From e380162fa0a6020c1934a85bde3e1bc81f464b6c Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Fri, 15 May 2026 06:59:53 +0000
Subject: [PATCH 02/12] feat(cache): add AllFiles streaming iterator

---
 internal/cache/lookup.go      | 21 +++++++++++
 internal/cache/lookup_test.go | 65 +++++++++++++++++++++++++++++++++++
 2 files changed, 86 insertions(+)

diff --git a/internal/cache/lookup.go b/internal/cache/lookup.go
index 3466fd10..bcf81ccb 100644
--- a/internal/cache/lookup.go
+++ b/internal/cache/lookup.go
@@ -9,3 +9,24 @@ func (c *Cache) GetFileByPath(path string) (hash, parsedAt string, ok bool) {
 	}
 	return hash, parsedAt, true
 }
+
+// AllFiles invokes fn once per cached file in path order. fn returning a
+// non-nil error stops iteration and propagates the error. Stream-iterated
+// via rows.Next(); the whole cache never lives in memory at once.
+func (c *Cache) AllFiles(fn func(path, hash string) error) error {
+	rows, err := c.db.Query(`SELECT path, content_hash FROM files ORDER BY path`)
+	if err != nil {
+		return err
+	}
+	defer rows.Close()
+	for rows.Next() {
+		var path, hash string
+		if err := rows.Scan(&path, &hash); err != nil {
+			return err
+		}
+		if err := fn(path, hash); err != nil {
+			return err
+		}
+	}
+	return rows.Err()
+}
diff --git a/internal/cache/lookup_test.go b/internal/cache/lookup_test.go
index 64e7967c..dec36408 100644
--- a/internal/cache/lookup_test.go
+++ b/internal/cache/lookup_test.go
@@ -47,3 +47,68 @@ func TestGetFileByPathReturnsFalseWhenAbsent(t *testing.T) {
 		t.Fatal("ok = true, want false for missing path")
 	}
 }
+
+func TestAllFilesYieldsEveryRow(t *testing.T) {
+	c, err := Open(filepath.Join(t.TempDir(), "c.sqlite"))
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	defer c.Close()
+	for _, e := range []*Entry{
+		{ContentHash: "h1", Path: "a.go", Language: "go", ParsedAt: "t"},
+		{ContentHash: "h2", Path: "b.go", Language: "go", ParsedAt: "t"},
+		{ContentHash: "h3", Path: "c.go", Language: "go", ParsedAt: "t"},
+	} {
+		if err := c.Put(e); err != nil {
+			t.Fatalf("put: %v", err)
+		}
+	}
+	got := map[string]string{}
+	if err := c.AllFiles(func(path, hash string) error {
+		got[path] = hash
+		return nil
+	}); err != nil {
+		t.Fatalf("AllFiles: %v", err)
+	}
+	want := map[string]string{"a.go": "h1", "b.go": "h2", "c.go": "h3"}
+	if len(got) != len(want) {
+		t.Fatalf("got %d entries, want %d: %v", len(got), len(want), got)
+	}
+	for k, v := range want {
+		if got[k] != v {
+			t.Errorf("got[%q]=%q, want %q", k, got[k], v)
+		}
+	}
+}
+
+func TestAllFilesIteratesInPathOrder(t *testing.T) {
+	c, err := Open(filepath.Join(t.TempDir(), "c.sqlite"))
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	defer c.Close()
+	// Insert in non-alphabetical order on purpose.
+	for _, e := range []*Entry{
+		{ContentHash: "h-c", Path: "c.go", Language: "go", ParsedAt: "t"},
+		{ContentHash: "h-a", Path: "a.go", Language: "go", ParsedAt: "t"},
+		{ContentHash: "h-b", Path: "b.go", Language: "go", ParsedAt: "t"},
+	} {
+		if err := c.Put(e); err != nil {
+			t.Fatalf("put: %v", err)
+		}
+	}
+	var paths []string
+	_ = c.AllFiles(func(path, _ string) error {
+		paths = append(paths, path)
+		return nil
+	})
+	want := []string{"a.go", "b.go", "c.go"}
+	if len(paths) != len(want) {
+		t.Fatalf("len(paths)=%d, want %d", len(paths), len(want))
+	}
+	for i := range paths {
+		if paths[i] != want[i] {
+			t.Errorf("paths[%d]=%q, want %q", i, paths[i], want[i])
+		}
+	}
+}

From e9cc8a52c02ba69aaf08f96aea2ca2a2245653d4 Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Fri, 15 May 2026 07:00:33 +0000
Subject: [PATCH 03/12] feat(cache): add PurgeByPath for incremental cleanup

---
 internal/cache/lookup.go      | 25 +++++++++++++
 internal/cache/lookup_test.go | 70 +++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+)

diff --git a/internal/cache/lookup.go b/internal/cache/lookup.go
index bcf81ccb..49edc4f1 100644
--- a/internal/cache/lookup.go
+++ b/internal/cache/lookup.go
@@ -10,6 +10,31 @@ func (c *Cache) GetFileByPath(path string) (hash, parsedAt string, ok bool) {
 	return hash, parsedAt, true
 }
 
+// PurgeByPath deletes every row associated with the file at path: the
+// files row, all nodes joined by its content_hash, and all edges joined
+// by its content_hash. Idempotent — a missing path returns nil.
+func (c *Cache) PurgeByPath(path string) error {
+	tx, err := c.db.Begin()
+	if err != nil {
+		return err
+	}
+	defer tx.Rollback()
+	var hash string
+	if err := tx.QueryRow(`SELECT content_hash FROM files WHERE path = ?`, path).Scan(&hash); err != nil {
+		return nil
+	}
+	if _, err := tx.Exec(`DELETE FROM nodes WHERE content_hash = ?`, hash); err != nil {
+		return err
+	}
+	if _, err := tx.Exec(`DELETE FROM edges WHERE content_hash = ?`, hash); err != nil {
+		return err
+	}
+	if _, err := tx.Exec(`DELETE FROM files WHERE path = ?`, path); err != nil {
+		return err
+	}
+	return tx.Commit()
+}
+
 // AllFiles invokes fn once per cached file in path order. fn returning a
 // non-nil error stops iteration and propagates the error. Stream-iterated
 // via rows.Next(); the whole cache never lives in memory at once.
diff --git a/internal/cache/lookup_test.go b/internal/cache/lookup_test.go
index dec36408..bdc5ea83 100644
--- a/internal/cache/lookup_test.go
+++ b/internal/cache/lookup_test.go
@@ -81,6 +81,76 @@ func TestAllFilesYieldsEveryRow(t *testing.T) {
 	}
 }
 
+func TestPurgeByPathRemovesAllRows(t *testing.T) {
+	c, err := Open(filepath.Join(t.TempDir(), "c.sqlite"))
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	defer c.Close()
+	e := &Entry{
+		ContentHash: "abc",
+		Path:        "foo.go",
+		Language:    "go",
+		ParsedAt:    "t",
+		Nodes:       []*model.CodeNode{model.NewCodeNode("n1", model.NodeClass, "Foo")},
+		Edges:       []*model.CodeEdge{model.NewCodeEdge("e1", model.EdgeContains, "n1", "n2")},
+	}
+	if err := c.Put(e); err != nil {
+		t.Fatalf("put: %v", err)
+	}
+	if err := c.PurgeByPath("foo.go"); err != nil {
+		t.Fatalf("purge: %v", err)
+	}
+	if _, _, ok := c.GetFileByPath("foo.go"); ok {
+		t.Fatal("file row still present after purge")
+	}
+	if c.Has("abc") {
+		t.Fatal("cache.Has returns true after purge")
+	}
+}
+
+func TestPurgeByPathIsNoOpForMissingPath(t *testing.T) {
+	c, err := Open(filepath.Join(t.TempDir(), "c.sqlite"))
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	defer c.Close()
+	if err := c.PurgeByPath("nope.go"); err != nil {
+		t.Fatalf("purge of missing path should be no-op, got: %v", err)
+	}
+}
+
+func TestPurgeByPathLeavesUnrelatedRows(t *testing.T) {
+	c, err := Open(filepath.Join(t.TempDir(), "c.sqlite"))
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	defer c.Close()
+	_ = c.Put(&Entry{
+		ContentHash: "h-keep",
+		Path:        "keep.go",
+		Language:    "go",
+		ParsedAt:    "t",
+		Nodes:       []*model.CodeNode{model.NewCodeNode("k", model.NodeClass, "K")},
+	})
+	_ = c.Put(&Entry{
+		ContentHash: "h-drop",
+		Path:        "drop.go",
+		Language:    "go",
+		ParsedAt:    "t",
+		Nodes:       []*model.CodeNode{model.NewCodeNode("d", model.NodeClass, "D")},
+	})
+	if err := c.PurgeByPath("drop.go"); err != nil {
+		t.Fatal(err)
+	}
+	if !c.Has("h-keep") {
+		t.Fatal("unrelated file purged")
+	}
+	if c.Has("h-drop") {
+		t.Fatal("target hash still present")
+	}
+}
+
 func TestAllFilesIteratesInPathOrder(t *testing.T) {
 	c, err := Open(filepath.Join(t.TempDir(), "c.sqlite"))
 	if err != nil {

From cbdffd9030b05e3a536ac8ca65a82fc50d8691d3 Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Fri, 15 May 2026 07:01:07 +0000
Subject: [PATCH 04/12] feat(cache): add ManifestHash for graph-freshness
 short-circuit

---
 internal/cache/manifest.go      | 31 +++++++++++
 internal/cache/manifest_test.go | 93 +++++++++++++++++++++++++++++++++
 2 files changed, 124 insertions(+)
 create mode 100644 internal/cache/manifest.go
 create mode 100644 internal/cache/manifest_test.go

diff --git a/internal/cache/manifest.go b/internal/cache/manifest.go
new file mode 100644
index 00000000..47dbe8cc
--- /dev/null
+++ b/internal/cache/manifest.go
@@ -0,0 +1,31 @@
+package cache
+
+import (
+	"crypto/sha256"
+	"encoding/hex"
+)
+
+// ManifestHash returns a deterministic SHA-256 hex digest over the sorted
+// (path, content_hash) tuples in the cache. Two caches produce the same
+// manifest iff they hold the same set of file→hash bindings.
+//
+// Used by the enrich pipeline to short-circuit: if the graph's stored
+// manifest matches the cache's current manifest, the graph is fresh and
+// enrich can exit immediately.
+//
+// Format: "<path>\x00<hash>\x00" per row, concatenated in path order.
+// The NUL separator makes collisions impossible regardless of path chars.
+func (c *Cache) ManifestHash() (string, error) {
+	h := sha256.New()
+	err := c.AllFiles(func(path, hash string) error {
+		h.Write([]byte(path))
+		h.Write([]byte{0})
+		h.Write([]byte(hash))
+		h.Write([]byte{0})
+		return nil
+	})
+	if err != nil {
+		return "", err
+	}
+	return hex.EncodeToString(h.Sum(nil)), nil
+}
diff --git a/internal/cache/manifest_test.go b/internal/cache/manifest_test.go
new file mode 100644
index 00000000..ec3fac1d
--- /dev/null
+++ b/internal/cache/manifest_test.go
@@ -0,0 +1,93 @@
+package cache
+
+import (
+	"path/filepath"
+	"testing"
+)
+
+func TestManifestHashIsDeterministic(t *testing.T) {
+	c, err := Open(filepath.Join(t.TempDir(), "c.sqlite"))
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	defer c.Close()
+	for _, e := range []*Entry{
+		{ContentHash: "h1", Path: "a.go", Language: "go", ParsedAt: "t"},
+		{ContentHash: "h2", Path: "b.go", Language: "go", ParsedAt: "t"},
+	} {
+		if err := c.Put(e); err != nil {
+			t.Fatalf("put: %v", err)
+		}
+	}
+	a, err := c.ManifestHash()
+	if err != nil {
+		t.Fatalf("manifest: %v", err)
+	}
+	b, _ := c.ManifestHash()
+	if a != b {
+		t.Fatalf("ManifestHash not deterministic: %s != %s", a, b)
+	}
+	if len(a) != 64 {
+		t.Fatalf("manifest hash len = %d, want 64 (sha256 hex)", len(a))
+	}
+}
+
+func TestManifestHashChangesWhenFileChanges(t *testing.T) {
+	c, err := Open(filepath.Join(t.TempDir(), "c.sqlite"))
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	defer c.Close()
+	_ = c.Put(&Entry{ContentHash: "h1", Path: "a.go", Language: "go", ParsedAt: "t"})
+	before, _ := c.ManifestHash()
+	if err := c.PurgeByPath("a.go"); err != nil {
+		t.Fatal(err)
+	}
+	_ = c.Put(&Entry{ContentHash: "h2", Path: "a.go", Language: "go", ParsedAt: "t"})
+	after, _ := c.ManifestHash()
+	if before == after {
+		t.Fatal("ManifestHash unchanged after file mutation")
+	}
+}
+
+func TestManifestHashEmptyCache(t *testing.T) {
+	c, err := Open(filepath.Join(t.TempDir(), "c.sqlite"))
+	if err != nil {
+		t.Fatalf("open: %v", err)
+	}
+	defer c.Close()
+	h, _ := c.ManifestHash()
+	want := "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+	if h != want {
+		t.Fatalf("empty manifest = %q, want %q", h, want)
+	}
+}
+
+func TestManifestHashIndependentOfInsertOrder(t *testing.T) {
+	a, err := Open(filepath.Join(t.TempDir(), "a.sqlite"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer a.Close()
+	b, err := Open(filepath.Join(t.TempDir(), "b.sqlite"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer b.Close()
+	files := []*Entry{
+		{ContentHash: "h1", Path: "a.go", Language: "go", ParsedAt: "t"},
+		{ContentHash: "h2", Path: "b.go", Language: "go", ParsedAt: "t"},
+		{ContentHash: "h3", Path: "c.go", Language: "go", ParsedAt: "t"},
+	}
+	for _, e := range files {
+		_ = a.Put(e)
+	}
+	for i := len(files) - 1; i >= 0; i-- {
+		_ = b.Put(files[i])
+	}
+	ha, _ := a.ManifestHash()
+	hb, _ := b.ManifestHash()
+	if ha != hb {
+		t.Fatalf("manifest depends on insert order: %s != %s", ha, hb)
+	}
+}

From 44f9936bb1ab8294effbcba4887deb9d4fec20fd Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Fri, 15 May 2026 07:02:52 +0000
Subject: [PATCH 05/12] feat(analyzer): add Diff for incremental file
 classification

---
 internal/analyzer/diff.go      |  66 ++++++++++++++
 internal/analyzer/diff_test.go | 155 +++++++++++++++++++++++++++++++++
 2 files changed, 221 insertions(+)
 create mode 100644 internal/analyzer/diff.go
 create mode 100644 internal/analyzer/diff_test.go

diff --git a/internal/analyzer/diff.go b/internal/analyzer/diff.go
new file mode 100644
index 00000000..840516e4
--- /dev/null
+++ b/internal/analyzer/diff.go
@@ -0,0 +1,66 @@
+package analyzer
+
+import (
+	"fmt"
+	"os"
+
+	"github.com/randomcodespace/codeiq/internal/cache"
+)
+
+// Delta is the result of comparing the on-disk file set to the cache state.
+// All slices are sorted by path (FileDiscovery sorts; AllFiles iterates in
+// path order) so callers can rely on stable order.
+type Delta struct {
+	Added     []string // on disk, not in cache
+	Modified  []string // path in cache but content_hash differs from disk
+	Deleted   []string // in cache, missing from disk
+	Unchanged []string // path + content_hash match cache exactly
+}
+
+// Diff walks the project root via FileDiscovery and classifies each file
+// against the cache. UNCHANGED files cost one hash per file; nothing else
+// is parsed or detected.
+//
+// Returns Delta with empty slices (not nil) when there is no work in a
+// bucket.
+func (a *Analyzer) Diff(root string) (Delta, error) {
+	d := Delta{}
+	if a.opts.Cache == nil {
+		return d, fmt.Errorf("diff: cache is required")
+	}
+	disc := NewFileDiscovery()
+	files, err := disc.Discover(root)
+	if err != nil {
+		return d, fmt.Errorf("file discovery: %w", err)
+	}
+
+	seen := make(map[string]bool, len(files))
+	for _, f := range files {
+		seen[f.RelPath] = true
+		content, err := os.ReadFile(f.AbsPath)
+		if err != nil {
+			fmt.Fprintf(os.Stderr, "codeiq: diff: %s: %v\n", f.RelPath, err)
+			continue
+		}
+		curHash := cache.HashString(string(content))
+		cachedHash, _, ok := a.opts.Cache.GetFileByPath(f.RelPath)
+		switch {
+		case !ok:
+			d.Added = append(d.Added, f.RelPath)
+		case cachedHash == curHash:
+			d.Unchanged = append(d.Unchanged, f.RelPath)
+		default:
+			d.Modified = append(d.Modified, f.RelPath)
+		}
+	}
+
+	if err := a.opts.Cache.AllFiles(func(path, _ string) error {
+		if !seen[path] {
+			d.Deleted = append(d.Deleted, path)
+		}
+		return nil
+	}); err != nil {
+		return d, err
+	}
+	return d, nil
+}
diff --git a/internal/analyzer/diff_test.go b/internal/analyzer/diff_test.go
new file mode 100644
index 00000000..14b897d8
--- /dev/null
+++ b/internal/analyzer/diff_test.go
@@ -0,0 +1,155 @@
+package analyzer
+
+import (
+	"os"
+	"path/filepath"
+	"sort"
+	"testing"
+
+	"github.com/randomcodespace/codeiq/internal/cache"
+	"github.com/randomcodespace/codeiq/internal/detector"
+
+	// Register a couple of phase-1 detectors so the Analyzer Registry
+	// doesn't sit empty (Diff doesn't actually run detectors but the
+	// Analyzer requires a non-nil Registry, satisfied here via blank imports).
+	_ "github.com/randomcodespace/codeiq/internal/detector/jvm/java"
+	_ "github.com/randomcodespace/codeiq/internal/detector/python"
+)
+
+func mustOpenCache(t *testing.T) (*cache.Cache, string) {
+	t.Helper()
+	dir := t.TempDir()
+	c, err := cache.Open(filepath.Join(dir, "c.sqlite"))
+	if err != nil {
+		t.Fatalf("cache: %v", err)
+	}
+	return c, dir
+}
+
+func writeDiffFile(t *testing.T, root, rel, content string) {
+	t.Helper()
+	full := filepath.Join(root, rel)
+	if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.WriteFile(full, []byte(content), 0o644); err != nil {
+		t.Fatal(err)
+	}
+}
+
+func TestDiffEmptyCacheAndEmptyRoot(t *testing.T) {
+	c, root := mustOpenCache(t)
+	defer c.Close()
+	a := NewAnalyzer(Options{Cache: c, Registry: detector.Default})
+	d, err := a.Diff(root)
+	if err != nil {
+		t.Fatalf("diff: %v", err)
+	}
+	if len(d.Added)+len(d.Modified)+len(d.Deleted)+len(d.Unchanged) != 0 {
+		t.Fatalf("empty root + empty cache: got %+v", d)
+	}
+}
+
+func TestDiffDetectsAddedFile(t *testing.T) {
+	c, root := mustOpenCache(t)
+	defer c.Close()
+	writeDiffFile(t, root, "X.java", "public class X {}")
+	a := NewAnalyzer(Options{Cache: c, Registry: detector.Default})
+	d, err := a.Diff(root)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(d.Added) != 1 || d.Added[0] != "X.java" {
+		t.Fatalf("Added = %v, want [X.java]", d.Added)
+	}
+	if len(d.Modified)+len(d.Deleted)+len(d.Unchanged) != 0 {
+		t.Fatalf("other buckets non-empty: %+v", d)
+	}
+}
+
+func TestDiffDetectsUnchangedFile(t *testing.T) {
+	c, root := mustOpenCache(t)
+	defer c.Close()
+	src := "public class X {}"
+	writeDiffFile(t, root, "X.java", src)
+	_ = c.Put(&cache.Entry{ContentHash: cache.HashString(src), Path: "X.java", Language: "java", ParsedAt: "t"})
+	a := NewAnalyzer(Options{Cache: c, Registry: detector.Default})
+	d, err := a.Diff(root)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(d.Unchanged) != 1 || d.Unchanged[0] != "X.java" {
+		t.Fatalf("Unchanged = %v, want [X.java]", d.Unchanged)
+	}
+	if len(d.Added)+len(d.Modified)+len(d.Deleted) != 0 {
+		t.Fatalf("other buckets non-empty: %+v", d)
+	}
+}
+
+func TestDiffDetectsModifiedFile(t *testing.T) {
+	c, root := mustOpenCache(t)
+	defer c.Close()
+	writeDiffFile(t, root, "X.java", "public class X { int v = 2; }")
+	_ = c.Put(&cache.Entry{
+		ContentHash: cache.HashString("public class X {}"),
+		Path:        "X.java",
+		Language:    "java",
+		ParsedAt:    "t",
+	})
+	a := NewAnalyzer(Options{Cache: c, Registry: detector.Default})
+	d, err := a.Diff(root)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(d.Modified) != 1 || d.Modified[0] != "X.java" {
+		t.Fatalf("Modified = %v, want [X.java]", d.Modified)
+	}
+}
+
+func TestDiffDetectsDeletedFile(t *testing.T) {
+	c, root := mustOpenCache(t)
+	defer c.Close()
+	_ = c.Put(&cache.Entry{ContentHash: "h", Path: "Y.java", Language: "java", ParsedAt: "t"})
+	a := NewAnalyzer(Options{Cache: c, Registry: detector.Default})
+	d, err := a.Diff(root)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(d.Deleted) != 1 || d.Deleted[0] != "Y.java" {
+		t.Fatalf("Deleted = %v, want [Y.java]", d.Deleted)
+	}
+}
+
+func TestDiffMixedScenario(t *testing.T) {
+	c, root := mustOpenCache(t)
+	defer c.Close()
+	writeDiffFile(t, root, "A.java", "class A {}")
+	writeDiffFile(t, root, "B.java", "class B {}")
+	writeDiffFile(t, root, "C.java", "class C v2 {}")
+	_ = c.Put(&cache.Entry{ContentHash: cache.HashString("class A {}"), Path: "A.java", Language: "java", ParsedAt: "t"})
+	_ = c.Put(&cache.Entry{ContentHash: cache.HashString("class C {}"), Path: "C.java", Language: "java", ParsedAt: "t"})
+	_ = c.Put(&cache.Entry{ContentHash: "h-d", Path: "D.java", Language: "java", ParsedAt: "t"})
+	a := NewAnalyzer(Options{Cache: c, Registry: detector.Default})
+	d, err := a.Diff(root)
+	if err != nil {
+		t.Fatal(err)
+	}
+	check := func(label string, got, want []string) {
+		t.Helper()
+		sort.Strings(got)
+		sort.Strings(want)
+		if len(got) != len(want) {
+			t.Errorf("%s: got %v, want %v", label, got, want)
+			return
+		}
+		for i := range got {
+			if got[i] != want[i] {
+				t.Errorf("%s[%d]: got %q, want %q", label, i, got[i], want[i])
+			}
+		}
+	}
+	check("Added", d.Added, []string{"B.java"})
+	check("Modified", d.Modified, []string{"C.java"})
+	check("Deleted", d.Deleted, []string{"D.java"})
+	check("Unchanged", d.Unchanged, []string{"A.java"})
+}

From d467b7ecefb03815827e1fb1608d6e469da78443 Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Fri, 15 May 2026 07:03:59 +0000
Subject: [PATCH 06/12] feat(analyzer): cache-hit early-exit + Diff/purge
 integration in Run

---
 internal/analyzer/analyzer.go      |  61 +++++++++++++-
 internal/analyzer/analyzer_test.go | 128 +++++++++++++++++++++++++++++
 2 files changed, 186 insertions(+), 3 deletions(-)

diff --git a/internal/analyzer/analyzer.go b/internal/analyzer/analyzer.go
index 4a70b496..ffb184d5 100644
--- a/internal/analyzer/analyzer.go
+++ b/internal/analyzer/analyzer.go
@@ -5,6 +5,7 @@ import (
 	"os"
 	"runtime"
 	"sync"
+	"sync/atomic"
 	"time"
 
 	"github.com/randomcodespace/codeiq/internal/cache"
@@ -19,13 +20,19 @@ const DefaultBatchSize = 500
 type Options struct {
 	Cache     *cache.Cache
 	Registry  *detector.Registry
-	BatchSize int // defaults to DefaultBatchSize
-	Workers   int // defaults to 2 * GOMAXPROCS
+	BatchSize int  // defaults to DefaultBatchSize
+	Workers   int  // defaults to 2 * GOMAXPROCS
+	Force     bool // bypass cache early-exit; re-parse every file
 }
 
 // Analyzer orchestrates the index pipeline.
 type Analyzer struct {
-	opts Options
+	opts    Options
+	counter runCounter
+}
+
+type runCounter struct {
+	cacheHits atomic.Int64
 }
 
 // NewAnalyzer returns an analyzer wired to opts.
@@ -47,6 +54,9 @@ func NewAnalyzer(opts Options) *Analyzer {
 // Plan §1.5 — DedupedNodes/DedupedEdges/DroppedEdges expose dedup activity
 // so operators can see "graph collapsed 312 duplicate nodes, dropped 14
 // phantom edges" — the visibility is what makes "meaningful" diagnosable.
+//
+// Added/Modified/Deleted/Unchanged/CacheHits are incremental counters,
+// zero on full `--force` runs.
 type Stats struct {
 	Files        int
 	Nodes        int
@@ -54,13 +64,38 @@ type Stats struct {
 	DedupedNodes int
 	DedupedEdges int
 	DroppedEdges int
+	Added        int
+	Modified     int
+	Deleted      int
+	Unchanged    int
+	CacheHits    int
 }
 
 // Run executes FileDiscovery → parse → detectors → GraphBuilder → cache writes
 // and returns aggregate stats. Errors from individual file processing are
 // logged to stderr but do not stop the run — partial output is better than no
 // output (matches Java's per-file try/catch behaviour).
+//
+// On non-Force runs with a cache present, Run first runs Diff() to classify
+// files, purges cache rows for deleted files, then proceeds. processFile
+// skips parse+detect for UNCHANGED files (content_hash hit in cache).
 func (a *Analyzer) Run(root string) (Stats, error) {
+	a.counter.cacheHits.Store(0)
+
+	var d Delta
+	if a.opts.Cache != nil && !a.opts.Force {
+		var err error
+		d, err = a.Diff(root)
+		if err != nil {
+			return Stats{}, err
+		}
+		for _, path := range d.Deleted {
+			if err := a.opts.Cache.PurgeByPath(path); err != nil {
+				fmt.Fprintf(os.Stderr, "codeiq: purge %s: %v\n", path, err)
+			}
+		}
+	}
+
 	disc := NewFileDiscovery()
 	files, err := disc.Discover(root)
 	if err != nil {
@@ -99,6 +134,11 @@ func (a *Analyzer) Run(root string) (Stats, error) {
 		DedupedNodes: snap.DedupedNodes,
 		DedupedEdges: snap.DedupedEdges,
 		DroppedEdges: snap.DroppedEdges,
+		Added:        len(d.Added),
+		Modified:     len(d.Modified),
+		Deleted:      len(d.Deleted),
+		Unchanged:    len(d.Unchanged),
+		CacheHits:    int(a.counter.cacheHits.Load()),
 	}, nil
 }
 
@@ -108,6 +148,18 @@ func (a *Analyzer) processFile(f DiscoveredFile, gb *GraphBuilder) error {
 		return err
 	}
 	hash := cache.HashString(string(content))
+
+	// Fast path: cache hit. Reuse the previous emissions; skip parse+detect.
+	if a.opts.Cache != nil && !a.opts.Force && a.opts.Cache.Has(hash) {
+		entry, gerr := a.opts.Cache.Get(hash)
+		if gerr == nil && entry != nil {
+			gb.Add(&detector.Result{Nodes: entry.Nodes, Edges: entry.Edges})
+			a.counter.cacheHits.Add(1)
+			return nil
+		}
+		// Has() true but Get() failed — pathological. Fall through to re-parse.
+	}
+
 	tree, err := parser.Parse(f.Language, content)
 	if err != nil {
 		// Continue with regex-only detectors when the parser bails — matches
@@ -142,6 +194,9 @@ func (a *Analyzer) processFile(f DiscoveredFile, gb *GraphBuilder) error {
 		entry.Edges = append(entry.Edges, r.Edges...)
 	}
 	if a.opts.Cache != nil {
+		// MODIFIED files: purge prior (path, old_hash) row so a single path
+		// never has two cache entries.
+		_ = a.opts.Cache.PurgeByPath(f.RelPath)
 		if err := a.opts.Cache.Put(entry); err != nil {
 			return fmt.Errorf("cache put: %w", err)
 		}
diff --git a/internal/analyzer/analyzer_test.go b/internal/analyzer/analyzer_test.go
index d7555506..85d886f7 100644
--- a/internal/analyzer/analyzer_test.go
+++ b/internal/analyzer/analyzer_test.go
@@ -66,6 +66,134 @@ func TestAnalyzerEndToEnd(t *testing.T) {
 	}
 }
 
+func TestStatsHasIncrementalCounters(t *testing.T) {
+	var s Stats
+	// Compile-time check that the new fields exist with the expected names.
+	_ = s.Added
+	_ = s.Modified
+	_ = s.Deleted
+	_ = s.Unchanged
+	_ = s.CacheHits
+}
+
+func TestProcessFileSkipsOnCacheHit(t *testing.T) {
+	root := t.TempDir()
+	cachePath := filepath.Join(root, ".codeiq", "cache.sqlite")
+	if err := os.MkdirAll(filepath.Dir(cachePath), 0o755); err != nil {
+		t.Fatal(err)
+	}
+	src := "public class A {}"
+	if err := os.WriteFile(filepath.Join(root, "A.java"), []byte(src), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	c, err := cache.Open(cachePath)
+	if err != nil {
+		t.Fatalf("cache: %v", err)
+	}
+	defer c.Close()
+
+	// Seed the cache with a row for this content hash. processFile MUST
+	// not re-parse the file when its hash already lives in the cache.
+	if err := c.Put(&cache.Entry{
+		ContentHash: cache.HashString(src),
+		Path:        "A.java",
+		Language:    "java",
+		ParsedAt:    "2026-01-01T00:00:00Z",
+	}); err != nil {
+		t.Fatal(err)
+	}
+
+	a := NewAnalyzer(Options{Cache: c, Registry: detector.Default, Workers: 1})
+	stats, err := a.Run(root)
+	if err != nil {
+		t.Fatalf("run: %v", err)
+	}
+	if stats.CacheHits != 1 {
+		t.Fatalf("CacheHits = %d, want 1", stats.CacheHits)
+	}
+	if stats.Files != 1 {
+		t.Fatalf("Files = %d, want 1", stats.Files)
+	}
+	if stats.Unchanged != 1 {
+		t.Fatalf("Unchanged = %d, want 1", stats.Unchanged)
+	}
+}
+
+func TestForceBypassesCacheHit(t *testing.T) {
+	root := t.TempDir()
+	cachePath := filepath.Join(root, ".codeiq", "cache.sqlite")
+	if err := os.MkdirAll(filepath.Dir(cachePath), 0o755); err != nil {
+		t.Fatal(err)
+	}
+	src := "public class A {}"
+	if err := os.WriteFile(filepath.Join(root, "A.java"), []byte(src), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	c, err := cache.Open(cachePath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer c.Close()
+	_ = c.Put(&cache.Entry{
+		ContentHash: cache.HashString(src),
+		Path:        "A.java",
+		Language:    "java",
+		ParsedAt:    "t",
+	})
+
+	a := NewAnalyzer(Options{Cache: c, Registry: detector.Default, Workers: 1, Force: true})
+	stats, err := a.Run(root)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if stats.CacheHits != 0 {
+		t.Fatalf("Force=true should bypass cache; CacheHits = %d", stats.CacheHits)
+	}
+}
+
+func TestRunPurgesDeletedFiles(t *testing.T) {
+	root := t.TempDir()
+	cachePath := filepath.Join(root, ".codeiq", "cache.sqlite")
+	if err := os.MkdirAll(filepath.Dir(cachePath), 0o755); err != nil {
+		t.Fatal(err)
+	}
+	c, err := cache.Open(cachePath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer c.Close()
+
+	// Seed a phantom file that's gone from disk.
+	if err := c.Put(&cache.Entry{
+		ContentHash: "ghost-hash",
+		Path:        "deleted.java",
+		Language:    "java",
+		ParsedAt:    "t",
+	}); err != nil {
+		t.Fatal(err)
+	}
+	if !c.Has("ghost-hash") {
+		t.Fatal("seed didn't take")
+	}
+	if err := os.WriteFile(filepath.Join(root, "real.java"), []byte("class R {}"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+
+	a := NewAnalyzer(Options{Cache: c, Registry: detector.Default, Workers: 1})
+	stats, err := a.Run(root)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if c.Has("ghost-hash") {
+		t.Fatal("deleted file's cache row not purged")
+	}
+	if stats.Deleted != 1 {
+		t.Fatalf("Deleted = %d, want 1", stats.Deleted)
+	}
+}
+
 func TestAnalyzerDeterminism(t *testing.T) {
 	dir := t.TempDir()
 	if err := os.WriteFile(filepath.Join(dir, "UserController.java"), []byte(fixtureJava), 0644); err != nil {

From 85a829639224215f147614cbb3cc6a8be7f63023 Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Fri, 15 May 2026 07:06:16 +0000
Subject: [PATCH 07/12] feat(graph): RemoveFile/InsertFile/ReplaceFile per-file
 mutation APIs

---
 internal/graph/refresh.go      |  66 +++++++++++++
 internal/graph/refresh_test.go | 165 +++++++++++++++++++++++++++++++++
 2 files changed, 231 insertions(+)
 create mode 100644 internal/graph/refresh.go
 create mode 100644 internal/graph/refresh_test.go

diff --git a/internal/graph/refresh.go b/internal/graph/refresh.go
new file mode 100644
index 00000000..855efc3d
--- /dev/null
+++ b/internal/graph/refresh.go
@@ -0,0 +1,66 @@
+package graph
+
+import (
+	"fmt"
+
+	"github.com/randomcodespace/codeiq/internal/model"
+)
+
+// RemoveFile deletes every CodeNode whose file_path matches path along with
+// every incident relationship across all rel tables. Idempotent — calling
+// with a path that has no matching nodes is a no-op that returns nil.
+//
+// Implementation note: we iterate per rel table because Kuzu (0.11.3) does
+// not yet support a heterogeneous "match any rel" DELETE across rel tables.
+// The DETACH DELETE on CodeNode then handles whatever remains.
+func (s *Store) RemoveFile(path string) error {
+	// First, drop every incident rel by walking each declared rel table.
+	// DETACH DELETE on CodeNode would handle this, but being explicit per
+	// table keeps the delete plan simple and predictable on Kuzu 0.11.3.
+	for _, kind := range model.AllEdgeKinds() {
+		q := fmt.Sprintf(
+			`MATCH (n:CodeNode)-[r:%s]->(m:CodeNode)
+			 WHERE n.file_path = $p OR m.file_path = $p
+			 DELETE r`,
+			relTableName(kind))
+		if _, err := s.Cypher(q, map[string]any{"p": path}); err != nil {
+			return fmt.Errorf("graph: remove edges for %s: %w", path, err)
+		}
+	}
+	// Now drop the nodes themselves.
+	if _, err := s.Cypher(
+		`MATCH (n:CodeNode) WHERE n.file_path = $p DELETE n`,
+		map[string]any{"p": path},
+	); err != nil {
+		return fmt.Errorf("graph: remove nodes for %s: %w", path, err)
+	}
+	return nil
+}
+
+// InsertFile bulk-loads nodes + edges for a single file. Equivalent to
+// BulkLoadNodes + BulkLoadEdges; the path parameter is for API symmetry
+// (file_path is on each node).
+func (s *Store) InsertFile(path string, nodes []*model.CodeNode, edges []*model.CodeEdge) error {
+	if len(nodes) == 0 && len(edges) == 0 {
+		return nil
+	}
+	if err := s.BulkLoadNodes(nodes); err != nil {
+		return fmt.Errorf("graph: insert file %s nodes: %w", path, err)
+	}
+	if err := s.BulkLoadEdges(edges); err != nil {
+		return fmt.Errorf("graph: insert file %s edges: %w", path, err)
+	}
+	return nil
+}
+
+// ReplaceFile is the MODIFIED-file path: RemoveFile followed by InsertFile.
+// There is a brief window between the two calls where the file's nodes are
+// absent from the graph; concurrent readers see either pre-state or
+// post-state but may briefly observe the file as missing. The window is
+// fine for incremental enrich since enrich is the single writer.
+func (s *Store) ReplaceFile(path string, nodes []*model.CodeNode, edges []*model.CodeEdge) error {
+	if err := s.RemoveFile(path); err != nil {
+		return err
+	}
+	return s.InsertFile(path, nodes, edges)
+}
diff --git a/internal/graph/refresh_test.go b/internal/graph/refresh_test.go
new file mode 100644
index 00000000..6016e86f
--- /dev/null
+++ b/internal/graph/refresh_test.go
@@ -0,0 +1,165 @@
+package graph_test
+
+import (
+	"path/filepath"
+	"testing"
+
+	"github.com/randomcodespace/codeiq/internal/graph"
+	"github.com/randomcodespace/codeiq/internal/model"
+)
+
+func openSchemaStore(t *testing.T) *graph.Store {
+	t.Helper()
+	s, err := graph.Open(filepath.Join(t.TempDir(), "g.kuzu"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := s.ApplySchema(); err != nil {
+		s.Close()
+		t.Fatal(err)
+	}
+	return s
+}
+
+func countCodeNodes(t *testing.T, s *graph.Store) int64 {
+	t.Helper()
+	rows, err := s.Cypher("MATCH (n:CodeNode) RETURN count(n) AS c")
+	if err != nil {
+		t.Fatal(err)
+	}
+	return rows[0]["c"].(int64)
+}
+
+func TestRemoveFileDeletesAllNodesForPath(t *testing.T) {
+	s := openSchemaStore(t)
+	defer s.Close()
+
+	nodes := []*model.CodeNode{
+		{ID: "n1", Kind: model.NodeClass, Label: "A", FilePath: "A.java", Layer: model.LayerBackend},
+		{ID: "n2", Kind: model.NodeMethod, Label: "foo", FilePath: "A.java", Layer: model.LayerBackend},
+		{ID: "n3", Kind: model.NodeClass, Label: "B", FilePath: "B.java", Layer: model.LayerBackend},
+	}
+	if err := s.BulkLoadNodes(nodes); err != nil {
+		t.Fatal(err)
+	}
+	if err := s.RemoveFile("A.java"); err != nil {
+		t.Fatalf("RemoveFile: %v", err)
+	}
+	rows, err := s.Cypher("MATCH (n:CodeNode) RETURN n.id AS id ORDER BY id")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(rows) != 1 {
+		t.Fatalf("want 1 remaining (n3), got %d: %v", len(rows), rows)
+	}
+	if rows[0]["id"] != "n3" {
+		t.Fatalf("wrong survivor: %v", rows[0])
+	}
+}
+
+func TestRemoveFileIsIdempotent(t *testing.T) {
+	s := openSchemaStore(t)
+	defer s.Close()
+	if err := s.RemoveFile("never-existed.java"); err != nil {
+		t.Fatalf("RemoveFile on missing: %v", err)
+	}
+}
+
+func TestRemoveFileDeletesIncidentEdges(t *testing.T) {
+	s := openSchemaStore(t)
+	defer s.Close()
+	nodes := []*model.CodeNode{
+		{ID: "n1", Kind: model.NodeClass, Label: "A", FilePath: "A.java"},
+		{ID: "n2", Kind: model.NodeClass, Label: "B", FilePath: "B.java"},
+	}
+	if err := s.BulkLoadNodes(nodes); err != nil {
+		t.Fatal(err)
+	}
+	edges := []*model.CodeEdge{
+		{ID: "n1->n2", Kind: model.EdgeCalls, SourceID: "n1", TargetID: "n2",
+			Confidence: model.ConfidenceSyntactic},
+	}
+	if err := s.BulkLoadEdges(edges); err != nil {
+		t.Fatal(err)
+	}
+	if err := s.RemoveFile("A.java"); err != nil {
+		t.Fatalf("RemoveFile: %v", err)
+	}
+	rows, err := s.Cypher("MATCH ()-[r:CALLS]->() RETURN count(r) AS c")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if rows[0]["c"].(int64) != 0 {
+		t.Fatalf("edge to deleted node survived: %v", rows[0])
+	}
+}
+
+func TestInsertFileAddsNodesAndEdges(t *testing.T) {
+	s := openSchemaStore(t)
+	defer s.Close()
+	nodes := []*model.CodeNode{
+		{ID: "j:file:C.java", Kind: model.NodeModule, Label: "C.java", FilePath: "C.java"},
+		{ID: "j:C.java:class:C", Kind: model.NodeClass, Label: "C", FilePath: "C.java"},
+	}
+	edges := []*model.CodeEdge{{
+		ID: "j:file:C.java->j:C.java:class:C", Kind: model.EdgeContains,
+		SourceID: "j:file:C.java", TargetID: "j:C.java:class:C",
+		Confidence: model.ConfidenceSyntactic, Source: "test",
+	}}
+	if err := s.InsertFile("C.java", nodes, edges); err != nil {
+		t.Fatalf("InsertFile: %v", err)
+	}
+	rows, err := s.Cypher("MATCH (n:CodeNode) WHERE n.file_path = 'C.java' RETURN count(n) AS c")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if got := rows[0]["c"].(int64); got != 2 {
+		t.Fatalf("want 2 nodes, got %v", rows[0]["c"])
+	}
+	rels, err := s.Cypher("MATCH ()-[r:CONTAINS]->() RETURN count(r) AS c")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if got := rels[0]["c"].(int64); got != 1 {
+		t.Fatalf("want 1 CONTAINS edge, got %v", rels[0]["c"])
+	}
+}
+
+func TestInsertFileEmptyIsNoop(t *testing.T) {
+	s := openSchemaStore(t)
+	defer s.Close()
+	if err := s.InsertFile("empty.java", nil, nil); err != nil {
+		t.Fatalf("InsertFile with empty input should be no-op, got: %v", err)
+	}
+	if countCodeNodes(t, s) != 0 {
+		t.Fatal("empty insert created phantom nodes")
+	}
+}
+
+func TestReplaceFileSwapsContent(t *testing.T) {
+	s := openSchemaStore(t)
+	defer s.Close()
+
+	if err := s.InsertFile("D.java", []*model.CodeNode{
+		{ID: "old-d", Kind: model.NodeClass, Label: "OldD", FilePath: "D.java"},
+	}, nil); err != nil {
+		t.Fatal(err)
+	}
+	if err := s.ReplaceFile("D.java",
+		[]*model.CodeNode{
+			{ID: "new-d", Kind: model.NodeClass, Label: "NewD", FilePath: "D.java"},
+			{ID: "new-d-m", Kind: model.NodeMethod, Label: "m", FilePath: "D.java"},
+		}, nil); err != nil {
+		t.Fatalf("ReplaceFile: %v", err)
+	}
+	rows, err := s.Cypher("MATCH (n:CodeNode) WHERE n.file_path = 'D.java' RETURN n.id AS id ORDER BY n.id")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(rows) != 2 {
+		t.Fatalf("want 2 nodes after replace, got %d: %v", len(rows), rows)
+	}
+	if rows[0]["id"] != "new-d" || rows[1]["id"] != "new-d-m" {
+		t.Fatalf("nodes after replace: %v", rows)
+	}
+}

From 1702f176e09c3ce39d3b4c16d7d96fcc543e4011 Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Fri, 15 May 2026 07:07:50 +0000
Subject: [PATCH 08/12] feat(graph): GraphMeta table +
 ReadManifest/WriteManifest

---
 internal/graph/manifest.go      | 40 +++++++++++++++
 internal/graph/manifest_test.go | 90 +++++++++++++++++++++++++++++++++
 internal/graph/schema.go        | 10 ++++
 internal/graph/schema_test.go   | 23 ++++++---
 4 files changed, 156 insertions(+), 7 deletions(-)
 create mode 100644 internal/graph/manifest.go
 create mode 100644 internal/graph/manifest_test.go

diff --git a/internal/graph/manifest.go b/internal/graph/manifest.go
new file mode 100644
index 00000000..e70ad15e
--- /dev/null
+++ b/internal/graph/manifest.go
@@ -0,0 +1,40 @@
+package graph
+
+import "fmt"
+
+const metaKeyManifest = "manifest_hash"
+
+// ReadManifest returns the manifest_hash stored by the last successful
+// enrich, or "" if none exists (fresh graph). Used by enrich to short-
+// circuit when the cache and graph are already in sync.
+func (s *Store) ReadManifest() (string, error) {
+	rows, err := s.Cypher(
+		`MATCH (m:GraphMeta) WHERE m.meta_key = $k RETURN m.value AS v`,
+		map[string]any{"k": metaKeyManifest})
+	if err != nil {
+		return "", fmt.Errorf("graph: read manifest: %w", err)
+	}
+	if len(rows) == 0 {
+		return "", nil
+	}
+	v, _ := rows[0]["v"].(string)
+	return v, nil
+}
+
+// WriteManifest stores hash as the manifest_hash, replacing any existing
+// value. Called at the tail of every successful enrich.
+func (s *Store) WriteManifest(hash string) error {
+	if _, err := s.Cypher(
+		`MATCH (m:GraphMeta) WHERE m.meta_key = $k DELETE m`,
+		map[string]any{"k": metaKeyManifest},
+	); err != nil {
+		return fmt.Errorf("graph: clear manifest: %w", err)
+	}
+	if _, err := s.Cypher(
+		`CREATE (:GraphMeta {meta_key: $k, value: $v})`,
+		map[string]any{"k": metaKeyManifest, "v": hash},
+	); err != nil {
+		return fmt.Errorf("graph: write manifest: %w", err)
+	}
+	return nil
+}
diff --git a/internal/graph/manifest_test.go b/internal/graph/manifest_test.go
new file mode 100644
index 00000000..9a1dbd21
--- /dev/null
+++ b/internal/graph/manifest_test.go
@@ -0,0 +1,90 @@
+package graph_test
+
+import (
+	"path/filepath"
+	"testing"
+
+	"github.com/randomcodespace/codeiq/internal/graph"
+)
+
+func TestReadManifestEmptyOnFreshGraph(t *testing.T) {
+	s, err := graph.Open(filepath.Join(t.TempDir(), "g.kuzu"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer s.Close()
+	if err := s.ApplySchema(); err != nil {
+		t.Fatal(err)
+	}
+	got, err := s.ReadManifest()
+	if err != nil {
+		t.Fatalf("ReadManifest: %v", err)
+	}
+	if got != "" {
+		t.Fatalf("fresh manifest = %q, want \"\"", got)
+	}
+}
+
+func TestWriteThenReadManifestRoundTrip(t *testing.T) {
+	s, err := graph.Open(filepath.Join(t.TempDir(), "g.kuzu"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer s.Close()
+	if err := s.ApplySchema(); err != nil {
+		t.Fatal(err)
+	}
+	want := "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855"
+	if err := s.WriteManifest(want); err != nil {
+		t.Fatalf("WriteManifest: %v", err)
+	}
+	got, err := s.ReadManifest()
+	if err != nil {
+		t.Fatalf("ReadManifest: %v", err)
+	}
+	if got != want {
+		t.Fatalf("ReadManifest = %q, want %q", got, want)
+	}
+}
+
+func TestWriteManifestOverwrites(t *testing.T) {
+	s, err := graph.Open(filepath.Join(t.TempDir(), "g.kuzu"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer s.Close()
+	if err := s.ApplySchema(); err != nil {
+		t.Fatal(err)
+	}
+	if err := s.WriteManifest("h1"); err != nil {
+		t.Fatal(err)
+	}
+	if err := s.WriteManifest("h2"); err != nil {
+		t.Fatal(err)
+	}
+	got, _ := s.ReadManifest()
+	if got != "h2" {
+		t.Fatalf("got %q after overwrite, want h2", got)
+	}
+}
+
+func TestManifestRejectedOnReadOnly(t *testing.T) {
+	path := filepath.Join(t.TempDir(), "g.kuzu")
+	w, err := graph.Open(path)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if err := w.ApplySchema(); err != nil {
+		t.Fatal(err)
+	}
+	w.Close()
+
+	ro, err := graph.OpenReadOnly(path, 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer ro.Close()
+	if err := ro.WriteManifest("blocked"); err == nil {
+		t.Fatal("WriteManifest should fail on read-only store")
+	}
+}
diff --git a/internal/graph/schema.go b/internal/graph/schema.go
index a4011a99..65e78598 100644
--- a/internal/graph/schema.go
+++ b/internal/graph/schema.go
@@ -16,6 +16,16 @@ import (
 // a label. Properties round-trip through a JSON-serialised `props` column
 // plus a small set of first-class columns we want to index / project on.
 func (s *Store) ApplySchema() error {
+	// GraphMeta stores small key→value strings (e.g., manifest hash for the
+	// incremental enrich short-circuit). One row per key; PK enforces uniqueness.
+	metaDDL := `CREATE NODE TABLE IF NOT EXISTS GraphMeta(
+		meta_key STRING,
+		value STRING,
+		PRIMARY KEY(meta_key))`
+	if _, err := s.Cypher(metaDDL); err != nil {
+		return fmt.Errorf("graph: create GraphMeta: %w", err)
+	}
+
 	nodeDDL := `CREATE NODE TABLE IF NOT EXISTS CodeNode(
 		id STRING,
 		kind STRING,
diff --git a/internal/graph/schema_test.go b/internal/graph/schema_test.go
index 4ace1c5f..bcd47f59 100644
--- a/internal/graph/schema_test.go
+++ b/internal/graph/schema_test.go
@@ -8,9 +8,10 @@ import (
 	"github.com/randomcodespace/codeiq/internal/model"
 )
 
-// TestApplySchemaCreatesAllTables asserts ApplySchema produces exactly one
-// CodeNode node table and one rel table per EdgeKind. The Java side mirrors
-// this implicitly through SDN's label-driven schema; on Kuzu we declare it.
+// TestApplySchemaCreatesAllTables asserts ApplySchema produces the expected
+// node tables (CodeNode + GraphMeta) and one rel table per EdgeKind. The
+// Java side mirrors this implicitly through SDN's label-driven schema; on
+// Kuzu we declare it.
 func TestApplySchemaCreatesAllTables(t *testing.T) {
 	s, err := graph.Open(filepath.Join(t.TempDir(), "g.kuzu"))
 	if err != nil {
@@ -26,17 +27,25 @@ func TestApplySchemaCreatesAllTables(t *testing.T) {
 	if err != nil {
 		t.Fatalf("show tables: %v", err)
 	}
-	var nodeTables, relTables int
+	nodeTables := map[string]bool{}
+	relTables := 0
 	for _, r := range rows {
 		switch r["type"] {
 		case "NODE":
-			nodeTables++
+			name, _ := r["name"].(string)
+			nodeTables[name] = true
 		case "REL":
 			relTables++
 		}
 	}
-	if nodeTables != 1 {
-		t.Errorf("want 1 node table, got %d", nodeTables)
+	if !nodeTables["CodeNode"] {
+		t.Error("CodeNode node table missing")
+	}
+	if !nodeTables["GraphMeta"] {
+		t.Error("GraphMeta node table missing")
+	}
+	if len(nodeTables) != 2 {
+		t.Errorf("want 2 node tables (CodeNode, GraphMeta), got %d: %v", len(nodeTables), nodeTables)
 	}
 	if relTables != len(model.AllEdgeKinds()) {
 		t.Errorf("want %d rel tables, got %d", len(model.AllEdgeKinds()), relTables)

From f9074d88b8a3d36d36a7912ccb5a24b449524180 Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Fri, 15 May 2026 07:10:00 +0000
Subject: [PATCH 09/12] feat(linker+graph): source-tag linker emissions +
 WipeLinkerEdges API

---
 internal/analyzer/linker/entity_linker.go     |  1 +
 .../linker/module_containment_linker.go       |  2 +
 internal/analyzer/linker/source_tag.go        | 20 +++++
 internal/analyzer/linker/source_tag_test.go   | 85 +++++++++++++++++++
 internal/analyzer/linker/topic_linker.go      |  1 +
 internal/graph/refresh.go                     | 33 +++++++
 internal/graph/refresh_test.go                | 62 ++++++++++++++
 7 files changed, 204 insertions(+)
 create mode 100644 internal/analyzer/linker/source_tag.go
 create mode 100644 internal/analyzer/linker/source_tag_test.go

diff --git a/internal/analyzer/linker/entity_linker.go b/internal/analyzer/linker/entity_linker.go
index 57ca72a4..a4894728 100644
--- a/internal/analyzer/linker/entity_linker.go
+++ b/internal/analyzer/linker/entity_linker.go
@@ -86,6 +86,7 @@ func (l *EntityLinker) Link(nodes []*model.CodeNode, edges []*model.CodeEdge) Re
 				Kind:       model.EdgeQueries,
 				SourceID:   repo.ID,
 				TargetID:   ent.ID,
+				Source:     SrcEntityLinker,
 				Properties: map[string]any{"inferred": true},
 			})
 			break
diff --git a/internal/analyzer/linker/module_containment_linker.go b/internal/analyzer/linker/module_containment_linker.go
index c33830b3..87b8fa35 100644
--- a/internal/analyzer/linker/module_containment_linker.go
+++ b/internal/analyzer/linker/module_containment_linker.go
@@ -66,6 +66,7 @@ func (l *ModuleContainmentLinker) Link(nodes []*model.CodeNode, edges []*model.C
 				Label:  m,
 				FQN:    m,
 				Module: m,
+				Source: SrcModuleContainmentLinker,
 			})
 			existingModules[moduleID] = struct{}{}
 		}
@@ -81,6 +82,7 @@ func (l *ModuleContainmentLinker) Link(nodes []*model.CodeNode, edges []*model.C
 				Kind:       model.EdgeContains,
 				SourceID:   moduleID,
 				TargetID:   mem.ID,
+				Source:     SrcModuleContainmentLinker,
 				Properties: map[string]any{"inferred": true},
 			})
 			existingContains[key] = struct{}{}
diff --git a/internal/analyzer/linker/source_tag.go b/internal/analyzer/linker/source_tag.go
new file mode 100644
index 00000000..1d9861af
--- /dev/null
+++ b/internal/analyzer/linker/source_tag.go
@@ -0,0 +1,20 @@
+package linker
+
+// Source-tag constants stamped onto every edge a linker emits. They give
+// the incremental enrich pipeline a way to clear previous linker output
+// (via Store.WipeLinkerEdges) before re-running linkers, without touching
+// detector-emitted edges.
+const (
+	SrcTopicLinker             = "linker:topic"
+	SrcEntityLinker            = "linker:entity"
+	SrcModuleContainmentLinker = "linker:module_containment"
+)
+
+// AllSources lists every linker source tag. Store.WipeLinkerEdges takes
+// this slice (or a subset) when clearing linker output. Add new entries
+// here when adding a new linker.
+var AllSources = []string{
+	SrcTopicLinker,
+	SrcEntityLinker,
+	SrcModuleContainmentLinker,
+}
diff --git a/internal/analyzer/linker/source_tag_test.go b/internal/analyzer/linker/source_tag_test.go
new file mode 100644
index 00000000..c4d3e633
--- /dev/null
+++ b/internal/analyzer/linker/source_tag_test.go
@@ -0,0 +1,85 @@
+package linker
+
+import (
+	"testing"
+
+	"github.com/randomcodespace/codeiq/internal/model"
+)
+
+func TestTopicLinkerStampsSource(t *testing.T) {
+	nodes := []*model.CodeNode{
+		{ID: "svc:p", Kind: model.NodeService, Label: "p"},
+		{ID: "svc:c", Kind: model.NodeService, Label: "c"},
+		{ID: "topic:t", Kind: model.NodeTopic, Label: "t"},
+	}
+	edges := []*model.CodeEdge{
+		{ID: "p->t", Kind: model.EdgeProduces, SourceID: "svc:p", TargetID: "topic:t"},
+		{ID: "c->t", Kind: model.EdgeConsumes, SourceID: "svc:c", TargetID: "topic:t"},
+	}
+	r := NewTopicLinker().Link(nodes, edges)
+	if len(r.Edges) == 0 {
+		t.Fatal("TopicLinker emitted no edges")
+	}
+	for _, e := range r.Edges {
+		if e.Source != SrcTopicLinker {
+			t.Errorf("edge %s: Source = %q, want %q", e.ID, e.Source, SrcTopicLinker)
+		}
+	}
+}
+
+func TestEntityLinkerStampsSource(t *testing.T) {
+	nodes := []*model.CodeNode{
+		{ID: "ent:User", Kind: model.NodeEntity, Label: "User"},
+		{ID: "repo:UserRepo", Kind: model.NodeRepository, Label: "UserRepository"},
+	}
+	r := NewEntityLinker().Link(nodes, nil)
+	if len(r.Edges) == 0 {
+		t.Fatal("EntityLinker emitted no edges")
+	}
+	for _, e := range r.Edges {
+		if e.Source != SrcEntityLinker {
+			t.Errorf("edge %s: Source = %q, want %q", e.ID, e.Source, SrcEntityLinker)
+		}
+	}
+}
+
+func TestModuleContainmentLinkerStampsSource(t *testing.T) {
+	nodes := []*model.CodeNode{
+		{ID: "n1", Kind: model.NodeClass, Label: "A", Module: "auth"},
+		{ID: "n2", Kind: model.NodeClass, Label: "B", Module: "auth"},
+	}
+	r := NewModuleContainmentLinker().Link(nodes, nil)
+	if len(r.Edges) == 0 {
+		t.Fatal("ModuleContainmentLinker emitted no edges")
+	}
+	for _, e := range r.Edges {
+		if e.Source != SrcModuleContainmentLinker {
+			t.Errorf("edge %s: Source = %q, want %q", e.ID, e.Source, SrcModuleContainmentLinker)
+		}
+	}
+	for _, n := range r.Nodes {
+		if n.Source != SrcModuleContainmentLinker {
+			t.Errorf("node %s: Source = %q, want %q", n.ID, n.Source, SrcModuleContainmentLinker)
+		}
+	}
+}
+
+func TestAllSourcesIncludesEveryLinker(t *testing.T) {
+	want := map[string]bool{
+		SrcTopicLinker:             false,
+		SrcEntityLinker:            false,
+		SrcModuleContainmentLinker: false,
+	}
+	for _, s := range AllSources {
+		if _, ok := want[s]; !ok {
+			t.Errorf("AllSources contains unknown tag %q", s)
+			continue
+		}
+		want[s] = true
+	}
+	for tag, found := range want {
+		if !found {
+			t.Errorf("AllSources missing %q", tag)
+		}
+	}
+}
diff --git a/internal/analyzer/linker/topic_linker.go b/internal/analyzer/linker/topic_linker.go
index f620c8ac..29e33c17 100644
--- a/internal/analyzer/linker/topic_linker.go
+++ b/internal/analyzer/linker/topic_linker.go
@@ -93,6 +93,7 @@ func (l *TopicLinker) Link(nodes []*model.CodeNode, edges []*model.CodeEdge) Res
 					Kind:     model.EdgeCalls,
 					SourceID: p,
 					TargetID: c,
+					Source:   SrcTopicLinker,
 					Properties: map[string]any{
 						"inferred": true,
 						"topic":    label,
diff --git a/internal/graph/refresh.go b/internal/graph/refresh.go
index 855efc3d..be58155c 100644
--- a/internal/graph/refresh.go
+++ b/internal/graph/refresh.go
@@ -53,6 +53,39 @@ func (s *Store) InsertFile(path string, nodes []*model.CodeNode, edges []*model.
 	return nil
 }
 
+// WipeLinkerEdges deletes every relationship whose source property is in
+// the given sources set, across every declared rel table. Used by
+// incremental enrich to clear previous linker emissions before re-running
+// linker passes.
+//
+// Iterates per rel-type because Kuzu (0.11.3) doesn't support a
+// heterogeneous "match any rel" DELETE across rel tables.
+//
+// Linker-emitted nodes (e.g., MODULE nodes from ModuleContainmentLinker)
+// are also wiped when their source tag is in the set — caller can pass
+// only edge-relevant tags if they want to preserve nodes.
+func (s *Store) WipeLinkerEdges(sources []string) error {
+	if len(sources) == 0 {
+		return nil
+	}
+	for _, kind := range model.AllEdgeKinds() {
+		q := fmt.Sprintf(
+			`MATCH ()-[r:%s]->() WHERE r.source IN $sources DELETE r`,
+			relTableName(kind))
+		if _, err := s.Cypher(q, map[string]any{"sources": sources}); err != nil {
+			return fmt.Errorf("graph: wipe %s edges: %w", relTableName(kind), err)
+		}
+	}
+	// Linker-emitted nodes (module nodes) — drop any CodeNode whose source
+	// tag is in the set. These re-emit on the next linker pass.
+	if _, err := s.Cypher(
+		`MATCH (n:CodeNode) WHERE n.source IN $sources DELETE n`,
+		map[string]any{"sources": sources}); err != nil {
+		return fmt.Errorf("graph: wipe linker nodes: %w", err)
+	}
+	return nil
+}
+
 // ReplaceFile is the MODIFIED-file path: RemoveFile followed by InsertFile.
 // There is a brief window between the two calls where the file's nodes are
 // absent from the graph; concurrent readers see either pre-state or
diff --git a/internal/graph/refresh_test.go b/internal/graph/refresh_test.go
index 6016e86f..34058057 100644
--- a/internal/graph/refresh_test.go
+++ b/internal/graph/refresh_test.go
@@ -136,6 +136,68 @@ func TestInsertFileEmptyIsNoop(t *testing.T) {
 	}
 }
 
+func TestWipeLinkerEdgesByTag(t *testing.T) {
+	s := openSchemaStore(t)
+	defer s.Close()
+	if err := s.BulkLoadNodes([]*model.CodeNode{
+		{ID: "a", Kind: model.NodeService, Label: "A"},
+		{ID: "b", Kind: model.NodeService, Label: "B"},
+	}); err != nil {
+		t.Fatal(err)
+	}
+	if err := s.BulkLoadEdges([]*model.CodeEdge{
+		{ID: "e-det", Kind: model.EdgeDependsOn, SourceID: "a", TargetID: "b",
+			Source: "SomeDetector", Confidence: model.ConfidenceSyntactic},
+		{ID: "e-lnk", Kind: model.EdgeDependsOn, SourceID: "a", TargetID: "b",
+			Source: "linker:topic", Confidence: model.ConfidenceLexical},
+	}); err != nil {
+		t.Fatal(err)
+	}
+	if err := s.WipeLinkerEdges([]string{"linker:topic", "linker:entity", "linker:module_containment"}); err != nil {
+		t.Fatalf("WipeLinkerEdges: %v", err)
+	}
+	rows, err := s.Cypher("MATCH ()-[r:DEPENDS_ON]->() RETURN r.id AS id ORDER BY r.id")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if len(rows) != 1 {
+		t.Fatalf("want 1 surviving (detector), got %d: %v", len(rows), rows)
+	}
+	if rows[0]["id"] != "e-det" {
+		t.Fatalf("wrong edge survived: %v", rows[0])
+	}
+}
+
+func TestWipeLinkerEdgesEmptySources(t *testing.T) {
+	s := openSchemaStore(t)
+	defer s.Close()
+	// Empty sources is a no-op, not an error.
+	if err := s.WipeLinkerEdges(nil); err != nil {
+		t.Fatalf("WipeLinkerEdges(nil): %v", err)
+	}
+}
+
+func TestWipeLinkerEdgesAlsoDropsLinkerNodes(t *testing.T) {
+	s := openSchemaStore(t)
+	defer s.Close()
+	if err := s.BulkLoadNodes([]*model.CodeNode{
+		{ID: "m:auth", Kind: model.NodeModule, Label: "auth", Source: "linker:module_containment"},
+		{ID: "c:Foo", Kind: model.NodeClass, Label: "Foo", Source: "SomeDetector"},
+	}); err != nil {
+		t.Fatal(err)
+	}
+	if err := s.WipeLinkerEdges([]string{"linker:module_containment"}); err != nil {
+		t.Fatal(err)
+	}
+	rows, _ := s.Cypher("MATCH (n:CodeNode) RETURN n.id AS id ORDER BY n.id")
+	if len(rows) != 1 {
+		t.Fatalf("want 1 surviving (detector node), got %d: %v", len(rows), rows)
+	}
+	if rows[0]["id"] != "c:Foo" {
+		t.Fatalf("wrong node survived: %v", rows[0])
+	}
+}
+
 func TestReplaceFileSwapsContent(t *testing.T) {
 	s := openSchemaStore(t)
 	defer s.Close()

From da323e387f5ae3d31f18473b211115a6877c3644 Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Fri, 15 May 2026 07:14:09 +0000
Subject: [PATCH 10/12] feat(enrich): manifest short-circuit + Reset for
 re-runnable enrich

---
 internal/analyzer/enrich.go      |  89 ++++++++++++++++++-----
 internal/analyzer/enrich_test.go | 120 +++++++++++++++++++++++++++++++
 internal/graph/refresh.go        |  16 +++++
 internal/graph/refresh_test.go   |  44 ++++++++++++
 4 files changed, 252 insertions(+), 17 deletions(-)

diff --git a/internal/analyzer/enrich.go b/internal/analyzer/enrich.go
index 2b0b7555..6e0f6ae4 100644
--- a/internal/analyzer/enrich.go
+++ b/internal/analyzer/enrich.go
@@ -29,13 +29,28 @@ type EnrichOptions struct {
 	// StoreCopyThreads caps Kuzu COPY FROM parallelism. Zero -> graph
 	// package default (min(4, GOMAXPROCS)).
 	StoreCopyThreads uint64
+	// Force bypasses the incremental short-circuit. With Force=false (the
+	// default), Enrich short-circuits when the cache and graph manifest
+	// hashes match.
+	Force bool
 }
 
 // EnrichSummary reports per-run counters from a successful Enrich.
+//
+// ShortCircuited is true when the cache and graph were already in sync
+// and Enrich did no rebuild work. Nodes/Edges/Services report 0 in that
+// case — callers should check ShortCircuited before treating zero as
+// "empty graph".
+//
+// Mode is one of:
+//   - "short-circuit" — cache + graph manifests matched; no work done.
+//   - "full"          — fresh graph or non-matching manifest; full rebuild.
 type EnrichSummary struct {
-	Nodes    int
-	Edges    int
-	Services int
+	Nodes          int
+	Edges          int
+	Services       int
+	ShortCircuited bool
+	Mode           string
 }
 
 // Enrich loads the SQLite cache for `root`, runs the linker / classifier /
@@ -44,6 +59,16 @@ type EnrichSummary struct {
 // returned summary reports total nodes / edges / service nodes after every
 // pass has run.
 //
+// Incremental short-circuit: when a prior enrich wrote a manifest_hash to
+// the graph and that hash matches the cache's current manifest, Enrich
+// returns immediately with ShortCircuited=true. To force a full rebuild,
+// pass Force=true.
+//
+// Re-runnable: when the graph already holds prior data (non-empty
+// CodeNode table), Enrich calls Store.Reset() first so the subsequent
+// BulkLoad doesn't collide on primary keys. This makes `enrich` safe to
+// re-run after `index` picks up changes.
+//
 // Mirrors the `enrich` pipeline in Java (Analyzer.java + GraphStore.java).
 // The pipeline order matches the Java side exactly:
 //
@@ -53,6 +78,7 @@ type EnrichSummary struct {
 //  4. LanguageEnricher (Java, TypeScript, Python, Go extractors)
 //  5. ServiceDetector (filesystem walk for build files)
 //  6. graph.Store.BulkLoadNodes / BulkLoadEdges / CreateIndexes
+//  7. graph.Store.WriteManifest(cache.ManifestHash())
 //
 // All steps are deterministic — repeated calls against the same cache + root
 // produce identical Kuzu output.
@@ -61,10 +87,43 @@ func Enrich(root string, c *cache.Cache, opts EnrichOptions) (EnrichSummary, err
 		opts.GraphDir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu")
 	}
 
+	store, err := graph.OpenWithOptions(opts.GraphDir, graph.OpenOptions{
+		BufferPoolBytes: opts.StoreBufferPoolBytes,
+		MaxThreads:      opts.StoreCopyThreads,
+	})
+	if err != nil {
+		return EnrichSummary{}, fmt.Errorf("enrich: open graph: %w", err)
+	}
+	defer store.Close()
+	if err := store.ApplySchema(); err != nil {
+		return EnrichSummary{}, fmt.Errorf("enrich: apply schema: %w", err)
+	}
+
+	// Short-circuit when the cache and graph are already in sync.
+	if !opts.Force {
+		cacheManifest, mErr := c.ManifestHash()
+		if mErr != nil {
+			return EnrichSummary{}, fmt.Errorf("enrich: cache manifest: %w", mErr)
+		}
+		graphManifest, gErr := store.ReadManifest()
+		if gErr != nil {
+			return EnrichSummary{}, fmt.Errorf("enrich: graph manifest: %w", gErr)
+		}
+		if graphManifest != "" && cacheManifest == graphManifest {
+			return EnrichSummary{ShortCircuited: true, Mode: "short-circuit"}, nil
+		}
+	}
+
+	// Reset existing graph data so BulkLoad doesn't collide on PKs from a
+	// prior run. Reset on a fresh graph is a cheap no-op.
+	if err := store.Reset(); err != nil {
+		return EnrichSummary{}, fmt.Errorf("enrich: reset: %w", err)
+	}
+
 	// Re-hydrate the graph from cache. GraphBuilder dedupes by node/edge ID and
 	// produces a deterministic snapshot with dangling edges dropped.
 	builder := NewGraphBuilder()
-	err := c.IterateAll(func(r *cache.Entry) error {
+	err = c.IterateAll(func(r *cache.Entry) error {
 		builder.Add(&detector.Result{Nodes: r.Nodes, Edges: r.Edges})
 		return nil
 	})
@@ -113,19 +172,7 @@ func Enrich(root string, c *cache.Cache, opts EnrichOptions) (EnrichSummary, err
 	nodes = append(nodes, sres.Nodes...)
 	edges = append(edges, sres.Edges...)
 
-	// 6. Bulk-load Kuzu — schema + nodes + edges + indexes. The store is
-	//    closed when this function returns; read-side commands re-open it.
-	store, err := graph.OpenWithOptions(opts.GraphDir, graph.OpenOptions{
-		BufferPoolBytes: opts.StoreBufferPoolBytes,
-		MaxThreads:      opts.StoreCopyThreads,
-	})
-	if err != nil {
-		return EnrichSummary{}, fmt.Errorf("enrich: open graph: %w", err)
-	}
-	defer store.Close()
-	if err := store.ApplySchema(); err != nil {
-		return EnrichSummary{}, fmt.Errorf("enrich: apply schema: %w", err)
-	}
+	// 6. Bulk-load Kuzu — nodes + edges + indexes.
 	if err := store.BulkLoadNodes(nodes); err != nil {
 		return EnrichSummary{}, fmt.Errorf("enrich: bulk load nodes: %w", err)
 	}
@@ -136,10 +183,18 @@ func Enrich(root string, c *cache.Cache, opts EnrichOptions) (EnrichSummary, err
 		return EnrichSummary{}, fmt.Errorf("enrich: create indexes: %w", err)
 	}
 
+	// 7. Write the manifest hash so the next run can short-circuit. Best-effort
+	//    — a failed manifest write only forces the next run to re-do the work,
+	//    which is annoying but not incorrect.
+	if mh, mErr := c.ManifestHash(); mErr == nil {
+		_ = store.WriteManifest(mh)
+	}
+
 	return EnrichSummary{
 		Nodes:    len(nodes),
 		Edges:    len(edges),
 		Services: len(sres.Nodes),
+		Mode:     "full",
 	}, nil
 }
 
diff --git a/internal/analyzer/enrich_test.go b/internal/analyzer/enrich_test.go
index 851d6fe0..791c9eec 100644
--- a/internal/analyzer/enrich_test.go
+++ b/internal/analyzer/enrich_test.go
@@ -76,6 +76,126 @@ func TestEnrichEmptyCacheIsNoop(t *testing.T) {
 	}
 }
 
+// TestEnrichShortCircuitsWhenManifestMatches verifies the incremental
+// short-circuit: a second enrich against an unchanged cache returns
+// ShortCircuited=true without rebuilding the graph.
+func TestEnrichShortCircuitsWhenManifestMatches(t *testing.T) {
+	dir := t.TempDir()
+	if err := os.WriteFile(filepath.Join(dir, "X.java"), []byte("class X {}"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	c, err := cache.Open(filepath.Join(dir, "cache.sqlite"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer c.Close()
+	a := analyzer.NewAnalyzer(analyzer.Options{Cache: c})
+	if _, err := a.Run(dir); err != nil {
+		t.Fatal(err)
+	}
+	graphDir := filepath.Join(dir, "graph.kuzu")
+
+	// First run: full enrich, writes manifest.
+	first, err := analyzer.Enrich(dir, c, analyzer.EnrichOptions{GraphDir: graphDir})
+	if err != nil {
+		t.Fatalf("first enrich: %v", err)
+	}
+	if first.ShortCircuited {
+		t.Fatal("first enrich short-circuited; want full")
+	}
+	if first.Mode != "full" {
+		t.Fatalf("first Mode = %q, want full", first.Mode)
+	}
+	if first.Nodes == 0 {
+		t.Fatal("first enrich produced 0 nodes")
+	}
+
+	// Second run: no cache changes → must short-circuit.
+	second, err := analyzer.Enrich(dir, c, analyzer.EnrichOptions{GraphDir: graphDir})
+	if err != nil {
+		t.Fatalf("second enrich: %v", err)
+	}
+	if !second.ShortCircuited {
+		t.Fatalf("second enrich did NOT short-circuit: %+v", second)
+	}
+	if second.Mode != "short-circuit" {
+		t.Fatalf("second Mode = %q, want short-circuit", second.Mode)
+	}
+}
+
+// TestEnrichForceBypassesShortCircuit verifies Force=true re-runs the
+// full pipeline even when manifests match.
+func TestEnrichForceBypassesShortCircuit(t *testing.T) {
+	dir := t.TempDir()
+	if err := os.WriteFile(filepath.Join(dir, "Y.java"), []byte("class Y {}"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	c, err := cache.Open(filepath.Join(dir, "cache.sqlite"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer c.Close()
+	a := analyzer.NewAnalyzer(analyzer.Options{Cache: c})
+	if _, err := a.Run(dir); err != nil {
+		t.Fatal(err)
+	}
+	graphDir := filepath.Join(dir, "graph.kuzu")
+	if _, err := analyzer.Enrich(dir, c, analyzer.EnrichOptions{GraphDir: graphDir}); err != nil {
+		t.Fatal(err)
+	}
+	forced, err := analyzer.Enrich(dir, c, analyzer.EnrichOptions{GraphDir: graphDir, Force: true})
+	if err != nil {
+		t.Fatalf("forced enrich: %v", err)
+	}
+	if forced.ShortCircuited {
+		t.Fatal("Force=true should bypass short-circuit")
+	}
+	if forced.Mode != "full" {
+		t.Fatalf("forced Mode = %q, want full", forced.Mode)
+	}
+}
+
+// TestEnrichRerunAfterFileChange verifies a re-run after a file change
+// produces the right graph (no PK collisions, manifest updated).
+func TestEnrichRerunAfterFileChange(t *testing.T) {
+	dir := t.TempDir()
+	if err := os.WriteFile(filepath.Join(dir, "A.java"), []byte("class A {}"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	c, err := cache.Open(filepath.Join(dir, "cache.sqlite"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer c.Close()
+	a := analyzer.NewAnalyzer(analyzer.Options{Cache: c})
+	if _, err := a.Run(dir); err != nil {
+		t.Fatal(err)
+	}
+	graphDir := filepath.Join(dir, "graph.kuzu")
+	first, err := analyzer.Enrich(dir, c, analyzer.EnrichOptions{GraphDir: graphDir})
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	// Add a second file, re-index, re-enrich.
+	if err := os.WriteFile(filepath.Join(dir, "B.java"), []byte("class B {}"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := a.Run(dir); err != nil {
+		t.Fatal(err)
+	}
+	second, err := analyzer.Enrich(dir, c, analyzer.EnrichOptions{GraphDir: graphDir})
+	if err != nil {
+		t.Fatalf("rerun after change: %v", err)
+	}
+	if second.ShortCircuited {
+		t.Fatal("rerun after file add must NOT short-circuit")
+	}
+	if second.Nodes < first.Nodes {
+		t.Fatalf("rerun produced fewer nodes (%d) than first (%d)", second.Nodes, first.Nodes)
+	}
+}
+
 // TestEnrichFixtureMinimalProducesGraph runs the full index → enrich pipeline
 // against the fixture-minimal corpus and asserts the resulting graph has at
 // least the entity / endpoint / service nodes the fixture is expected to
diff --git a/internal/graph/refresh.go b/internal/graph/refresh.go
index be58155c..ee9392b7 100644
--- a/internal/graph/refresh.go
+++ b/internal/graph/refresh.go
@@ -53,6 +53,22 @@ func (s *Store) InsertFile(path string, nodes []*model.CodeNode, edges []*model.
 	return nil
 }
 
+// Reset clears every CodeNode (and its incident edges via DETACH DELETE)
+// plus every GraphMeta row, while preserving the schema. Used before a
+// full re-enrich on a graph that already holds prior state, so the
+// subsequent BulkLoad doesn't collide with stale primary keys.
+//
+// Calling Reset on a fresh (already empty) graph is a no-op.
+func (s *Store) Reset() error {
+	if _, err := s.Cypher(`MATCH (n:CodeNode) DETACH DELETE n`); err != nil {
+		return fmt.Errorf("graph: reset CodeNode: %w", err)
+	}
+	if _, err := s.Cypher(`MATCH (m:GraphMeta) DELETE m`); err != nil {
+		return fmt.Errorf("graph: reset GraphMeta: %w", err)
+	}
+	return nil
+}
+
 // WipeLinkerEdges deletes every relationship whose source property is in
 // the given sources set, across every declared rel table. Used by
 // incremental enrich to clear previous linker emissions before re-running
diff --git a/internal/graph/refresh_test.go b/internal/graph/refresh_test.go
index 34058057..046a663d 100644
--- a/internal/graph/refresh_test.go
+++ b/internal/graph/refresh_test.go
@@ -198,6 +198,50 @@ func TestWipeLinkerEdgesAlsoDropsLinkerNodes(t *testing.T) {
 	}
 }
 
+func TestResetClearsAllData(t *testing.T) {
+	s := openSchemaStore(t)
+	defer s.Close()
+	if err := s.BulkLoadNodes([]*model.CodeNode{
+		{ID: "n1", Kind: model.NodeClass, Label: "A", FilePath: "A.java"},
+		{ID: "n2", Kind: model.NodeClass, Label: "B", FilePath: "B.java"},
+	}); err != nil {
+		t.Fatal(err)
+	}
+	if err := s.WriteManifest("h1"); err != nil {
+		t.Fatal(err)
+	}
+	if countCodeNodes(t, s) != 2 {
+		t.Fatal("seed didn't take")
+	}
+	if err := s.Reset(); err != nil {
+		t.Fatalf("Reset: %v", err)
+	}
+	if countCodeNodes(t, s) != 0 {
+		t.Fatal("Reset left CodeNodes")
+	}
+	got, _ := s.ReadManifest()
+	if got != "" {
+		t.Fatalf("Reset left manifest %q", got)
+	}
+	// Schema must still be usable: we can bulk-load again without re-ApplySchema.
+	if err := s.BulkLoadNodes([]*model.CodeNode{
+		{ID: "x", Kind: model.NodeClass, Label: "X", FilePath: "X.java"},
+	}); err != nil {
+		t.Fatalf("BulkLoadNodes after Reset: %v", err)
+	}
+	if countCodeNodes(t, s) != 1 {
+		t.Fatal("post-Reset BulkLoad didn't take")
+	}
+}
+
+func TestResetOnFreshGraphIsNoop(t *testing.T) {
+	s := openSchemaStore(t)
+	defer s.Close()
+	if err := s.Reset(); err != nil {
+		t.Fatalf("Reset on fresh graph: %v", err)
+	}
+}
+
 func TestReplaceFileSwapsContent(t *testing.T) {
 	s := openSchemaStore(t)
 	defer s.Close()

From 9f8f88988fbecdcb6cf1bdb9e712e336ba1db0a2 Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Fri, 15 May 2026 07:17:42 +0000
Subject: [PATCH 11/12] feat(cli): index/enrich --force, enrich --diff, codeiq
 diff subcommand

---
 internal/cli/diff.go   | 86 ++++++++++++++++++++++++++++++++++++++++++
 internal/cli/enrich.go | 56 +++++++++++++++++++++++----
 internal/cli/index.go  | 15 ++++++++
 3 files changed, 149 insertions(+), 8 deletions(-)
 create mode 100644 internal/cli/diff.go

diff --git a/internal/cli/diff.go b/internal/cli/diff.go
new file mode 100644
index 00000000..b7463242
--- /dev/null
+++ b/internal/cli/diff.go
@@ -0,0 +1,86 @@
+package cli
+
+import (
+	"encoding/json"
+	"fmt"
+	"path/filepath"
+
+	"github.com/randomcodespace/codeiq/internal/analyzer"
+	"github.com/randomcodespace/codeiq/internal/cache"
+	"github.com/randomcodespace/codeiq/internal/detector"
+	"github.com/spf13/cobra"
+)
+
+func init() {
+	registerSubcommand(func() *cobra.Command {
+		var cachePath string
+		cmd := &cobra.Command{
+			Use:   "diff [path]",
+			Short: "Show the cache vs disk delta without touching the graph.",
+			Long: `Walk the project at [path] and classify each file against the
+SQLite analysis cache:
+
+  - Added     -- on disk, not in cache
+  - Modified  -- path in cache but content hash differs from disk
+  - Deleted   -- in cache, missing from disk
+  - Unchanged -- path + content hash match cache exactly
+
+Useful for previewing what an incremental ` + "`codeiq index`" + ` /
+` + "`codeiq enrich`" + ` run would do. The cache is not modified.`,
+			Example: `  codeiq diff .
+  codeiq diff /path/to/repo
+  codeiq diff /path/to/repo --cache-path /tmp/scratch.sqlite`,
+			Args: cobra.MaximumNArgs(1),
+			RunE: func(cmd *cobra.Command, args []string) error {
+				root, err := resolvePath(args)
+				if err != nil {
+					return err
+				}
+				cp := cachePath
+				if cp == "" {
+					cp = filepath.Join(root, ".codeiq", "cache", "codeiq.sqlite")
+				}
+				c, err := cache.Open(cp)
+				if err != nil {
+					return fmt.Errorf("open cache %s: %w", cp, err)
+				}
+				defer c.Close()
+				a := analyzer.NewAnalyzer(analyzer.Options{
+					Cache:    c,
+					Registry: detector.Default,
+				})
+				d, err := a.Diff(root)
+				if err != nil {
+					return err
+				}
+				out := map[string]any{
+					"added":     stringsOrEmpty(d.Added),
+					"modified":  stringsOrEmpty(d.Modified),
+					"deleted":   stringsOrEmpty(d.Deleted),
+					"unchanged": stringsOrEmpty(d.Unchanged),
+					"counts": map[string]int{
+						"added":     len(d.Added),
+						"modified":  len(d.Modified),
+						"deleted":   len(d.Deleted),
+						"unchanged": len(d.Unchanged),
+					},
+				}
+				enc := json.NewEncoder(cmd.OutOrStdout())
+				enc.SetIndent("", "  ")
+				return enc.Encode(out)
+			},
+		}
+		cmd.Flags().StringVar(&cachePath, "cache-path", "",
+			"Path to the cache file (default: <path>/.codeiq/cache/codeiq.sqlite).")
+		return cmd
+	})
+}
+
+// stringsOrEmpty replaces a nil slice with an empty one so JSON output is
+// `[]` instead of `null` for empty buckets.
+func stringsOrEmpty(s []string) []string {
+	if s == nil {
+		return []string{}
+	}
+	return s
+}
diff --git a/internal/cli/enrich.go b/internal/cli/enrich.go
index 67dbffff..e592c866 100644
--- a/internal/cli/enrich.go
+++ b/internal/cli/enrich.go
@@ -1,6 +1,7 @@
 package cli
 
 import (
+	"encoding/json"
 	"fmt"
 	"os"
 	"path/filepath"
@@ -9,16 +10,19 @@ import (
 
 	"github.com/randomcodespace/codeiq/internal/analyzer"
 	"github.com/randomcodespace/codeiq/internal/cache"
+	"github.com/randomcodespace/codeiq/internal/detector"
 	"github.com/spf13/cobra"
 )
 
 func init() {
 	registerSubcommand(func() *cobra.Command {
 		var (
-			graphDir       string
-			memProfile     string
-			maxBufferPool  int64
-			copyThreads    int
+			graphDir      string
+			memProfile    string
+			maxBufferPool int64
+			copyThreads   int
+			force         bool
+			diffOnly      bool
 		)
 		cmd := &cobra.Command{
 			Use:   "enrich [path]",
@@ -56,7 +60,34 @@ become available and the stdio MCP server can serve clients.`,
 					return fmt.Errorf("open cache %s: %w", cachePath, err)
 				}
 				defer c.Close()
-				opts := analyzer.EnrichOptions{GraphDir: graphDir}
+
+				// --diff: print Diff against the cache as JSON and exit.
+				// Does not touch the graph. Useful for previewing what an
+				// incremental enrich would do.
+				if diffOnly {
+					a := analyzer.NewAnalyzer(analyzer.Options{Cache: c, Registry: detector.Default})
+					d, dErr := a.Diff(root)
+					if dErr != nil {
+						return dErr
+					}
+					out := map[string]any{
+						"added":     d.Added,
+						"modified":  d.Modified,
+						"deleted":   d.Deleted,
+						"unchanged": d.Unchanged,
+						"counts": map[string]int{
+							"added":     len(d.Added),
+							"modified":  len(d.Modified),
+							"deleted":   len(d.Deleted),
+							"unchanged": len(d.Unchanged),
+						},
+					}
+					enc := json.NewEncoder(cmd.OutOrStdout())
+					enc.SetIndent("", "  ")
+					return enc.Encode(out)
+				}
+
+				opts := analyzer.EnrichOptions{GraphDir: graphDir, Force: force}
 				if maxBufferPool > 0 {
 					opts.StoreBufferPoolBytes = uint64(maxBufferPool)
 				}
@@ -79,9 +110,14 @@ become available and the stdio MCP server can serve clients.`,
 					}
 					fmt.Fprintf(cmd.ErrOrStderr(), "heap profile written to %s\n", memProfile)
 				}
-				fmt.Fprintf(cmd.OutOrStdout(),
-					"enrich complete: %d nodes, %d edges, %d services\n",
-					summary.Nodes, summary.Edges, summary.Services)
+				if summary.ShortCircuited {
+					fmt.Fprintln(cmd.OutOrStdout(),
+						"enrich short-circuited: graph already matches cache manifest")
+				} else {
+					fmt.Fprintf(cmd.OutOrStdout(),
+						"enrich complete: %d nodes, %d edges, %d services\n",
+						summary.Nodes, summary.Edges, summary.Services)
+				}
 				return nil
 			},
 		}
@@ -93,6 +129,10 @@ become available and the stdio MCP server can serve clients.`,
 			"Cap Kuzu BufferPoolSize in bytes (default: 2 GiB; 0 means default).")
 		cmd.Flags().IntVar(&copyThreads, "copy-threads", 0,
 			"Cap Kuzu COPY FROM parallelism (default: min(4, GOMAXPROCS); 0 means default).")
+		cmd.Flags().BoolVar(&force, "force", false,
+			"Bypass the incremental short-circuit; rebuild the graph from scratch.")
+		cmd.Flags().BoolVar(&diffOnly, "diff", false,
+			"Print the cache vs disk delta as JSON and exit without touching the graph.")
 		return cmd
 	})
 }
diff --git a/internal/cli/index.go b/internal/cli/index.go
index 2505990b..197e499f 100644
--- a/internal/cli/index.go
+++ b/internal/cli/index.go
@@ -22,6 +22,7 @@ func init() {
 		var (
 			batchSize int
 			workers   int
+			force     bool
 		)
 		cmd := &cobra.Command{
 			Use:   "index [path]",
@@ -68,6 +69,7 @@ Java and Python.`,
 					Registry:  detector.Default,
 					BatchSize: batchSize,
 					Workers:   workers,
+					Force:     force,
 				})
 				stats, err := a.Run(abs)
 				if err != nil {
@@ -81,6 +83,17 @@ Java and Python.`,
 						"Deduped: %d nodes, %d edges  Dropped: %d phantom edges\n",
 						stats.DedupedNodes, stats.DedupedEdges, stats.DroppedEdges)
 				}
+				// Incremental counters are only meaningful when the cache was
+				// consulted (i.e. not --force). Print them when any of them is
+				// non-zero so unchanged re-runs see "Unchanged: N (100%)".
+				if !force && (stats.Added+stats.Modified+stats.Deleted+stats.Unchanged) > 0 {
+					line := fmt.Sprintf("Added: %d  Modified: %d  Deleted: %d  Unchanged: %d  Cache hits: %d",
+						stats.Added, stats.Modified, stats.Deleted, stats.Unchanged, stats.CacheHits)
+					if stats.Files > 0 {
+						line += fmt.Sprintf(" (%.1f%%)", 100.0*float64(stats.CacheHits)/float64(stats.Files))
+					}
+					fmt.Fprintln(cmd.OutOrStdout(), line)
+				}
 				return nil
 			},
 		}
@@ -88,6 +101,8 @@ Java and Python.`,
 			"Number of files processed per batch (default: 500).")
 		cmd.Flags().IntVarP(&workers, "workers", "w", 0,
 			"Worker goroutine count (default: 2 * GOMAXPROCS).")
+		cmd.Flags().BoolVar(&force, "force", false,
+			"Bypass the incremental cache; re-parse every file even when the content hash hasn't changed.")
 		return cmd
 	})
 }

From 8c39591ba1da9e044a6c416c86fa52b507f1ffcb Mon Sep 17 00:00:00 2001
From: Amit Kumar <ak.nitrr13@gmail.com>
Date: Fri, 15 May 2026 07:21:01 +0000
Subject: [PATCH 12/12] test(integration): incremental==full determinism +
 idempotence + delete-then-add

---
 .../analyzer/incremental_integration_test.go  | 265 ++++++++++++++++++
 1 file changed, 265 insertions(+)
 create mode 100644 internal/analyzer/incremental_integration_test.go

diff --git a/internal/analyzer/incremental_integration_test.go b/internal/analyzer/incremental_integration_test.go
new file mode 100644
index 00000000..0e045b26
--- /dev/null
+++ b/internal/analyzer/incremental_integration_test.go
@@ -0,0 +1,265 @@
+package analyzer_test
+
+import (
+	"os"
+	"path/filepath"
+	"sort"
+	"testing"
+
+	"github.com/randomcodespace/codeiq/internal/analyzer"
+	"github.com/randomcodespace/codeiq/internal/cache"
+	"github.com/randomcodespace/codeiq/internal/graph"
+)
+
+// graphSnapshot returns a stable, comparable representation of the graph:
+// every node by id+kind+label and every edge by id+kind+source+target,
+// sorted. Excludes anything timestamped or otherwise legitimately variable.
+func graphSnapshot(t *testing.T, graphDir string) (nodes []string, edges []string) {
+	t.Helper()
+	s, err := graph.OpenReadOnly(graphDir, 0)
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer s.Close()
+	nodeRows, err := s.Cypher(
+		`MATCH (n:CodeNode) RETURN n.id AS id, n.kind AS kind, n.label AS label ORDER BY n.id`)
+	if err != nil {
+		t.Fatal(err)
+	}
+	for _, r := range nodeRows {
+		nodes = append(nodes,
+			asString(r["id"])+"|"+asString(r["kind"])+"|"+asString(r["label"]))
+	}
+	edgeRows, err := s.Cypher(
+		`MATCH (a:CodeNode)-[r]->(b:CodeNode)
+		 RETURN r.id AS id, a.id AS src, b.id AS tgt ORDER BY r.id, a.id, b.id`)
+	if err != nil {
+		t.Fatal(err)
+	}
+	for _, r := range edgeRows {
+		edges = append(edges, asString(r["id"])+"|"+asString(r["src"])+"|"+asString(r["tgt"]))
+	}
+	sort.Strings(nodes)
+	sort.Strings(edges)
+	return nodes, edges
+}
+
+func asString(v any) string {
+	if s, ok := v.(string); ok {
+		return s
+	}
+	return ""
+}
+
+// TestIncrementalEqualsFull is the core determinism gate: a sequence of
+// incremental runs (file add → modify → delete) must produce a graph
+// identical (modulo metadata) to a clean full rebuild on the final state.
+//
+// Scenario:
+//   Stage 1: write A.java + B.java + C.java, full index + enrich.
+//   Stage 2: modify B.java, delete C.java, add D.java, index + enrich.
+//   Stage 3: blow away .codeiq/, run index + enrich --force on the final tree.
+//   Assert: snapshots from Stage 2 and Stage 3 are identical.
+func TestIncrementalEqualsFull(t *testing.T) {
+	if testing.Short() {
+		t.Skip("incremental integration test; -short")
+	}
+	src := t.TempDir()
+	mustWrite := func(rel, content string) {
+		t.Helper()
+		full := filepath.Join(src, rel)
+		if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(full, []byte(content), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	cachePath := filepath.Join(src, ".codeiq", "cache.sqlite")
+	graphDir := filepath.Join(src, ".codeiq", "graph", "codeiq.kuzu")
+	if err := os.MkdirAll(filepath.Dir(cachePath), 0o755); err != nil {
+		t.Fatal(err)
+	}
+
+	indexAndEnrich := func() {
+		t.Helper()
+		c, err := cache.Open(cachePath)
+		if err != nil {
+			t.Fatal(err)
+		}
+		defer c.Close()
+		a := analyzer.NewAnalyzer(analyzer.Options{Cache: c, Workers: 1})
+		if _, err := a.Run(src); err != nil {
+			t.Fatal(err)
+		}
+		if _, err := analyzer.Enrich(src, c, analyzer.EnrichOptions{GraphDir: graphDir}); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	// Stage 1: initial corpus.
+	mustWrite("A.java", "public class A {}")
+	mustWrite("B.java", "public class B {}")
+	mustWrite("C.java", "public class C {}")
+	indexAndEnrich()
+
+	// Stage 2: modify B, delete C, add D — incremental on top of Stage 1.
+	if err := os.WriteFile(filepath.Join(src, "B.java"), []byte("public class B { int v = 2; }"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.Remove(filepath.Join(src, "C.java")); err != nil {
+		t.Fatal(err)
+	}
+	mustWrite("D.java", "public class D {}")
+	indexAndEnrich()
+	incNodes, incEdges := graphSnapshot(t, graphDir)
+
+	// Stage 3: blow away .codeiq/ entirely, run again from scratch on the
+	// same final filesystem. Use --force on enrich for belt-and-braces.
+	if err := os.RemoveAll(filepath.Join(src, ".codeiq")); err != nil {
+		t.Fatal(err)
+	}
+	if err := os.MkdirAll(filepath.Dir(cachePath), 0o755); err != nil {
+		t.Fatal(err)
+	}
+	c, err := cache.Open(cachePath)
+	if err != nil {
+		t.Fatal(err)
+	}
+	a := analyzer.NewAnalyzer(analyzer.Options{Cache: c, Workers: 1, Force: true})
+	if _, err := a.Run(src); err != nil {
+		t.Fatal(err)
+	}
+	if _, err := analyzer.Enrich(src, c, analyzer.EnrichOptions{GraphDir: graphDir, Force: true}); err != nil {
+		t.Fatal(err)
+	}
+	c.Close()
+	fullNodes, fullEdges := graphSnapshot(t, graphDir)
+
+	compareSnap := func(label string, got, want []string) {
+		t.Helper()
+		if len(got) != len(want) {
+			t.Errorf("%s: len mismatch incremental=%d full=%d", label, len(got), len(want))
+		}
+		// Identify the symmetric difference.
+		gotSet := make(map[string]struct{}, len(got))
+		for _, v := range got {
+			gotSet[v] = struct{}{}
+		}
+		wantSet := make(map[string]struct{}, len(want))
+		for _, v := range want {
+			wantSet[v] = struct{}{}
+		}
+		for v := range gotSet {
+			if _, ok := wantSet[v]; !ok {
+				t.Errorf("%s only in incremental: %q", label, v)
+			}
+		}
+		for v := range wantSet {
+			if _, ok := gotSet[v]; !ok {
+				t.Errorf("%s only in full: %q", label, v)
+			}
+		}
+	}
+	compareSnap("nodes", incNodes, fullNodes)
+	compareSnap("edges", incEdges, fullEdges)
+}
+
+// TestIncrementalRerunIsIdempotent — three successive `index → enrich`
+// runs against an unchanged tree produce identical graphs and the 2nd/3rd
+// enrichments short-circuit.
+func TestIncrementalRerunIsIdempotent(t *testing.T) {
+	src := t.TempDir()
+	if err := os.WriteFile(filepath.Join(src, "X.java"), []byte("class X {}"), 0o644); err != nil {
+		t.Fatal(err)
+	}
+	cachePath := filepath.Join(src, ".codeiq", "cache.sqlite")
+	graphDir := filepath.Join(src, ".codeiq", "graph", "codeiq.kuzu")
+	if err := os.MkdirAll(filepath.Dir(cachePath), 0o755); err != nil {
+		t.Fatal(err)
+	}
+
+	var summaries []analyzer.EnrichSummary
+	for i := 0; i < 3; i++ {
+		c, err := cache.Open(cachePath)
+		if err != nil {
+			t.Fatal(err)
+		}
+		a := analyzer.NewAnalyzer(analyzer.Options{Cache: c, Workers: 1})
+		if _, err := a.Run(src); err != nil {
+			t.Fatal(err)
+		}
+		summary, err := analyzer.Enrich(src, c, analyzer.EnrichOptions{GraphDir: graphDir})
+		if err != nil {
+			t.Fatalf("enrich #%d: %v", i, err)
+		}
+		c.Close()
+		summaries = append(summaries, summary)
+	}
+
+	if summaries[0].ShortCircuited {
+		t.Fatal("first enrich short-circuited; want full")
+	}
+	if !summaries[1].ShortCircuited {
+		t.Fatal("second enrich did NOT short-circuit")
+	}
+	if !summaries[2].ShortCircuited {
+		t.Fatal("third enrich did NOT short-circuit")
+	}
+}
+
+// TestIncrementalAcrossDeleteThenAdd — delete a file, re-index, add it
+// back with the same content, re-index. The cache should reflect the
+// transition correctly (path purged on delete, re-added with same hash).
+func TestIncrementalAcrossDeleteThenAdd(t *testing.T) {
+	src := t.TempDir()
+	cachePath := filepath.Join(src, ".codeiq", "cache.sqlite")
+	if err := os.MkdirAll(filepath.Dir(cachePath), 0o755); err != nil {
+		t.Fatal(err)
+	}
+
+	mustWrite := func(rel, content string) {
+		full := filepath.Join(src, rel)
+		if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
+			t.Fatal(err)
+		}
+		if err := os.WriteFile(full, []byte(content), 0o644); err != nil {
+			t.Fatal(err)
+		}
+	}
+
+	runIndex := func() *cache.Cache {
+		c, err := cache.Open(cachePath)
+		if err != nil {
+			t.Fatal(err)
+		}
+		a := analyzer.NewAnalyzer(analyzer.Options{Cache: c, Workers: 1})
+		if _, err := a.Run(src); err != nil {
+			t.Fatal(err)
+		}
+		return c
+	}
+
+	mustWrite("A.java", "class A {}")
+	c := runIndex()
+	c.Close()
+
+	// Delete the file. Next index should purge it from the cache.
+	if err := os.Remove(filepath.Join(src, "A.java")); err != nil {
+		t.Fatal(err)
+	}
+	c = runIndex()
+	if _, _, ok := c.GetFileByPath("A.java"); ok {
+		t.Fatal("deleted file still in cache after re-index")
+	}
+	c.Close()
+
+	// Recreate the same file. Next index should re-add it.
+	mustWrite("A.java", "class A {}")
+	c = runIndex()
+	if _, _, ok := c.GetFileByPath("A.java"); !ok {
+		t.Fatal("re-added file missing from cache")
+	}
+	c.Close()
+}