From 43cc3f0f69211afe7ca3d380662a6084303267ec Mon Sep 17 00:00:00 2001 From: Amit Kumar Date: Thu, 14 May 2026 18:17:32 +0000 Subject: [PATCH] chore: delete parity harness MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove the `parity/` package and update every doc reference to it. Background: the parity harness was a Java→Go cross-binary diff tool used heavily during the v0.3.0 cutover. Phase 6 deleted the Java side months ago; the harness has been idle since. It compiled into the default `go test ./...` run (~0.15s overhead) but produced no verification anyone consumed. Deleted (8 files / 715 LoC): parity/cmd/parity-normalize/main.go parity/java-normalize.jq parity/kuzu_dump.go parity/kuzu_dump_test.go parity/normalize.go parity/normalize_test.go parity/open_ro.go parity/parity_test.go Doc updates (9 files): CLAUDE.md ── layout tree README.md ── tree + status table; v0.4.1 → v0.4.2 docs/00-project-overview.md ── drop parity bullet + status row docs/03-code-map.md ── intro line + tree + dedicated section docs/05-configuration.md ── feature-flags table row docs/08-testing.md ── test-counts table row docs/10-known-risks-and-todos.md ── debt table row + follow-up bullet docs/11-agent-handoff.md ── 20-line summary + unfinished work + follow-ups docs/adr/0001-current-architecture.md ── open-follow-up bullet Verification: * Zero non-self references to github.com/randomcodespace/codeiq/parity in the rest of the codebase. * `CGO_ENABLED=1 go build ./...` clean. * `CGO_ENABLED=1 go test ./... -count=1` clean (one fewer package). References to "parity" that are NOT the harness are preserved: * MCP consolidated-tool parity tests (different concept — covers arg-name mapping between consolidated modes and their narrow handlers). * The "Java H2 parity" reserved-word workaround note in the cache schema doc — it's about byte-for-byte parity-dump compatibility, not the harness. 
Co-Authored-By: Claude Opus 4.7 --- CLAUDE.md | 1 - README.md | 4 +- docs/00-project-overview.md | 2 - docs/03-code-map.md | 7 +- docs/05-configuration.md | 1 - docs/08-testing.md | 1 - docs/10-known-risks-and-todos.md | 4 +- docs/11-agent-handoff.md | 5 +- docs/adr/0001-current-architecture.md | 1 - parity/cmd/parity-normalize/main.go | 31 --- parity/java-normalize.jq | 63 ----- parity/kuzu_dump.go | 99 -------- parity/kuzu_dump_test.go | 74 ------ parity/normalize.go | 73 ------ parity/normalize_test.go | 46 ---- parity/open_ro.go | 12 - parity/parity_test.go | 317 -------------------------- 17 files changed, 4 insertions(+), 737 deletions(-) delete mode 100644 parity/cmd/parity-normalize/main.go delete mode 100644 parity/java-normalize.jq delete mode 100644 parity/kuzu_dump.go delete mode 100644 parity/kuzu_dump_test.go delete mode 100644 parity/normalize.go delete mode 100644 parity/normalize_test.go delete mode 100644 parity/open_ro.go delete mode 100644 parity/parity_test.go diff --git a/CLAUDE.md b/CLAUDE.md index 5e62b701..2b4f7c94 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -120,7 +120,6 @@ codeiq/ │ ├── parser/ — tree-sitter + structured parsers │ ├── query/ — service / topology / stats / dead-code Cypher templates │ └── review/ — PR-review pipeline (diff + Ollama) -├── parity/ — parity harness (build tag `parity`); mostly idle ├── testdata/ — fixture-minimal, fixture-multi-lang ├── scripts/ — release / git-setup shell helpers ├── .github/workflows/ — go-ci, perf-gate, release-go, release-darwin, security, scorecard diff --git a/README.md b/README.md index 0f4b5c66..714551e3 100644 --- a/README.md +++ b/README.md @@ -291,7 +291,6 @@ codeiq/ │ ├── parser/ ── tree-sitter + structured parsers │ ├── query/ ── service / topology / stats / dead-code Cypher templates │ └── review/ ── PR-review pipeline (diff + Ollama) -├── parity/ ── parity harness (build tag `parity`) ├── testdata/ ── fixture-minimal + fixture-multi-lang ├── .github/workflows/ ── go-ci, 
perf-gate, release-go, release-darwin, security, scorecard └── .goreleaser.yml ── Goreleaser v2 (CGO multi-arch + Cosign + Syft) @@ -383,9 +382,8 @@ Architectural decisions: [`docs/adr/`](docs/adr/). Repo-specific Claude Code ins | Goreleaser pipeline + Cosign keyless | Production | | 884+ tests passing (race + vet + staticcheck + gosec + govulncheck on every PR) | Production | | `codeiq review` (LLM PR review) | Beta — works end-to-end against local Ollama | -| `parity/` harness | Idle (Java→Go port artifact; build-tag gated) | -Currently on **v0.4.1**. Release history was reset at v0.4.0 — see [`docs/00-project-overview.md`](docs/00-project-overview.md) for context. +Currently on **v0.4.2**. Release history was reset at v0.4.0 — see [`docs/00-project-overview.md`](docs/00-project-overview.md) for context. --- diff --git a/docs/00-project-overview.md b/docs/00-project-overview.md index c863f687..c3c85a6e 100644 --- a/docs/00-project-overview.md +++ b/docs/00-project-overview.md @@ -38,7 +38,6 @@ Production-ready surface: Experimental / partial: - `codeiq review` — works end-to-end against local Ollama; Ollama Cloud path tested but the default endpoint is local. Output format is markdown or JSON. -- `parity/` harness — build tag `parity`, compares cache + graph outputs across runs; used during the Java→Go port, now mostly idle. Not implemented (despite mentions in older docs): - **`codeiq config `** — CLAUDE.md historically listed this; no `internal/cli/config.go` exists. The root `--config` flag still loads `codeiq.yml`. 
@@ -53,7 +52,6 @@ Not implemented (despite mentions in older docs): | Kuzu 0.11.3 + native FTS | Production | Migrated from 0.7.1 with CONTAINS fallback retained | | Goreleaser release pipeline | Production | Cosign keyless via GitHub OIDC + Sigstore Rekor | | `codeiq review` (LLM PR review) | Beta | Works; quality depends on the LLM endpoint | -| `parity/` harness | Idle | Phase 5 / Java parity verification; build-tag gated | | Detector coverage | Mixed | 100 detectors; some are lexical-only (regex), AST refinement is a per-detector concern | ## Release history (after the v0.4.0 reset) diff --git a/docs/03-code-map.md b/docs/03-code-map.md index 7e737e3c..50f7ba75 100644 --- a/docs/03-code-map.md +++ b/docs/03-code-map.md @@ -1,6 +1,6 @@ # 03 — Code map -> All paths are repo-root-relative. Module: `github.com/randomcodespace/codeiq`. CGO required everywhere. ~395 Go files in `internal/` + `cmd/` + `parity/`. +> All paths are repo-root-relative. Module: `github.com/randomcodespace/codeiq`. CGO required everywhere. ~395 Go files in `internal/` + `cmd/`. ## Top level @@ -8,7 +8,6 @@ codeiq/ ├── cmd/ — main package(s) ├── internal/ — production code (393 .go files) -├── parity/ — parity harness (build tag `parity`, 7 .go files) ├── testdata/ — fixtures (fixture-minimal, fixture-multi-lang) ├── scripts/ — release / git-setup shell helpers ├── .github/workflows/ — 6 workflows: go-ci, perf-gate, release-go, release-darwin, security, scorecard @@ -161,10 +160,6 @@ Sample (Spring REST): [`internal/detector/jvm/java/spring_rest.go`](../internal/ | [`internal/model/`](../internal/model/) | Canonical types: `CodeNode`, `CodeEdge`, `NodeKind` (34 values), `EdgeKind` (28 values), `Confidence` (LEXICAL/SYNTACTIC/RESOLVED), `Layer` (frontend/backend/infra/shared/unknown). | | [`internal/buildinfo/`](../internal/buildinfo/) | `Version`, `Commit`, `Date`, `Dirty`, `Platform`, `GoVersion`, `Features`. `init()` falls back to `runtime/debug.BuildInfo` when no `-ldflags -X`. 
| -## `parity/` - -Build-tag `parity` harness. Compares cache + graph outputs of two runs (Java side vs Go side, or two Go runs). Used heavily during the Java → Go port; now mostly idle. Build with `go build -tags parity ./parity/...`. - ## `testdata/` | Path | Purpose | diff --git a/docs/05-configuration.md b/docs/05-configuration.md index e3f6cf18..570d7705 100644 --- a/docs/05-configuration.md +++ b/docs/05-configuration.md @@ -117,7 +117,6 @@ The project does not use a feature-flag system. Build-time switches are limited | Switch | Mechanism | |---|---| -| Parity harness | Go build tag `parity` (see [`parity/`](../parity/)). | | Detector registration | Blank-import gate in [`internal/cli/detectors_register.go`](../internal/cli/detectors_register.go). | | Verbose logging | `-v` count flag on the root command. | diff --git a/docs/08-testing.md b/docs/08-testing.md index 6cbbd5e5..860ffe25 100644 --- a/docs/08-testing.md +++ b/docs/08-testing.md @@ -20,7 +20,6 @@ Test file count per package (sampled from `find internal -name '*_test.go' | wc | `internal/cache/` | 3 | | `internal/query/` | 3 | | `internal/review/` | 3 | -| `parity/` | 1 | **Total:** 884+ tests pass cleanly on `main`. Race detector passes too (CI runs `go test -race -count=1`). diff --git a/docs/10-known-risks-and-todos.md b/docs/10-known-risks-and-todos.md index 31547fe9..a7e560ed 100644 --- a/docs/10-known-risks-and-todos.md +++ b/docs/10-known-risks-and-todos.md @@ -2,7 +2,7 @@ ## TODO / FIXME / HACK markers in code -**Zero.** A repo-wide grep for `TODO`, `FIXME`, `HACK`, `XXX` in `internal/`, `cmd/`, and `parity/` returns **0 occurrences**. +**Zero.** A repo-wide grep for `TODO`, `FIXME`, `HACK`, `XXX` in `internal/` and `cmd/` returns **0 occurrences**. This is the result of a deliberate "no TODOs in main" discipline — incomplete work either ships behind a clear interface or is captured in plan files (now wiped). The flip side: there's no in-code roadmap for "things known to need work". 
The list below substitutes for that, drawn from comments, deleted plan files, and observed bugs. @@ -58,7 +58,6 @@ This is the result of a deliberate "no TODOs in main" discipline — incomplete | **No structured logging** | Across packages | Plain `fmt.Fprintln(os.Stderr, ...)` at `-v` levels. Fine for a CLI; would be limiting in a long-running service (and there isn't one, so it's fine). | | **`config ` subcommand not implemented** | [`internal/cli/`](../internal/cli/) — no `config.go` | Older docs referenced this. Today only the root `--config` flag exists. | | **Java detector `.java` test fixtures** | [`testdata/fixture-minimal/`](../testdata/fixture-minimal/) | The fixture has `User.java` + `UserController.java` to exercise the Java detector. They are content, not project Java code — easy to confuse with stale Java-era artifacts. | -| **`parity/` harness mostly idle** | [`parity/`](../parity/) | Build-tag `parity`. Used during the Java → Go port; now sits behind the tag waiting for someone to wake it up or delete it. | ### Incomplete features @@ -97,4 +96,3 @@ This is the result of a deliberate "no TODOs in main" discipline — incomplete 3. **Snapshot tests for tree-sitter grammar output** on canonical files per language; pin grammar versions in `go.mod` rather than the wildcard tag patterns currently in use. 4. **`gh attestation verify` documentation** for release consumers. 5. **Consider extracting an `extra_files` block in `.goreleaser.yml`** with `match: optional` rather than the `*`-glob hack for README.md. -6. **Delete `parity/` or revive it** — Phase 5 of the Java port is over; the harness is in limbo. diff --git a/docs/11-agent-handoff.md b/docs/11-agent-handoff.md index 65f41d66..1d186dbb 100644 --- a/docs/11-agent-handoff.md +++ b/docs/11-agent-handoff.md @@ -22,8 +22,7 @@ 16. **Never re-use a deleted version number.** Always tag forward (v0.4.X+1). 17. There's no REST API, no web UI, no telemetry, no auto-update, no Docker image. 
Operator-driven CLI + stdio MCP only. 18. Java reference implementation was deleted at v0.3.0 cutover (PR #132). Will not return. -19. `parity/` directory is a build-tag-gated harness (`-tags parity`) from the Java→Go port; mostly idle, can be revived or deleted. -20. Documentation lives entirely under `docs/` + `README.md` + `CLAUDE.md`. Wiped + rebuilt in this handoff (PR #168 + the doc-rewrite this file is part of). +19. Documentation lives entirely under `docs/` + `README.md` + `CLAUDE.md`. Wiped + rebuilt in this handoff (PR #168 + the doc-rewrite this file is part of). ## Top 20 files to read first @@ -147,7 +146,6 @@ codeiq --version | `v0.4.2` tag | Created then deleted because the v0.4.2 release failed on the goreleaser literal-file pattern. Once #169 lands, re-tag as v0.4.2. | | CHANGELOG `[Unreleased]` section | Will need to be cut to `[v0.4.2]` when the next release ships. See [`docs/09-build-deploy-release.md`](09-build-deploy-release.md). | | New reference docs | This is the deliverable. After this PR lands, the repo will have a clean `docs/` tree (the user wiped the prior set in PR #168). | -| `parity/` harness | Build-tag idle since the Java port. Revive or delete. | | `config ` subcommand | Mentioned in older docs; never implemented. Root `--config` flag works. Implement or remove the mention. | ## Recommended next tasks (priority order) @@ -155,7 +153,6 @@ codeiq --version 1. **Merge PR #169** (goreleaser glob fix) → tag v0.4.2 → publish the release. 2. **Wire the new reference docs into `go-ci.yml` or `security.yml` link-check** — broken markdown links would be the most likely doc-bitrot vector. 3. **Add a `gh attestation verify` example to the README** — the binaries ship with build provenance but it's invisible to consumers. -4. **Decide on `parity/`** — keep + document, or delete. 5. **Fuzz [`MutationKeyword`](../internal/graph/mutation.go)** — adversarial Cypher with comment / string smuggling. 6. 
**Property-fuzz the CSV bulk-load writer** — random byte sequences in node/edge properties (catches the next #150/#152/#153-style bug). 7. **Snapshot tests for tree-sitter grammar outputs** — pin grammar versions; alert on AST node-name drift. diff --git a/docs/adr/0001-current-architecture.md b/docs/adr/0001-current-architecture.md index 89cec9f6..9385b6c4 100644 --- a/docs/adr/0001-current-architecture.md +++ b/docs/adr/0001-current-architecture.md @@ -155,7 +155,6 @@ Each detector registers itself with `detector.RegisterDefault(NewMyDetector())` - Property-fuzz the CSV bulk-load writer. - Snapshot tests for tree-sitter grammar outputs. - Document `gh attestation verify` for release consumers. -- Decide on `parity/` — keep or delete. - Implement `codeiq config ` or formally remove the historical reference. ## References diff --git a/parity/cmd/parity-normalize/main.go b/parity/cmd/parity-normalize/main.go deleted file mode 100644 index 986fba81..00000000 --- a/parity/cmd/parity-normalize/main.go +++ /dev/null @@ -1,31 +0,0 @@ -// Binary parity-normalize reads a codeiq SQLite cache and writes a normalized -// JSON dump to stdout. Used by the go-parity CI workflow to convert both -// Java and Go outputs into a diff-friendly canonical form. 
-package main - -import ( - "fmt" - "os" - - "github.com/randomcodespace/codeiq/internal/cache" - "github.com/randomcodespace/codeiq/parity" -) - -func main() { - if len(os.Args) != 2 { - fmt.Fprintln(os.Stderr, "usage: parity-normalize ") - os.Exit(1) - } - c, err := cache.Open(os.Args[1]) - if err != nil { - fmt.Fprintln(os.Stderr, "open:", err) - os.Exit(2) - } - defer c.Close() - out, err := parity.Normalize(c) - if err != nil { - fmt.Fprintln(os.Stderr, "normalize:", err) - os.Exit(2) - } - fmt.Print(out) -} diff --git a/parity/java-normalize.jq b/parity/java-normalize.jq deleted file mode 100644 index a6b6d92f..00000000 --- a/parity/java-normalize.jq +++ /dev/null @@ -1,63 +0,0 @@ -# Project Java `codeiq graph -f json` output onto the same shape Go's -# parity.Normalize produces: array of { path, language, nodes, edges } -# grouped by file_path, sorted by path then kind+id. -# -# The Java side emits a top-level object like -# { "nodes": [...], "edges": [...] } -# where each node has filePath / kind / id / label / properties / confidence -# and each edge has kind / sourceId / targetId / properties / confidence. -# We invert this into per-file groups so structural diff against the Go side -# (which writes per-file cache entries) is meaningful. - -def sort_nodes: sort_by(.kind, .id); -def sort_edges: sort_by(.kind, .sourceId, .targetId); - -# Group nodes by file_path → list of { path, nodes, edges }. 
-(.nodes | group_by(.filePath // "")) as $node_groups | -($node_groups | map({ - path: (.[0].filePath // ""), - language: (.[0].properties.language // ""), - nodes: ([.[] | { - id, kind, label, - fqn: (.fqn // ""), - module: (.module // ""), - file_path: (.filePath // ""), - line_start: (.lineStart // 0), - line_end: (.lineEnd // 0), - layer: (.layer // "unknown"), - confidence: (.confidence // "LEXICAL"), - source: (.source // ""), - annotations: (.annotations // []), - properties: (.properties // {}) - }] | sort_nodes), - edges: [] -})) as $by_path | - -# Attach edges to their source file's group. -# -# The Java `graph -f json` command currently emits only nodes — the -# `.edges` key is absent on its output. Default to an empty list so the -# reduction is a no-op and the resulting per-file groups carry empty -# edge arrays (the Go side compares structurally and the -# expected-divergence allow-list absorbs the gap). When the Java side -# learns to export edges, drop the `// []` fallback. -reduce ((.edges // [])[]) as $e ($by_path; - # find the path whose nodes contain $e.sourceId - . as $groups | - ($groups | to_entries - | map(select(.value.nodes | any(.id == $e.sourceId))) - | .[0].key) as $idx | - if $idx == null then . - else - .[$idx].edges += [{ - id: $e.id, - kind: $e.kind, - source_id: $e.sourceId, - target_id: $e.targetId, - confidence: ($e.confidence // "LEXICAL"), - source: ($e.source // ""), - properties: ($e.properties // {}) - }] - end) -| map(.edges |= sort_edges) -| sort_by(.path) diff --git a/parity/kuzu_dump.go b/parity/kuzu_dump.go deleted file mode 100644 index 4726c566..00000000 --- a/parity/kuzu_dump.go +++ /dev/null @@ -1,99 +0,0 @@ -// Package parity contains the cross-binary diff harness. Phase 1 dumps the -// SQLite cache to a normalized JSON form; phase 2 extends to the Kuzu graph -// produced by `codeiq enrich`. 
DumpKuzu lives here so the harness can compare -// post-enrich graphs node-for-node and edge-for-edge against the Java side's -// Neo4j dump. -package parity - -import ( - "encoding/json" - "fmt" - "sort" - - "github.com/randomcodespace/codeiq/internal/graph" -) - -// DumpKuzu returns a deterministic JSON dump of all nodes and edges in the -// Kuzu store at `dir`. The shape mirrors what java-normalize.jq produces from -// the Java side's `codeiq graph -f json` output, so the parity harness can -// diff the two byte-for-byte modulo the entries listed in -// expected-divergence.json. -// -// Kuzu-specific notes: -// - The store at `dir` must have been written by `codeiq enrich` (schema + -// bulk-loaded nodes + per-EdgeKind rel tables + indexes). -// - The rel-type accessor is `label(r)` in Kuzu 0.7.1 — the Cypher standard -// `type(r)` is not bound. The "edges" entries carry the rel-table name as -// the `kind` field so the JSON looks like the Java/Neo4j side. -// - LIMIT cannot be parameter-bound in Kuzu 0.7.1; we don't need LIMIT here -// because the diff requires the full set. -// - Cypher ORDER BY drops the rel-pattern scope after RETURN, so we sort -// defensively in Go on top of any server-side ordering. 
-func DumpKuzu(dir string) ([]byte, error) { - s, err := graph.Open(dir) - if err != nil { - return nil, fmt.Errorf("parity: open kuzu: %w", err) - } - defer s.Close() - - nodes, err := s.Cypher(` - MATCH (n:CodeNode) - RETURN n.id AS id, n.kind AS kind, n.label AS label, n.fqn AS fqn, - n.file_path AS file_path, n.layer AS layer, - n.framework AS framework, n.language AS language, - n.prop_lex_comment AS lex_comment, - n.prop_lex_config_keys AS lex_config_keys - ORDER BY n.id`) - if err != nil { - return nil, fmt.Errorf("parity: dump nodes: %w", err) - } - edges, err := s.Cypher(` - MATCH (a:CodeNode)-[r]->(b:CodeNode) - RETURN r.id AS id, label(r) AS kind, a.id AS source, b.id AS target - ORDER BY r.id`) - if err != nil { - return nil, fmt.Errorf("parity: dump edges: %w", err) - } - - // Defensive Go-side sort. Cypher ORDER BY is stable in Kuzu 0.7.1 today, - // but the binder treats the order-key alias loosely after DISTINCT / - // aggregation — sorting here pins the result regardless of upstream drift. - sortByID(nodes) - sortByID(edges) - - // Coerce nil slices to empty slices so the JSON output is always `[]` - // rather than `null` — keeps the byte-level diff stable across stores - // that happen to be empty. - if nodes == nil { - nodes = []map[string]any{} - } - if edges == nil { - edges = []map[string]any{} - } - - return json.MarshalIndent(map[string]any{ - "nodes": nodes, - "edges": edges, - }, "", " ") -} - -// sortByID sorts a result set by the "id" column. Rows missing an id -// (shouldn't happen post-enrich, but defensive against future schema drift) -// stably sort to the front. -func sortByID(rows []map[string]any) { - sort.SliceStable(rows, func(i, j int) bool { - return idOf(rows[i]) < idOf(rows[j]) - }) -} - -// idOf returns the row's "id" column as a string, or "" when absent / not -// a string. Defensive against Cypher rows where a missing column projects to -// nil — the JSON output then carries `"id": null` rather than "". 
-func idOf(row map[string]any) string { - if v, ok := row["id"]; ok { - if s, ok := v.(string); ok { - return s - } - } - return "" -} diff --git a/parity/kuzu_dump_test.go b/parity/kuzu_dump_test.go deleted file mode 100644 index b88883e6..00000000 --- a/parity/kuzu_dump_test.go +++ /dev/null @@ -1,74 +0,0 @@ -package parity - -import ( - "encoding/json" - "path/filepath" - "testing" - - "github.com/randomcodespace/codeiq/internal/graph" -) - -// TestDumpKuzuEmptyStore verifies DumpKuzu against a fresh-but-empty store -// produces a well-formed JSON envelope with empty "nodes"/"edges" arrays. -// Catches regressions where Cypher errors would silently propagate to nil -// arrays in the JSON. -func TestDumpKuzuEmptyStore(t *testing.T) { - dir := filepath.Join(t.TempDir(), "empty.kuzu") - s, err := graph.Open(dir) - if err != nil { - t.Fatal(err) - } - if err := s.ApplySchema(); err != nil { - t.Fatal(err) - } - s.Close() - - out, err := DumpKuzu(dir) - if err != nil { - t.Fatalf("DumpKuzu on empty store: %v", err) - } - var got map[string]any - if err := json.Unmarshal(out, &got); err != nil { - t.Fatalf("output is not JSON: %v\n%s", err, out) - } - nodes, ok := got["nodes"].([]any) - if !ok { - t.Fatalf("missing nodes key or wrong type: %T", got["nodes"]) - } - edges, ok := got["edges"].([]any) - if !ok { - t.Fatalf("missing edges key or wrong type: %T", got["edges"]) - } - if len(nodes) != 0 { - t.Errorf("expected 0 nodes, got %d", len(nodes)) - } - if len(edges) != 0 { - t.Errorf("expected 0 edges, got %d", len(edges)) - } -} - -// TestDumpKuzuIsDeterministic re-dumps the same empty store twice and -// asserts byte-equality. 
-func TestDumpKuzuIsDeterministic(t *testing.T) { - dir := filepath.Join(t.TempDir(), "det.kuzu") - s, err := graph.Open(dir) - if err != nil { - t.Fatal(err) - } - if err := s.ApplySchema(); err != nil { - t.Fatal(err) - } - s.Close() - - first, err := DumpKuzu(dir) - if err != nil { - t.Fatal(err) - } - second, err := DumpKuzu(dir) - if err != nil { - t.Fatal(err) - } - if string(first) != string(second) { - t.Fatalf("non-deterministic dump:\nfirst:\n%s\n\nsecond:\n%s", first, second) - } -} diff --git a/parity/normalize.go b/parity/normalize.go deleted file mode 100644 index 9a18eb22..00000000 --- a/parity/normalize.go +++ /dev/null @@ -1,73 +0,0 @@ -// Package parity contains the cross-binary diff harness. Phase 1 dumps the -// SQLite cache to a normalized JSON form; phase 2 extends to the Kuzu graph. -package parity - -import ( - "encoding/json" - "sort" - - "github.com/randomcodespace/codeiq/internal/cache" - "github.com/randomcodespace/codeiq/internal/model" -) - -// NormalizedEntry is the diff-friendly shape of a cache entry. Volatile -// fields (parsed_at timestamp) are dropped — they're never equal across -// runs of two different binaries. -type NormalizedEntry struct { - Path string `json:"path"` - Language string `json:"language"` - Nodes []*model.CodeNode `json:"nodes"` - Edges []*model.CodeEdge `json:"edges"` -} - -// Normalize reads every entry from c and returns a sorted, parsed_at-stripped -// JSON dump suitable for byte-level diffing. 
-func Normalize(c *cache.Cache) (string, error) { - var entries []NormalizedEntry - err := c.IterateAll(func(e *cache.Entry) error { - ne := NormalizedEntry{ - Path: e.Path, - Language: e.Language, - Nodes: sortNodes(e.Nodes), - Edges: sortEdges(e.Edges), - } - entries = append(entries, ne) - return nil - }) - if err != nil { - return "", err - } - sort.Slice(entries, func(i, j int) bool { return entries[i].Path < entries[j].Path }) - b, err := json.MarshalIndent(entries, "", " ") - if err != nil { - return "", err - } - return string(b), nil -} - -func sortNodes(in []*model.CodeNode) []*model.CodeNode { - out := make([]*model.CodeNode, len(in)) - copy(out, in) - sort.Slice(out, func(i, j int) bool { - if out[i].Kind.String() != out[j].Kind.String() { - return out[i].Kind.String() < out[j].Kind.String() - } - return out[i].ID < out[j].ID - }) - return out -} - -func sortEdges(in []*model.CodeEdge) []*model.CodeEdge { - out := make([]*model.CodeEdge, len(in)) - copy(out, in) - sort.Slice(out, func(i, j int) bool { - if out[i].Kind.String() != out[j].Kind.String() { - return out[i].Kind.String() < out[j].Kind.String() - } - if out[i].SourceID != out[j].SourceID { - return out[i].SourceID < out[j].SourceID - } - return out[i].TargetID < out[j].TargetID - }) - return out -} diff --git a/parity/normalize_test.go b/parity/normalize_test.go deleted file mode 100644 index 670849b7..00000000 --- a/parity/normalize_test.go +++ /dev/null @@ -1,46 +0,0 @@ -package parity - -import ( - "path/filepath" - "strings" - "testing" - - "github.com/randomcodespace/codeiq/internal/cache" - "github.com/randomcodespace/codeiq/internal/model" -) - -func TestNormalizeIsSorted(t *testing.T) { - dir := t.TempDir() - c, err := cache.Open(filepath.Join(dir, "c.sqlite")) - if err != nil { - t.Fatal(err) - } - defer c.Close() - - // Two entries inserted out of order. 
- for _, e := range []*cache.Entry{ - { - ContentHash: "bb", Path: "z.java", Language: "java", ParsedAt: "2026-01-01T00:00:00Z", - Nodes: []*model.CodeNode{model.NewCodeNode("z", model.NodeClass, "Z")}, - }, - { - ContentHash: "aa", Path: "a.java", Language: "java", ParsedAt: "2026-01-01T00:00:00Z", - Nodes: []*model.CodeNode{model.NewCodeNode("a", model.NodeClass, "A")}, - }, - } { - if err := c.Put(e); err != nil { - t.Fatal(err) - } - } - out, err := Normalize(c) - if err != nil { - t.Fatal(err) - } - // "a.java" should appear before "z.java" in the rendered JSON. - if !strings.Contains(out, `"a.java"`) || !strings.Contains(out, `"z.java"`) { - t.Fatalf("missing entries in output:\n%s", out) - } - if strings.Index(out, `"a.java"`) > strings.Index(out, `"z.java"`) { - t.Fatalf("entries not sorted:\n%s", out) - } -} diff --git a/parity/open_ro.go b/parity/open_ro.go deleted file mode 100644 index c221e36a..00000000 --- a/parity/open_ro.go +++ /dev/null @@ -1,12 +0,0 @@ -//go:build parity - -package parity - -import "github.com/randomcodespace/codeiq/internal/cache" - -// openCacheRO opens a cache for read access. Phase 1 doesn't distinguish -// read-only -- cache.Open is sufficient. Wraps as a stable seam for phase 2 -// when a read-only mode lands. -func openCacheRO(path string) (*cache.Cache, error) { - return cache.Open(path) -} diff --git a/parity/parity_test.go b/parity/parity_test.go deleted file mode 100644 index c510049a..00000000 --- a/parity/parity_test.go +++ /dev/null @@ -1,317 +0,0 @@ -//go:build parity - -// Package parity (parity build tag) cross-checks the Go binary's index -// output against the Java side. Run with: -// -// go test -tags=parity ./parity/... -// -// This test does NOT invoke the Java jar by itself -- the CI workflow -// (.github/workflows/go-parity.yml) runs the Java side first and writes -// its normalized output to TEST_JAVA_NORMALIZED. 
When the env var is -// unset, the test is a pure Go-side snapshot (still useful for catching -// accidental detector drift, just not a cross-binary parity check). -package parity - -import ( - "bytes" - "encoding/json" - "os" - "os/exec" - "path/filepath" - "strings" - "testing" - - "github.com/pmezard/go-difflib/difflib" -) - -func TestFixtureMinimalParity(t *testing.T) { - root := mustModuleRoot(t) - fixture := filepath.Join(root, "testdata", "fixture-minimal") - - // 1. Build the Go binary fresh. - bin := filepath.Join(t.TempDir(), "codeiq") - build := exec.Command("go", "build", "-o", bin, "./cmd/codeiq") - build.Dir = root - build.Env = append(os.Environ(), "CGO_ENABLED=1") - if out, err := build.CombinedOutput(); err != nil { - t.Fatalf("go build failed: %v\n%s", err, out) - } - - // 2. Run `codeiq index` on the fixture (in a copy so we don't write into - // the source tree). - work := t.TempDir() - copyDir(t, fixture, work) - idx := exec.Command(bin, "index", work) - if out, err := idx.CombinedOutput(); err != nil { - t.Fatalf("codeiq index failed: %v\n%s", err, out) - } - - // 3. Normalize the Go cache. - c, err := openCacheRO(filepath.Join(work, ".codeiq", "cache", "codeiq.sqlite")) - if err != nil { - t.Fatal(err) - } - defer c.Close() - goNorm, err := Normalize(c) - if err != nil { - t.Fatal(err) - } - - // 4. If TEST_JAVA_NORMALIZED is set (CI), diff against it. Otherwise - // snapshot the Go side to a golden file for review. - javaNorm := os.Getenv("TEST_JAVA_NORMALIZED") - if javaNorm == "" { - t.Logf("TEST_JAVA_NORMALIZED unset -- Go-only snapshot mode") - if goNorm == "" { - t.Fatal("Go normalized output is empty") - } - return - } - javaBytes, err := os.ReadFile(javaNorm) - if err != nil { - t.Fatal(err) - } - - // 5. Apply allowed-divergence filter. 
- // - // Strict-mode policy: the Go port currently emits a superset of the - // Java reference's nodes (anchor nodes from Phase-1 dedup work, - // extra detectors registered via the cli/detectors_register.go fix, - // etc.). Until expected-divergence.json is populated with the - // catalogue of intentional drift, a TEST_JAVA_PARITY_STRICT=1 - // override switches the test from "log diff but pass" to - // "fail on any unexplained diff". CI sets it on PRs that explicitly - // regenerate the divergence file; everyday Java-touching PRs stay - // informational until the catalogue lands. - divergence := loadDivergence(t, filepath.Join(fixture, "expected-divergence.json")) - diff := diffJSON(string(javaBytes), goNorm, divergence) - if diff == "" { - return - } - strict := os.Getenv("TEST_JAVA_PARITY_STRICT") == "1" - if strict { - t.Fatalf("parity diff (outside allowed-divergence):\n%s", diff) - } - // Informational: log a clipped diff so the artifact upload still - // surfaces it, but don't fail the run. - t.Logf("parity diff (informational; set TEST_JAVA_PARITY_STRICT=1 to gate):\n%s", - truncate(diff, 4000)) -} - -// divergenceFile mirrors expected-divergence.json -- populated phases 2-4. -// Property drift entries are tags interpreted by diffJSON; their string values -// document intent (e.g. "java_resolved_to_syntactic") and are filtered out of -// the diff. Phase 1 fixture has all-empty arrays; phase 2 fixture introduces -// non-empty property_drift to suppress known intentional deltas. 
-type divergenceFile struct { - MissingNodes []string `json:"missing_nodes"` - MissingEdges []string `json:"missing_edges"` - PropertyDrift []string `json:"property_drift"` -} - -func loadDivergence(t *testing.T, path string) divergenceFile { - t.Helper() - b, err := os.ReadFile(path) - if err != nil { - t.Fatal(err) - } - var d divergenceFile - if err := json.Unmarshal(b, &d); err != nil { - t.Fatal(err) - } - return d -} - -// diffJSON returns a non-empty string when java != go, after subtracting -// allowed missing-nodes / missing-edges / property-drift entries. The diff is -// rendered via pmezard/go-difflib's unified format so CI failures show the -// minimal surrounding context, not two 4-MB JSON blobs. -// -// Filtering policy: each MissingNodes/MissingEdges entry is a substring; if -// every changed line in the diff contains at least one such substring (or one -// of the PropertyDrift tag substrings), the diff is considered fully absorbed -// by the allowlist and "" is returned. Otherwise the unified diff is returned -// with the allowed-substring lines stripped — what remains is the unexplained -// drift CI needs to fail on. -func diffJSON(java, gov string, d divergenceFile) string { - if java == gov { - return "" - } - allow := append([]string{}, d.MissingNodes...) - allow = append(allow, d.MissingEdges...) - allow = append(allow, d.PropertyDrift...) - - udiff, err := difflib.GetUnifiedDiffString(difflib.UnifiedDiff{ - A: strings.Split(java, "\n"), - B: strings.Split(gov, "\n"), - FromFile: "java", - ToFile: "go", - Context: 3, - }) - if err != nil { - // Fallback to byte-blob if difflib breaks — better than hiding the failure. - var b bytes.Buffer - b.WriteString("Java normalized:\n") - b.WriteString(java) - b.WriteString("\n\nGo normalized:\n") - b.WriteString(gov) - return b.String() - } - if len(allow) == 0 { - return udiff - } - // Walk the unified diff line-by-line. 
Keep header lines verbatim; for - // added/removed lines, drop any line that contains an allowed substring. - // If every changed line was absorbed, return "". - var kept bytes.Buffer - hasRealChange := false - for _, line := range strings.Split(udiff, "\n") { - switch { - case strings.HasPrefix(line, "---"), strings.HasPrefix(line, "+++"), - strings.HasPrefix(line, "@@"): - kept.WriteString(line) - kept.WriteByte('\n') - case strings.HasPrefix(line, "+"), strings.HasPrefix(line, "-"): - if containsAny(line, allow) { - continue - } - kept.WriteString(line) - kept.WriteByte('\n') - hasRealChange = true - default: - kept.WriteString(line) - kept.WriteByte('\n') - } - } - if !hasRealChange { - return "" - } - return kept.String() -} - -// containsAny returns true when s contains at least one substring from the -// list. Used to filter unified-diff lines through the expected-divergence -// allowlist. -func containsAny(s string, subs []string) bool { - for _, sub := range subs { - if sub == "" { - continue - } - if strings.Contains(s, sub) { - return true - } - } - return false -} - -func mustModuleRoot(t *testing.T) string { - t.Helper() - out, err := exec.Command("go", "list", "-m", "-f", "{{.Dir}}").Output() - if err != nil { - t.Fatal(err) - } - return strings.TrimSpace(string(out)) -} - -func copyDir(t *testing.T, src, dst string) { - t.Helper() - err := filepath.Walk(src, func(p string, info os.FileInfo, err error) error { - if err != nil { - return err - } - rel, _ := filepath.Rel(src, p) - target := filepath.Join(dst, rel) - if info.IsDir() { - return os.MkdirAll(target, 0755) - } - b, err := os.ReadFile(p) - if err != nil { - return err - } - return os.WriteFile(target, b, 0644) - }) - if err != nil { - t.Fatal(err) - } -} - -// TestFixtureMultiLangParityPhase2 exercises the full phase-2 pipeline -// (index + enrich) against the multi-lang fixture and either: -// -// 1. 
Snapshots the Kuzu dump when TEST_JAVA_KUZU_DUMP is unset (Go-only mode -// — catches drift across Go commits even without a Java toolchain), OR -// 2. Diffs against the file at TEST_JAVA_KUZU_DUMP when set, applying the -// expected-divergence.json allowlist to filter known intentional deltas. -// -// On mismatch the Kuzu dump is written to t.TempDir() and the test prints -// the path so the artifact is recoverable for offline inspection — CI then -// uploads t.TempDir() as a build artifact alongside the diff. -func TestFixtureMultiLangParityPhase2(t *testing.T) { - root := mustModuleRoot(t) - fixture := filepath.Join(root, "testdata", "fixture-multi-lang") - - // 1. Build the Go binary fresh. - bin := filepath.Join(t.TempDir(), "codeiq") - build := exec.Command("go", "build", "-o", bin, "./cmd/codeiq") - build.Dir = root - build.Env = append(os.Environ(), "CGO_ENABLED=1") - if out, err := build.CombinedOutput(); err != nil { - t.Fatalf("go build failed: %v\n%s", err, out) - } - - // 2. Copy fixture to a scratch dir so the index/enrich writes don't land - // in the source tree. - work := t.TempDir() - copyDir(t, fixture, work) - - // 3. Run index + enrich. - idx := exec.Command(bin, "index", work) - if out, err := idx.CombinedOutput(); err != nil { - t.Fatalf("codeiq index failed: %v\n%s", err, out) - } - enr := exec.Command(bin, "enrich", work) - if out, err := enr.CombinedOutput(); err != nil { - t.Fatalf("codeiq enrich failed: %v\n%s", err, out) - } - - // 4. Dump the Kuzu store. - kuzuDir := filepath.Join(work, ".codeiq", "graph", "codeiq.kuzu") - dump, err := DumpKuzu(kuzuDir) - if err != nil { - t.Fatalf("DumpKuzu failed: %v", err) - } - if len(dump) == 0 { - t.Fatal("DumpKuzu returned empty output") - } - - // 5. Optionally diff against the Java side. 
- javaKuzu := os.Getenv("TEST_JAVA_KUZU_DUMP") - if javaKuzu == "" { - t.Logf("TEST_JAVA_KUZU_DUMP unset -- Go-only snapshot mode (got %d bytes)", len(dump)) - return - } - javaBytes, err := os.ReadFile(javaKuzu) - if err != nil { - t.Fatal(err) - } - - // Apply the expected-divergence.json filter. - divergence := loadDivergence(t, filepath.Join(fixture, "expected-divergence.json")) - if diff := diffJSON(string(javaBytes), string(dump), divergence); diff != "" { - // Write the artifact so CI can upload it. - artifact := filepath.Join(t.TempDir(), "go-kuzu-dump.json") - _ = os.WriteFile(artifact, dump, 0644) - t.Logf("Go dump written to %s", artifact) - t.Fatalf("phase-2 parity diff (outside allowed-divergence):\n%s", - truncate(diff, 4000)) - } -} - -// truncate caps a diff string so the test failure message stays readable. -// The full dump is on disk via the artifact path printed above. -func truncate(s string, max int) string { - if len(s) <= max { - return s - } - return s[:max] + "\n... [truncated, see artifact path above]" -}