diff --git a/.github/workflows/ci-java.yml b/.github/workflows/ci-java.yml index 0d3daa45..f8ce46a0 100644 --- a/.github/workflows/ci-java.yml +++ b/.github/workflows/ci-java.yml @@ -1,15 +1,35 @@ name: Java CI + +# Lean Java CI — fast compile + unit-test gate on the Java reference side. +# Pairs with go-parity.yml: this workflow proves the Java jar still builds +# on every PR; go-parity.yml then uses the same build to diff against the +# Go port. +# +# Heavier checks (jacoco coverage, SpotBugs, OWASP dependency-check) live +# under workflow_dispatch via release-java.yml — they're not in the per-PR +# loop because they slow the Go port's PRs without adding signal. +# +# Disappears in Phase 6 cutover along with the rest of the Java tree. + on: push: branches: [main] - paths: ['src/**', 'pom.xml'] + paths: + - 'src/**' + - 'pom.xml' + - '.github/workflows/ci-java.yml' pull_request: branches: [main] + paths: + - 'src/**' + - 'pom.xml' + - '.github/workflows/ci-java.yml' permissions: read-all jobs: build: + name: build runs-on: ubuntu-latest permissions: contents: read @@ -22,14 +42,14 @@ jobs: distribution: 'temurin' java-version: '25' cache: 'maven' - - name: Build + verify (jacoco 85% + SpotBugs) - run: mvn -B -ntp clean verify + - name: Compile + unit tests (skip frontend) + # -Dfrontend.skip=true so the npm step doesn't run — CI image + # doesn't carry node 20 by default and the frontend is owned by + # a separate workflow. -B (batch) + -ntp (no transfer progress) + # for quiet logs. 
+ run: mvn -B -ntp -Dfrontend.skip=true verify - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v4.6.2 if: always() with: - name: test-results + name: java-test-results path: target/surefire-reports/ - - uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v4.6.2 - with: - name: coverage-report - path: target/site/jacoco/ diff --git a/.github/workflows/go-ci.yml b/.github/workflows/go-ci.yml new file mode 100644 index 00000000..54ac79bb --- /dev/null +++ b/.github/workflows/go-ci.yml @@ -0,0 +1,66 @@ +name: go-ci + +on: + push: + branches: [main] + paths: ['go/**', '.github/workflows/go-ci.yml'] + pull_request: + branches: [main] + paths: ['go/**', '.github/workflows/go-ci.yml'] + +permissions: + contents: read + +jobs: + go: + name: vet / test / staticcheck / gosec / govulncheck + runs-on: ubuntu-latest + env: + CGO_ENABLED: "1" + defaults: + run: + working-directory: go + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-go@v5 + with: + # Pin to 1.25.x — 1.26+ isn't on enough developer machines yet. + # 1.25.10 includes the fix for GO-2026-4918 (HTTP/2 SETTINGS + # infinite loop) which is reachable via review.Client.Review. + go-version: '1.25.10' + cache: true + cache-dependency-path: go/go.sum + - name: Install C toolchain + run: sudo apt-get update -y && sudo apt-get install -y build-essential + - name: go vet + run: go vet ./... + - name: go test (race) + run: go test ./... -race -count=1 + - name: staticcheck + run: | + # staticcheck must understand the Go toolchain version that built + # the binaries above. 2024.1.1 errors with "internal error in + # importing internal/byteorder (unsupported version: 2)" against + # Go 1.25's stdlib. 2025.1.1 is the first release that handles it. + go install honnef.co/go/tools/cmd/staticcheck@2025.1.1 + "$(go env GOPATH)/bin/staticcheck" ./... 
+ - name: gosec + run: | + # v2.21.4 won't compile under Go 1.25 — its pinned + # golang.org/x/tools v0.25.0 hits an int64 constant-overflow + # bug in tokeninternal.go. v2.22.0 ships an x/tools bump that + # builds clean on 1.25.x. + go install github.com/securego/gosec/v2/cmd/gosec@v2.22.0 + # Suppressed rule rationale (all reviewed manually): + # G104 — idiomatic deferred Close()/Rollback() error drops + # G115 — uint64→int64 on counter rows from Kuzu, bounded + # G202 — analysis-cache LIMIT/OFFSET; ints, not user input + # G204 — git ls-files / mvn shellouts, no user input + # G301/G306 — codeiq cache files are dev-local, 0o755/0o644 ok + # G304 — fixture and cache files under controlled dirs + # G401/G404/G501 — non-crypto hashing (MD5 for ID dedup, etc.) + "$(go env GOPATH)/bin/gosec" -quiet -exclude=G104,G115,G202,G204,G301,G304,G306,G401,G404,G501 ./... + - name: govulncheck + run: | + go install golang.org/x/vuln/cmd/govulncheck@latest + "$(go env GOPATH)/bin/govulncheck" ./... diff --git a/.github/workflows/go-parity.yml b/.github/workflows/go-parity.yml new file mode 100644 index 00000000..54f87666 --- /dev/null +++ b/.github/workflows/go-parity.yml @@ -0,0 +1,101 @@ +name: go-parity + +# Java vs Go parity test for fixture-minimal. Validates that the Go port +# produces the same canonical graph shape as the Java reference until +# Phase 6 cutover deletes the Java tree. Runs on PRs that touch the Go +# tree, the Java tree, the parity harness, or this workflow. +# +# The Java side ships a JSON graph via `codeiq graph -f json` from the +# `serving` profile (Neo4j-backed). A small jq filter +# (go/parity/java-normalize.jq) rewrites that into the same per-file +# canonical shape that the Go-side parity.Normalize emits. The two +# normalized JSON blobs are then diff'd by the `parity` build tag in +# go/parity/parity_test.go, with expected-divergence.json holding the +# allow-list of intentional drift. 
+ +on: + pull_request: + branches: [main] + paths: + - 'go/**' + - 'src/**' + - 'pom.xml' + - '.github/workflows/go-parity.yml' + workflow_dispatch: + +permissions: + contents: read + +jobs: + parity: + name: Java vs Go parity (fixture-minimal) + runs-on: ubuntu-latest + env: + CGO_ENABLED: "1" + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-java@v4 + with: + distribution: temurin + java-version: '25' + cache: maven + - uses: actions/setup-go@v5 + with: + # Pin to 1.25.x — 1.26+ isn't on enough developer machines yet. + go-version: '1.25.10' + cache: true + cache-dependency-path: go/go.sum + - name: Install C toolchain + run: sudo apt-get update -y && sudo apt-get install -y build-essential jq + + # ---- Java side ---------------------------------------------------- + - name: Build Java jar (skip frontend) + run: mvn -B -q -DskipTests -Dfrontend.skip=true package + - name: Stage Java fixture (separate copy so caches don't collide) + run: cp -r go/testdata/fixture-minimal /tmp/fm-java + - name: Java index → H2 cache + run: java -jar target/code-iq-*-cli.jar index /tmp/fm-java + - name: Java enrich → Neo4j (serving profile) + # `graph -f json` reads from Neo4j under the serving profile, not + # H2. Need to enrich first or the export prints "No graph data + # found. Run 'codeiq analyze' first." + run: | + java -Dspring.profiles.active=serving \ + -jar target/code-iq-*-cli.jar enrich /tmp/fm-java + - name: Java graph → normalized JSON + # Run from inside the fixture so Neo4j path resolution finds the + # store enrich wrote. java-normalize.jq pivots the Java + # {nodes:[...]} shape into the per-file array shape + # parity.Normalize uses on the Go side. + # + # The Java CLI prints Logback JSON log lines to stdout BEFORE the + # graph JSON, so we capture everything then awk to the first line + # that is exactly "{" — that's the pretty-printed graph object. 
+ run: | + cd /tmp/fm-java + java -Dspring.profiles.active=serving \ + -jar "$GITHUB_WORKSPACE"/target/code-iq-*-cli.jar graph . -f json \ + > /tmp/java-raw-with-logs.json + awk '/^\{$/ {f=1} f' /tmp/java-raw-with-logs.json > /tmp/java-raw.json + jq -f "$GITHUB_WORKSPACE"/go/parity/java-normalize.jq /tmp/java-raw.json \ + > /tmp/java-normalized.json + + # ---- Go side ------------------------------------------------------ + - name: Build Go binary + working-directory: go + run: go build -o codeiq ./cmd/codeiq + - name: Go parity test (diff vs normalized Java output) + working-directory: go + env: + TEST_JAVA_NORMALIZED: /tmp/java-normalized.json + run: go test -tags=parity ./parity/... -v + + # ---- Failure artifact -------------------------------------------- + - name: Upload normalized JSON on failure + if: failure() + uses: actions/upload-artifact@v4 + with: + name: parity-diff + path: | + /tmp/java-normalized.json + /tmp/java-raw.json diff --git a/.gitignore b/.gitignore index f45b62ea..752155b1 100644 --- a/.gitignore +++ b/.gitignore @@ -16,6 +16,9 @@ target/ *.swo *~ +# Claude Code local state (progress trackers, settings, ralph-loop state) +.claude/ + # OS .DS_Store Thumbs.db @@ -94,6 +97,7 @@ dist/ build/ *.whl pyproject.toml +!go/testdata/**/pyproject.toml uv.lock .venv/ venv/ @@ -116,3 +120,7 @@ graph.db/ # Phase A baseline .seeds/ docs/superpowers/baselines/**/raw/** + +# Agent-generated plans / scratch (not project deliverables) +go-port-phase4-plan.md +phase*-plan.md diff --git a/CHANGELOG.md b/CHANGELOG.md index fae0aa12..76dccaa9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,24 @@ for that specific tag for the per-commit details. ### Added +- **Go port (Phases 1-4 of the rewrite)** — codeiq is being ported from + Java/Spring Boot to a single static Go binary on the `port/go-port` + branch. PR #130. 
100 detectors at 1:1 parity with the Java side; 34 MCP + tools (deprecated) + 6 consolidated mode-driven tools (new); `codeiq + review` CLI + `review_changes` MCP tool for LLM-driven PR review via + Ollama (Cloud or local). Java tree untouched until Phase 6 cutover. +- **Graph dedup + determinism** (Go side) — `GraphBuilder` deduplicates + nodes by ID with confidence-aware merging, edges by canonical + `(source, target, kind)` tuple. Linker output sorted at the boundary. + `codeiq index` surfaces "Deduped: N nodes, M edges Dropped: K phantom + edges" so graph hygiene is visible. +- **`codeiq review`** — LLM-driven review of `git diff base..head` against + the indexed graph. Defaults to local Ollama (`gpt-oss:20b`); set + `OLLAMA_API_KEY` to flip to Ollama Cloud. `--format=markdown|json`, + `--out`, `--focus`. Graph evidence (nodes-in-file + 1-hop blast radius) + attached per changed file when the Kuzu store is enriched. +- **`review_changes` MCP tool** — same review flow exposed over MCP for + agent-driven invocation. Strictly read-only against the graph. - OpenSSF supply-chain wiring — Best Practices project [12650](https://www.bestpractices.dev/projects/12650), live Scorecard at [securityscorecards.dev](https://api.securityscorecards.dev/projects/github.com/RandomCodeSpace/codeiq), diff --git a/README.md b/README.md index 505f43aa..0528bd1d 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,27 @@ --- +## Development — Go Port (Phase 1) + +An in-progress Go port lives in [`go/`](./go/). Phase 1 ships `codeiq index` +over 5 detectors with byte-level parity against the Java side on +`go/testdata/fixture-minimal`. Phases 2-6 land enrich, MCP, the remaining 94 +detectors, release infra, and Java cutover (see +[`docs/superpowers/specs/2026-05-11-codeiq-go-port-design.md`](docs/superpowers/specs/2026-05-11-codeiq-go-port-design.md)). + +Build and run: + +```bash +cd go +CGO_ENABLED=1 go build -o codeiq ./cmd/codeiq +./codeiq index . 
+./codeiq --version +``` + +The Go binary writes to the same `.codeiq/cache/` location the Java side +uses, but `CACHE_VERSION` is bumped to 6 so the first run triggers a clean +rebuild. Phase 1 is parity-only — use the Java side for production runs. + ## Quick Start ```bash diff --git a/go/.gitignore b/go/.gitignore new file mode 100644 index 00000000..002bb693 --- /dev/null +++ b/go/.gitignore @@ -0,0 +1,6 @@ +/codeiq +/codeiq.exe +/coverage.out +/coverage.html +/dist/ +/.cache/ diff --git a/go/cmd/codeiq/main.go b/go/cmd/codeiq/main.go new file mode 100644 index 00000000..8d7f0362 --- /dev/null +++ b/go/cmd/codeiq/main.go @@ -0,0 +1,13 @@ +// Binary codeiq is the codeiq CLI entry point. All logic lives in +// internal/cli; this file is just the os.Exit shim. +package main + +import ( + "os" + + "github.com/randomcodespace/codeiq/go/internal/cli" +) + +func main() { + os.Exit(cli.Execute()) +} diff --git a/go/cmd/extcheck/main.go b/go/cmd/extcheck/main.go new file mode 100644 index 00000000..b9c4b985 --- /dev/null +++ b/go/cmd/extcheck/main.go @@ -0,0 +1,33 @@ +package main + +import ( + "fmt" + + "github.com/randomcodespace/codeiq/go/internal/detector" + // Same blank imports as the CLI uses + _ "github.com/randomcodespace/codeiq/go/internal/detector/auth" + _ "github.com/randomcodespace/codeiq/go/internal/detector/csharp" + _ "github.com/randomcodespace/codeiq/go/internal/detector/frontend" + _ "github.com/randomcodespace/codeiq/go/internal/detector/generic" + _ "github.com/randomcodespace/codeiq/go/internal/detector/golang" + _ "github.com/randomcodespace/codeiq/go/internal/detector/iac" + _ "github.com/randomcodespace/codeiq/go/internal/detector/jvm/java" + _ "github.com/randomcodespace/codeiq/go/internal/detector/jvm/kotlin" + _ "github.com/randomcodespace/codeiq/go/internal/detector/jvm/scala" + _ "github.com/randomcodespace/codeiq/go/internal/detector/markup" + _ "github.com/randomcodespace/codeiq/go/internal/detector/proto" + _ 
"github.com/randomcodespace/codeiq/go/internal/detector/python" + _ "github.com/randomcodespace/codeiq/go/internal/detector/script/shell" + _ "github.com/randomcodespace/codeiq/go/internal/detector/sql" + _ "github.com/randomcodespace/codeiq/go/internal/detector/structured" + _ "github.com/randomcodespace/codeiq/go/internal/detector/systems/cpp" + _ "github.com/randomcodespace/codeiq/go/internal/detector/systems/rust" + _ "github.com/randomcodespace/codeiq/go/internal/detector/typescript" +) + +func main() { + for _, lang := range []string{"terraform", "csharp", "kotlin", "vue", "bash", "rust", "powershell"} { + dets := detector.Default.For(lang) + fmt.Printf("%-12s: %d detectors\n", lang, len(dets)) + } +} diff --git a/go/go.mod b/go/go.mod new file mode 100644 index 00000000..83bd2354 --- /dev/null +++ b/go/go.mod @@ -0,0 +1,37 @@ +module github.com/randomcodespace/codeiq/go + +// Minimum Go version that can compile this module — clamped at 1.25.0 +// because github.com/modelcontextprotocol/go-sdk v1.6 (Phase 3, MCP +// server) declares `go 1.25.0`. `go mod tidy` rewrites anything lower +// back to 1.25.0. Bumping out of 1.25 should wait until a release of +// that SDK that targets 1.26+. +go 1.25.0 + +// Actual build toolchain. Pinned to 1.25.10 — 1.26+ isn't on enough +// developer machines yet. CI pins the same version (.github/workflows/ +// go-ci.yml + go-parity.yml). 
+toolchain go1.25.10 + +require github.com/mattn/go-sqlite3 v1.14.22 + +require ( + github.com/google/uuid v1.6.0 + github.com/kuzudb/go-kuzu v0.7.1 + github.com/modelcontextprotocol/go-sdk v1.6.0 + github.com/pmezard/go-difflib v1.0.0 + github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 + github.com/spf13/cobra v1.8.0 + github.com/spf13/pflag v1.0.5 + gopkg.in/yaml.v3 v3.0.1 +) + +require ( + github.com/google/jsonschema-go v0.4.3 // indirect + github.com/inconshreveable/mousetrap v1.1.0 // indirect + github.com/segmentio/asm v1.1.3 // indirect + github.com/segmentio/encoding v0.5.4 // indirect + github.com/shopspring/decimal v1.4.0 // indirect + github.com/yosida95/uritemplate/v3 v3.0.2 // indirect + golang.org/x/oauth2 v0.35.0 // indirect + golang.org/x/sys v0.41.0 // indirect +) diff --git a/go/go.sum b/go/go.sum new file mode 100644 index 00000000..db329145 --- /dev/null +++ b/go/go.sum @@ -0,0 +1,48 @@ +github.com/cpuguy83/go-md2man/v2 v2.0.3/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/golang-jwt/jwt/v5 v5.3.1 h1:kYf81DTWFe7t+1VvL7eS+jKFVWaUnK9cB1qbwn63YCY= +github.com/golang-jwt/jwt/v5 v5.3.1/go.mod h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/google/jsonschema-go v0.4.3 h1:/DBOLZTfDow7pe2GmaJNhltueGTtDKICi8V8p+DQPd0= +github.com/google/jsonschema-go v0.4.3/go.mod h1:r5quNTdLOYEz95Ru18zA0ydNbBuYoo9tgaYcxEYhJVE= +github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= +github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/inconshreveable/mousetrap v1.1.0 h1:wN+x4NVGpMsO7ErUn/mUI3vEoE6Jt13X2s0bqwp9tc8= 
+github.com/inconshreveable/mousetrap v1.1.0/go.mod h1:vpF70FUmC8bwa3OWnCshd2FqLfsEA9PFc4w1p2J65bw= +github.com/kuzudb/go-kuzu v0.7.1 h1:EJHqur2zwIMwdenw/VQKVdH2Xz62UF9y1KQyXeyo8+A= +github.com/kuzudb/go-kuzu v0.7.1/go.mod h1:s2NvXX3fB2QZfWGf6SjJSYawgTPE17a7WHZmzfLIZtU= +github.com/mattn/go-sqlite3 v1.14.22 h1:2gZY6PC6kBnID23Tichd1K+Z0oS6nE/XwU+Vz/5o4kU= +github.com/mattn/go-sqlite3 v1.14.22/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y= +github.com/modelcontextprotocol/go-sdk v1.6.0 h1:PPLS3kn7WtOEnR+Af4X5H96SG0qSab8R/ZQT/HkhPkY= +github.com/modelcontextprotocol/go-sdk v1.6.0/go.mod h1:kzm3kzFL1/+AziGOE0nUs3gvPoNxMCvkxokMkuFapXQ= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM= +github.com/segmentio/asm v1.1.3 h1:WM03sfUOENvvKexOLp+pCqgb/WDjsi7EK8gIsICtzhc= +github.com/segmentio/asm v1.1.3/go.mod h1:Ld3L4ZXGNcSLRg4JBsZ3//1+f/TjYl0Mzen/DQy1EJg= +github.com/segmentio/encoding v0.5.4 h1:OW1VRern8Nw6ITAtwSZ7Idrl3MXCFwXHPgqESYfvNt0= +github.com/segmentio/encoding v0.5.4/go.mod h1:HS1ZKa3kSN32ZHVZ7ZLPLXWvOVIiZtyJnO1gPH1sKt0= +github.com/shopspring/decimal v1.4.0 h1:bxl37RwXBklmTi0C79JfXCEBD1cqqHt0bbgBAGFp81k= +github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+DMd9qYNcwME= +github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82 h1:6C8qej6f1bStuePVkLSFxoU22XBS165D3klxlzRg8F4= +github.com/smacker/go-tree-sitter v0.0.0-20240827094217-dd81d9e9be82/go.mod h1:xe4pgH49k4SsmkQq5OT8abwhWmnzkhpgnXeekbx2efw= +github.com/spf13/cobra v1.8.0 h1:7aJaZx1B85qltLMc546zn58BxxfZdR/W22ej9CFoEf0= +github.com/spf13/cobra v1.8.0/go.mod h1:WXLWApfZ71AjXPya3WOlMsY9yMs7YeiHhFVlvLyhcho= +github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= +github.com/spf13/pflag v1.0.5/go.mod 
h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= +github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/yosida95/uritemplate/v3 v3.0.2 h1:Ed3Oyj9yrmi9087+NczuL5BwkIc4wvTb5zIM+UJPGz4= +github.com/yosida95/uritemplate/v3 v3.0.2/go.mod h1:ILOh0sOhIJR3+L/8afwt/kE++YT040gmv5BQTMR2HP4= +golang.org/x/oauth2 v0.35.0 h1:Mv2mzuHuZuY2+bkyWXIHMfhNdJAdwW3FuWeCPYN5GVQ= +golang.org/x/oauth2 v0.35.0/go.mod h1:lzm5WQJQwKZ3nwavOZ3IS5Aulzxi68dUSgRHujetwEA= +golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k= +golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= +golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k= +golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/go/internal/analyzer/analyzer.go b/go/internal/analyzer/analyzer.go new file mode 100644 index 00000000..3afebe29 --- /dev/null +++ b/go/internal/analyzer/analyzer.go @@ -0,0 +1,150 @@ +package analyzer + +import ( + "fmt" + "os" + "runtime" + "sync" + "time" + + "github.com/randomcodespace/codeiq/go/internal/cache" + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/parser" +) + +// DefaultBatchSize matches the Java side's tuned default (CLAUDE.md gotcha). +const DefaultBatchSize = 500 + +// Options configures an Analyzer. 
+type Options struct { + Cache *cache.Cache + Registry *detector.Registry + BatchSize int // defaults to DefaultBatchSize + Workers int // defaults to 2 * GOMAXPROCS +} + +// Analyzer orchestrates the index pipeline. +type Analyzer struct { + opts Options +} + +// NewAnalyzer returns an analyzer wired to opts. +func NewAnalyzer(opts Options) *Analyzer { + if opts.BatchSize <= 0 { + opts.BatchSize = DefaultBatchSize + } + if opts.Workers <= 0 { + opts.Workers = runtime.GOMAXPROCS(0) * 2 + } + if opts.Registry == nil { + opts.Registry = detector.Default + } + return &Analyzer{opts: opts} +} + +// Stats reports per-run counts. +// +// Plan §1.5 — DedupedNodes/DedupedEdges/DroppedEdges expose dedup activity +// so operators can see "graph collapsed 312 duplicate nodes, dropped 14 +// phantom edges" — the visibility is what makes "meaningful" diagnosable. +type Stats struct { + Files int + Nodes int + Edges int + DedupedNodes int + DedupedEdges int + DroppedEdges int +} + +// Run executes FileDiscovery → parse → detectors → GraphBuilder → cache writes +// and returns aggregate stats. Errors from individual file processing are +// logged to stderr but do not stop the run — partial output is better than no +// output (matches Java's per-file try/catch behaviour). +func (a *Analyzer) Run(root string) (Stats, error) { + disc := NewFileDiscovery() + files, err := disc.Discover(root) + if err != nil { + return Stats{}, fmt.Errorf("file discovery: %w", err) + } + gb := NewGraphBuilder() + + // Bounded worker pool. 
+ type job struct { + f DiscoveredFile + } + jobs := make(chan job) + var wg sync.WaitGroup + for i := 0; i < a.opts.Workers; i++ { + wg.Add(1) + go func() { + defer wg.Done() + for j := range jobs { + if err := a.processFile(j.f, gb); err != nil { + fmt.Fprintf(os.Stderr, "codeiq: %s: %v\n", j.f.RelPath, err) + } + } + }() + } + for _, f := range files { + jobs <- job{f: f} + } + close(jobs) + wg.Wait() + + snap := gb.Snapshot() + return Stats{ + Files: len(files), + Nodes: len(snap.Nodes), + Edges: len(snap.Edges), + DedupedNodes: snap.DedupedNodes, + DedupedEdges: snap.DedupedEdges, + DroppedEdges: snap.DroppedEdges, + }, nil +} + +func (a *Analyzer) processFile(f DiscoveredFile, gb *GraphBuilder) error { + content, err := os.ReadFile(f.AbsPath) + if err != nil { + return err + } + hash := cache.HashString(string(content)) + tree, err := parser.Parse(f.Language, content) + if err != nil { + // Continue with regex-only detectors when the parser bails — matches + // Java behaviour for non-fatal parse errors. + tree = nil + } + if tree != nil { + defer tree.Close() + } + parsed, _ := parser.ParseStructured(f.Language, content) + ctx := &detector.Context{ + FilePath: f.RelPath, + Language: f.Language.String(), + Content: string(content), + Tree: tree, + ParsedData: parsed, + } + + entry := &cache.Entry{ + ContentHash: hash, + Path: f.RelPath, + Language: f.Language.String(), + ParsedAt: time.Now().UTC().Format(time.RFC3339), + } + for _, d := range a.opts.Registry.For(f.Language.String()) { + r := d.Detect(ctx) + if r == nil { + continue + } + gb.Add(r) + entry.Nodes = append(entry.Nodes, r.Nodes...) + entry.Edges = append(entry.Edges, r.Edges...) 
+ } + if a.opts.Cache != nil { + if err := a.opts.Cache.Put(entry); err != nil { + return fmt.Errorf("cache put: %w", err) + } + } + return nil +} diff --git a/go/internal/analyzer/analyzer_test.go b/go/internal/analyzer/analyzer_test.go new file mode 100644 index 00000000..64d5dd6a --- /dev/null +++ b/go/internal/analyzer/analyzer_test.go @@ -0,0 +1,88 @@ +package analyzer + +import ( + "os" + "path/filepath" + "testing" + + "github.com/randomcodespace/codeiq/go/internal/cache" + "github.com/randomcodespace/codeiq/go/internal/detector" + + // Register the 5 phase-1 detectors via blank imports. + _ "github.com/randomcodespace/codeiq/go/internal/detector/generic" + _ "github.com/randomcodespace/codeiq/go/internal/detector/jvm/java" + _ "github.com/randomcodespace/codeiq/go/internal/detector/python" +) + +const fixtureJava = `package com.x; +import java.util.List; +import org.springframework.web.bind.annotation.*; + +@RestController +@RequestMapping("/users") +public class UserController { + @GetMapping("/{id}") + public String get(Long id) { return ""; } +} +` + +const fixturePython = `from django.db import models + +class Author(models.Model): + name = models.CharField(max_length=100) +` + +func TestAnalyzerEndToEnd(t *testing.T) { + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, "UserController.java"), []byte(fixtureJava), 0644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(dir, "models.py"), []byte(fixturePython), 0644); err != nil { + t.Fatal(err) + } + + c, err := cache.Open(filepath.Join(dir, "cache.sqlite")) + if err != nil { + t.Fatal(err) + } + defer c.Close() + + a := NewAnalyzer(Options{Cache: c, Registry: detector.Default}) + stats, err := a.Run(dir) + if err != nil { + t.Fatal(err) + } + if stats.Files != 2 { + t.Fatalf("Files = %d, want 2", stats.Files) + } + if stats.Nodes < 2 { + t.Fatalf("Nodes = %d, want >= 2", stats.Nodes) + } + // Verify both files round-tripped through the cache. 
+ count := 0 + _ = c.IterateAll(func(*cache.Entry) error { count++; return nil }) + if count != 2 { + t.Fatalf("cache entries = %d, want 2", count) + } +} + +func TestAnalyzerDeterminism(t *testing.T) { + dir := t.TempDir() + if err := os.WriteFile(filepath.Join(dir, "UserController.java"), []byte(fixtureJava), 0644); err != nil { + t.Fatal(err) + } + if err := os.WriteFile(filepath.Join(dir, "models.py"), []byte(fixturePython), 0644); err != nil { + t.Fatal(err) + } + c1, _ := cache.Open(filepath.Join(dir, "c1.sqlite")) + c2, _ := cache.Open(filepath.Join(dir, "c2.sqlite")) + defer c1.Close() + defer c2.Close() + a1 := NewAnalyzer(Options{Cache: c1, Registry: detector.Default}) + a2 := NewAnalyzer(Options{Cache: c2, Registry: detector.Default}) + s1, _ := a1.Run(dir) + s2, _ := a2.Run(dir) + if s1.Nodes != s2.Nodes || s1.Edges != s2.Edges || s1.Files != s2.Files { + t.Fatalf("non-deterministic stats: %+v vs %+v", s1, s2) + } +} diff --git a/go/internal/analyzer/enrich.go b/go/internal/analyzer/enrich.go new file mode 100644 index 00000000..916520aa --- /dev/null +++ b/go/internal/analyzer/enrich.go @@ -0,0 +1,141 @@ +package analyzer + +import ( + "fmt" + "path/filepath" + + "github.com/randomcodespace/codeiq/go/internal/analyzer/linker" + "github.com/randomcodespace/codeiq/go/internal/cache" + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/graph" + "github.com/randomcodespace/codeiq/go/internal/intelligence/extractor" + extractorgolang "github.com/randomcodespace/codeiq/go/internal/intelligence/extractor/golang" + extractorjava "github.com/randomcodespace/codeiq/go/internal/intelligence/extractor/java" + extractorpython "github.com/randomcodespace/codeiq/go/internal/intelligence/extractor/python" + extractortypescript "github.com/randomcodespace/codeiq/go/internal/intelligence/extractor/typescript" + "github.com/randomcodespace/codeiq/go/internal/intelligence/lexical" + 
"github.com/randomcodespace/codeiq/go/internal/model" +) + +// EnrichOptions configures Enrich. The zero value is usable; GraphDir +// defaults to `<root>/.codeiq/graph/codeiq.kuzu` when empty. +type EnrichOptions struct { + // GraphDir overrides the Kuzu output directory. When "", the default + // `<root>/.codeiq/graph/codeiq.kuzu` is used. + GraphDir string +} + +// EnrichSummary reports per-run counters from a successful Enrich. +type EnrichSummary struct { + Nodes int + Edges int + Services int +} + +// Enrich loads the SQLite cache for `root`, runs the linker / classifier / +// lexical / language-extractor / service-detector passes, bulk-loads the +// resulting graph into Kuzu, and creates the FTS-equivalent indexes. The +// returned summary reports total nodes / edges / service nodes after every +// pass has run. +// +// Mirrors the `enrich` pipeline in Java (Analyzer.java + GraphStore.java). +// The pipeline order matches the Java side exactly: +// +// 1. Linkers (TopicLinker, EntityLinker, ModuleContainmentLinker) +// 2. LayerClassifier +// 3. LexicalEnricher (doc comments + config keys) +// 4. LanguageEnricher (Java, TypeScript, Python, Go extractors) +// 5. ServiceDetector (filesystem walk for build files) +// 6. graph.Store.BulkLoadNodes / BulkLoadEdges / CreateIndexes +// +// All steps are deterministic — repeated calls against the same cache + root +// produce identical Kuzu output. +func Enrich(root string, c *cache.Cache, opts EnrichOptions) (EnrichSummary, error) { + if opts.GraphDir == "" { + opts.GraphDir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + + // Re-hydrate the graph from cache. GraphBuilder dedupes by node/edge ID and + // produces a deterministic snapshot with dangling edges dropped. 
+ builder := NewGraphBuilder() + err := c.IterateAll(func(r *cache.Entry) error { + builder.Add(&detector.Result{Nodes: r.Nodes, Edges: r.Edges}) + return nil + }) + if err != nil { + return EnrichSummary{}, fmt.Errorf("enrich: iterate cache: %w", err) + } + snap := builder.Snapshot() + nodes := snap.Nodes + edges := snap.Edges + + // 1. Linkers — order matches Analyzer.java. + // Plan §1.4 — Sorted() at the boundary makes the output independent of + // any linker's internal iteration order. + for _, l := range []linker.Linker{ + linker.NewTopicLinker(), + linker.NewEntityLinker(), + linker.NewModuleContainmentLinker(), + } { + r := l.Link(nodes, edges).Sorted() + nodes = append(nodes, r.Nodes...) + edges = append(edges, r.Edges...) + } + + // 2. Layer classification — mutates nodes in place. + (&LayerClassifier{}).Classify(nodes) + + // 3. Lexical enrichment — stamps lex_comment / lex_config_keys properties + // onto candidate nodes. Reads files from disk under root. + lexical.NewEnricher().Enrich(nodes, root) + + // 4. Language extractors — stamp type hints, emit CALLS / IMPORTS edges. + // The four extractors are constructed explicitly here and handed to the + // orchestrator, which presumably selects by file extension (confirm). + en := extractor.NewEnricher( + extractorjava.New(), + extractortypescript.New(), + extractorpython.New(), + extractorgolang.New(), + ) + en.Enrich(nodes, &edges, root) + + // 5. ServiceDetector — walk filesystem for build files, emit SERVICE nodes + // + CONTAINS edges. Mutates nodes' `service` property in place. + sd := &ServiceDetector{} + sres := sd.Detect(nodes, edges, filepath.Base(root), root) + nodes = append(nodes, sres.Nodes...) + edges = append(edges, sres.Edges...) + + // 6. Bulk-load Kuzu — schema + nodes + edges + indexes. The store is + // closed when this function returns; read-side commands re-open it. 
+ store, err := graph.Open(opts.GraphDir) + if err != nil { + return EnrichSummary{}, fmt.Errorf("enrich: open graph: %w", err) + } + defer store.Close() + if err := store.ApplySchema(); err != nil { + return EnrichSummary{}, fmt.Errorf("enrich: apply schema: %w", err) + } + if err := store.BulkLoadNodes(nodes); err != nil { + return EnrichSummary{}, fmt.Errorf("enrich: bulk load nodes: %w", err) + } + if err := store.BulkLoadEdges(edges); err != nil { + return EnrichSummary{}, fmt.Errorf("enrich: bulk load edges: %w", err) + } + if err := store.CreateIndexes(); err != nil { + return EnrichSummary{}, fmt.Errorf("enrich: create indexes: %w", err) + } + + return EnrichSummary{ + Nodes: len(nodes), + Edges: len(edges), + Services: len(sres.Nodes), + }, nil +} + +// Touch the model.NodeService symbol so the package import stays meaningful +// even when callers don't reach for the constant directly — this gives the +// Java-side comment in EnrichSummary a referent and prevents accidental +// import pruning during goimports runs. +var _ = model.NodeService diff --git a/go/internal/analyzer/enrich_test.go b/go/internal/analyzer/enrich_test.go new file mode 100644 index 00000000..b86c1b4f --- /dev/null +++ b/go/internal/analyzer/enrich_test.go @@ -0,0 +1,115 @@ +package analyzer_test + +import ( + "io" + "os" + "path/filepath" + "testing" + + "github.com/randomcodespace/codeiq/go/internal/analyzer" + "github.com/randomcodespace/codeiq/go/internal/cache" +) + +// copyDirAll mirrors `cp -r` for test-fixture staging: every regular file +// under src lands at the same relative path under dst. Source-tree symlinks +// and special files are skipped (not needed by the test fixtures). 
+func copyDirAll(src, dst string) error { + return filepath.Walk(src, func(p string, info os.FileInfo, err error) error { + if err != nil { + return err + } + rel, relErr := filepath.Rel(src, p) + if relErr != nil { + return relErr + } + target := filepath.Join(dst, rel) + if info.IsDir() { + return os.MkdirAll(target, 0o755) + } + if !info.Mode().IsRegular() { + return nil + } + in, err := os.Open(p) + if err != nil { + return err + } + defer in.Close() + if mkdErr := os.MkdirAll(filepath.Dir(target), 0o755); mkdErr != nil { + return mkdErr + } + out, err := os.OpenFile(target, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0o644) + if err != nil { + return err + } + if _, err := io.Copy(out, in); err != nil { + out.Close() + return err + } + return out.Close() + }) +} + +// TestEnrichEmptyCacheIsNoop confirms enrich tolerates an empty cache — the +// pipeline `index → enrich` must work when index produced no results (empty +// directory, all-skipped files), returning zero nodes / zero edges / zero +// services rather than erroring. +func TestEnrichEmptyCacheIsNoop(t *testing.T) { + dir := t.TempDir() + c, err := cache.Open(filepath.Join(dir, "cache.sqlite")) + if err != nil { + t.Fatalf("cache open: %v", err) + } + defer c.Close() + summary, err := analyzer.Enrich(dir, c, analyzer.EnrichOptions{ + GraphDir: filepath.Join(dir, "graph.kuzu"), + }) + if err != nil { + t.Fatalf("enrich: %v", err) + } + // Empty cache produces no original nodes; ServiceDetector still synthesises + // one root SERVICE node for the project directory itself. 
+ if summary.Nodes < summary.Services { + t.Fatalf("nodes %d less than services %d", summary.Nodes, summary.Services) + } + if summary.Edges < 0 { + t.Fatalf("negative edges: %d", summary.Edges) + } +} + +// TestEnrichFixtureMinimalProducesGraph runs the full index → enrich pipeline +// against the fixture-minimal corpus and asserts the resulting graph has at +// least the entity / endpoint / service nodes the fixture is expected to +// produce. Sanity check, not a parity check. +func TestEnrichFixtureMinimalProducesGraph(t *testing.T) { + src := filepath.Join("..", "..", "testdata", "fixture-minimal") + // Copy fixture to a writable tmp dir so the index cache + graph store + // can be created under it without touching the source tree. + tmp := t.TempDir() + if err := copyDirAll(src, tmp); err != nil { + t.Fatalf("copy fixture: %v", err) + } + + c, err := cache.Open(filepath.Join(tmp, "cache.sqlite")) + if err != nil { + t.Fatalf("cache: %v", err) + } + defer c.Close() + + a := analyzer.NewAnalyzer(analyzer.Options{Cache: c}) + if _, err := a.Run(tmp); err != nil { + t.Fatalf("index: %v", err) + } + + summary, err := analyzer.Enrich(tmp, c, analyzer.EnrichOptions{ + GraphDir: filepath.Join(tmp, "graph.kuzu"), + }) + if err != nil { + t.Fatalf("enrich: %v", err) + } + if summary.Nodes == 0 { + t.Fatalf("expected non-empty graph, got 0 nodes") + } + if summary.Services == 0 { + t.Fatalf("expected at least one SERVICE node") + } +} diff --git a/go/internal/analyzer/file_discovery.go b/go/internal/analyzer/file_discovery.go new file mode 100644 index 00000000..0da18b50 --- /dev/null +++ b/go/internal/analyzer/file_discovery.go @@ -0,0 +1,125 @@ +package analyzer + +import ( + "bytes" + "io/fs" + "os/exec" + "path/filepath" + "sort" + "strings" + + "github.com/randomcodespace/codeiq/go/internal/parser" +) + +// DefaultExcludeDirs mirrors the Java FileDiscovery.DEFAULT_EXCLUDES set. 
+var DefaultExcludeDirs = map[string]bool{ + "node_modules": true, "build": true, "target": true, "dist": true, + "out": true, "bin": true, "obj": true, + ".git": true, ".svn": true, ".idea": true, ".vscode": true, + ".eclipse": true, ".settings": true, + "__pycache__": true, "venv": true, ".venv": true, ".tox": true, + ".mypy_cache": true, ".pytest_cache": true, ".eggs": true, + ".gradle": true, ".mvn": true, + "bower_components": true, ".next": true, ".nuxt": true, "coverage": true, + ".nyc_output": true, ".parcel-cache": true, ".turbo": true, ".cache": true, + "vendor": true, + ".codeiq": true, +} + +// DiscoveredFile is one file discovered for analysis. +type DiscoveredFile struct { + AbsPath string + RelPath string // forward-slash, relative to root + Language parser.Language + Ext string +} + +// FileDiscovery walks a repo and emits language-tagged files. Uses +// `git ls-files -co --exclude-standard` first; falls back to fs walk. +type FileDiscovery struct{} + +// NewFileDiscovery returns a discovery instance. +func NewFileDiscovery() *FileDiscovery { return &FileDiscovery{} } + +// Discover walks root and returns files sorted by RelPath. 
+func (d *FileDiscovery) Discover(root string) ([]DiscoveredFile, error) { + abs, err := filepath.Abs(root) + if err != nil { + return nil, err + } + files, err := d.gitLsFiles(abs) + if err != nil || len(files) == 0 { + files, err = d.walkFS(abs) + if err != nil { + return nil, err + } + } + sort.Slice(files, func(i, j int) bool { return files[i].RelPath < files[j].RelPath }) + return files, nil +} + +func (d *FileDiscovery) gitLsFiles(root string) ([]DiscoveredFile, error) { + cmd := exec.Command("git", "-C", root, "ls-files", "-co", "--exclude-standard") + var out bytes.Buffer + cmd.Stdout = &out + if err := cmd.Run(); err != nil { + return nil, err + } + var files []DiscoveredFile + for _, line := range strings.Split(out.String(), "\n") { + rel := strings.TrimSpace(line) + if rel == "" { + continue + } + df, ok := makeDiscoveredFile(root, rel) + if !ok { + continue + } + files = append(files, df) + } + return files, nil +} + +func (d *FileDiscovery) walkFS(root string) ([]DiscoveredFile, error) { + var files []DiscoveredFile + err := filepath.WalkDir(root, func(path string, dent fs.DirEntry, err error) error { + if err != nil { + return nil + } + if dent.IsDir() { + if DefaultExcludeDirs[dent.Name()] { + return filepath.SkipDir + } + return nil + } + rel, _ := filepath.Rel(root, path) + rel = filepath.ToSlash(rel) + df, ok := makeDiscoveredFile(root, rel) + if !ok { + return nil + } + files = append(files, df) + return nil + }) + return files, err +} + +func makeDiscoveredFile(root, rel string) (DiscoveredFile, bool) { + rel = filepath.ToSlash(rel) + for _, seg := range strings.Split(rel, "/") { + if DefaultExcludeDirs[seg] { + return DiscoveredFile{}, false + } + } + ext := strings.ToLower(filepath.Ext(rel)) + lang := parser.LanguageFromExtension(ext) + if lang == parser.LanguageUnknown { + return DiscoveredFile{}, false + } + return DiscoveredFile{ + AbsPath: filepath.Join(root, filepath.FromSlash(rel)), + RelPath: rel, + Language: lang, + Ext: ext, + }, true 
+} diff --git a/go/internal/analyzer/file_discovery_test.go b/go/internal/analyzer/file_discovery_test.go new file mode 100644 index 00000000..e251b650 --- /dev/null +++ b/go/internal/analyzer/file_discovery_test.go @@ -0,0 +1,86 @@ +package analyzer + +import ( + "os" + "path/filepath" + "sort" + "testing" + + "github.com/randomcodespace/codeiq/go/internal/parser" +) + +func makeTree(t *testing.T) string { + dir := t.TempDir() + mustWrite := func(p, c string) { + full := filepath.Join(dir, p) + _ = os.MkdirAll(filepath.Dir(full), 0755) + if err := os.WriteFile(full, []byte(c), 0644); err != nil { + t.Fatal(err) + } + } + mustWrite("a.java", "public class A {}") + mustWrite("sub/b.py", "x = 1") + mustWrite("node_modules/skip.js", "skip me") + mustWrite(".git/HEAD", "ref: refs/heads/main") + mustWrite(".codeiq/cache/x.sqlite", "blob") + mustWrite("LICENSE", "MIT") + return dir +} + +func TestDirWalkDiscovery(t *testing.T) { + dir := makeTree(t) + disc := NewFileDiscovery() + files, err := disc.Discover(dir) + if err != nil { + t.Fatal(err) + } + got := make([]string, 0, len(files)) + for _, f := range files { + got = append(got, f.RelPath) + } + sort.Strings(got) + want := []string{"a.java", "sub/b.py"} + if len(got) != len(want) { + t.Fatalf("Discover() = %v, want %v", got, want) + } + for i := range want { + if got[i] != want[i] { + t.Errorf("got[%d] = %q, want %q", i, got[i], want[i]) + } + } +} + +func TestLanguageTagging(t *testing.T) { + dir := makeTree(t) + files, err := NewFileDiscovery().Discover(dir) + if err != nil { + t.Fatal(err) + } + for _, f := range files { + switch f.RelPath { + case "a.java": + if f.Language != parser.LanguageJava { + t.Errorf("a.java lang = %v, want Java", f.Language) + } + case "sub/b.py": + if f.Language != parser.LanguagePython { + t.Errorf("b.py lang = %v, want Python", f.Language) + } + } + } +} + +func TestDeterministicOrder(t *testing.T) { + dir := makeTree(t) + disc := NewFileDiscovery() + a, _ := disc.Discover(dir) + b, 
_ := disc.Discover(dir) + if len(a) != len(b) { + t.Fatal("non-deterministic count") + } + for i := range a { + if a[i].RelPath != b[i].RelPath { + t.Fatalf("non-deterministic order at %d: %q != %q", i, a[i].RelPath, b[i].RelPath) + } + } +} diff --git a/go/internal/analyzer/graph_builder.go b/go/internal/analyzer/graph_builder.go new file mode 100644 index 00000000..95d4e162 --- /dev/null +++ b/go/internal/analyzer/graph_builder.go @@ -0,0 +1,119 @@ +package analyzer + +import ( + "sort" + "sync" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// GraphBuilder buffers detector results across batches. Concurrent-safe. +// +// Phase 1 (plan §1.1, §1.2): +// - Nodes are deduped by ID via mergeNode (confidence-aware). +// - Edges are deduped by canonical (source, target, kind) key via mergeEdge. +// +// Snapshot() produces a deterministic sorted view with phantom edges (those +// whose endpoint is still missing) dropped, and exposes the dedup/drop +// counts so the CLI can surface "deduped N, dropped K" diagnostics. +type GraphBuilder struct { + mu sync.Mutex + nodes map[string]*model.CodeNode + edges map[edgeKey]*model.CodeEdge + + // Counters incremented as Add() observes duplicates and used by + // Snapshot() to populate the surfaced stats. + dedupedNodes int + dedupedEdges int +} + +// NewGraphBuilder returns an empty builder. +func NewGraphBuilder() *GraphBuilder { + return &GraphBuilder{ + nodes: make(map[string]*model.CodeNode), + edges: make(map[edgeKey]*model.CodeEdge), + } +} + +// Add merges a detector result. Duplicate node IDs and duplicate edge +// (source, target, kind) tuples collapse with confidence-aware merging. 
+func (b *GraphBuilder) Add(r *detector.Result) { + if r == nil { + return + } + b.mu.Lock() + defer b.mu.Unlock() + for _, n := range r.Nodes { + if existing, ok := b.nodes[n.ID]; ok { + b.nodes[n.ID] = mergeNode(existing, n) + b.dedupedNodes++ + continue + } + b.nodes[n.ID] = n + } + for _, e := range r.Edges { + k := makeEdgeKey(e) + if existing, ok := b.edges[k]; ok { + b.edges[k] = mergeEdge(existing, e) + b.dedupedEdges++ + continue + } + b.edges[k] = e + } +} + +// Snapshot is the deterministic, sorted view of buffered state with +// phantom edges (source or target node missing) dropped. It also exposes +// the count of duplicate emissions collapsed during Add() and the count +// of dangling edges dropped during this Snapshot call. +type Snapshot struct { + Nodes []*model.CodeNode + Edges []*model.CodeEdge + + // DedupedNodes is the count of node emissions that collided with an + // existing node ID and were merged in. Zero on a graph where no + // detector double-emitted. + DedupedNodes int + // DedupedEdges is the same for edges by (source, target, kind). + DedupedEdges int + // DroppedEdges is the count of edges that had no matching source or + // target node in the final node set — phantom references usually + // caused by a linker pointing at a node that no detector emitted. + DroppedEdges int +} + +// Snapshot returns the current state as a sorted, dangling-edge-free +// Snapshot with surfaced dedup/drop counts. 
+func (b *GraphBuilder) Snapshot() Snapshot { + b.mu.Lock() + defer b.mu.Unlock() + nodes := make([]*model.CodeNode, 0, len(b.nodes)) + for _, n := range b.nodes { + nodes = append(nodes, n) + } + sort.Slice(nodes, func(i, j int) bool { return nodes[i].ID < nodes[j].ID }) + + edges := make([]*model.CodeEdge, 0, len(b.edges)) + dropped := 0 + for _, e := range b.edges { + if _, src := b.nodes[e.SourceID]; !src { + dropped++ + continue + } + if _, tgt := b.nodes[e.TargetID]; !tgt { + dropped++ + continue + } + edges = append(edges, e) + } + sort.Slice(edges, func(i, j int) bool { return edges[i].ID < edges[j].ID }) + + return Snapshot{ + Nodes: nodes, + Edges: edges, + DedupedNodes: b.dedupedNodes, + DedupedEdges: b.dedupedEdges, + DroppedEdges: dropped, + } +} diff --git a/go/internal/analyzer/graph_builder_test.go b/go/internal/analyzer/graph_builder_test.go new file mode 100644 index 00000000..cb51c0f1 --- /dev/null +++ b/go/internal/analyzer/graph_builder_test.go @@ -0,0 +1,75 @@ +package analyzer + +import ( + "testing" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +func TestGraphBuilderDeduplicatesByID(t *testing.T) { + gb := NewGraphBuilder() + n1 := model.NewCodeNode("a", model.NodeClass, "A") + n2 := model.NewCodeNode("a", model.NodeClass, "A") // duplicate + gb.Add(&detector.Result{Nodes: []*model.CodeNode{n1, n2}}) + snap := gb.Snapshot() + if len(snap.Nodes) != 1 { + t.Fatalf("expected 1 deduped node, got %d", len(snap.Nodes)) + } +} + +func TestGraphBuilderSortsForDeterminism(t *testing.T) { + gb := NewGraphBuilder() + gb.Add(&detector.Result{ + Nodes: []*model.CodeNode{ + model.NewCodeNode("z", model.NodeClass, "Z"), + model.NewCodeNode("a", model.NodeClass, "A"), + model.NewCodeNode("m", model.NodeClass, "M"), + }, + }) + snap := gb.Snapshot() + want := []string{"a", "m", "z"} + for i, n := range snap.Nodes { + if n.ID != want[i] { + t.Errorf("ID[%d] = %q, want %q", i, n.ID, 
want[i]) + } + } +} + +func TestGraphBuilderDropsEdgesWithMissingSourceOrTarget(t *testing.T) { + gb := NewGraphBuilder() + gb.Add(&detector.Result{ + Nodes: []*model.CodeNode{model.NewCodeNode("a", model.NodeClass, "A")}, + Edges: []*model.CodeEdge{ + model.NewCodeEdge("a->b", model.EdgeCalls, "a", "b"), // b missing + model.NewCodeEdge("a->ext", model.EdgeImports, "a", "ext:django"), + }, + }) + gb.Add(&detector.Result{ + Nodes: []*model.CodeNode{model.NewCodeNode("ext:django", model.NodeModule, "django")}, + }) + snap := gb.Snapshot() + if len(snap.Edges) != 1 || snap.Edges[0].ID != "a->ext" { + t.Fatalf("missing-target edges should be dropped, got %+v", snap.Edges) + } +} + +func TestGraphBuilderNodesBeforeEdges(t *testing.T) { + // Snapshot returns nodes already populated when edges are walked, so a + // graph-store flush can write in two phases (nodes, then edges) without + // reordering. + gb := NewGraphBuilder() + gb.Add(&detector.Result{ + Nodes: []*model.CodeNode{ + model.NewCodeNode("src", model.NodeClass, "S"), + model.NewCodeNode("tgt", model.NodeClass, "T"), + }, + Edges: []*model.CodeEdge{ + model.NewCodeEdge("src->tgt", model.EdgeCalls, "src", "tgt"), + }, + }) + snap := gb.Snapshot() + if len(snap.Nodes) != 2 || len(snap.Edges) != 1 { + t.Fatalf("snapshot mismatch: %+v", snap) + } +} diff --git a/go/internal/analyzer/layer_classifier.go b/go/internal/analyzer/layer_classifier.go new file mode 100644 index 00000000..b0109fee --- /dev/null +++ b/go/internal/analyzer/layer_classifier.go @@ -0,0 +1,181 @@ +package analyzer + +import ( + "regexp" + "strings" + + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// LayerClassifier assigns a Layer value to every CodeNode based on +// (kind, framework, file_path) heuristics. Pure, deterministic, first-match +// wins. Priority order mirrors LayerClassifier.java: +// 1. Node kind (frontend / backend / infra) +// 2. Language (infra) +// 3. File extension + path +// 4. Framework +// 5. 
Shared node kinds +// 6. Fallback package/path heuristics + Java src/main convention +type LayerClassifier struct{} + +var ( + frontendKinds = map[model.NodeKind]struct{}{ + model.NodeComponent: {}, + model.NodeHook: {}, + } + backendKinds = map[model.NodeKind]struct{}{ + model.NodeGuard: {}, + model.NodeMiddleware: {}, + model.NodeEndpoint: {}, + model.NodeRepository: {}, + model.NodeDatabaseConnection: {}, + model.NodeQuery: {}, + model.NodeEntity: {}, + model.NodeMigration: {}, + model.NodeService: {}, + model.NodeTopic: {}, + model.NodeQueue: {}, + model.NodeEvent: {}, + model.NodeMessageQueue: {}, + model.NodeRMIInterface: {}, + model.NodeWebSocketEndpoint: {}, + } + infraKinds = map[model.NodeKind]struct{}{ + model.NodeInfraResource: {}, + model.NodeAzureResource: {}, + model.NodeAzureFunction: {}, + model.NodeSQLEntity: {}, + } + sharedKinds = map[model.NodeKind]struct{}{ + model.NodeConfigFile: {}, + model.NodeConfigKey: {}, + model.NodeConfigDefinition: {}, + model.NodeProtocolMessage: {}, + } + infraLangs = map[string]struct{}{ + "terraform": {}, + "bicep": {}, + "dockerfile": {}, + } + frontendFrameworks = map[string]struct{}{ + "react": {}, + "vue": {}, + "angular": {}, + "svelte": {}, + "nextjs": {}, + } + backendFrameworks = map[string]struct{}{ + "express": {}, + "nestjs": {}, + "flask": {}, + "django": {}, + "fastapi": {}, + "spring": {}, + "spring_boot": {}, + "spring_mvc": {}, + "spring_data": {}, + "spring_security": {}, + "gin": {}, + "echo": {}, + "fiber": {}, + "actix": {}, + "rocket": {}, + "axum": {}, + "asp.net": {}, + "koa": {}, + "hapi": {}, + "fastify": {}, + } + + frontendPathRE = regexp.MustCompile(`(?:^|/)(?:src/)?(?:components|pages|views|app/ui|public)/`) + backendPathRE = regexp.MustCompile(`(?:^|/)(?:src/)?(?:server|api|controllers|services|routes|handlers)/`) + frontendExtRE = regexp.MustCompile(`\.(?:tsx|jsx)$`) + backendPkgRE = 
regexp.MustCompile(`(?i)(?:^|/|\.)(?:controller|controllers|api|web|rest|resource|resources|model|models|entity|entities|domain|dto|dtos|repository|repositories|dao|persistence|service|services|business|logic|routes|handlers|handler|middleware|middlewares|schemas)(?:/|\.|$)`) + sharedPkgRE = regexp.MustCompile(`(?i)(?:^|/|\.)(?:config|configuration|util|utils|helper|helpers|common|shared|exception|exceptions|constants|enums)(?:/|\.|$)`) + frontendPkgRE = regexp.MustCompile(`(?i)(?:^|/|\.)(?:components|views|pages|ui|widgets|screens|templates|layouts)(?:/|\.|$)`) +) + +// Classify sets the Layer property on every node in the slice. +func (c *LayerClassifier) Classify(nodes []*model.CodeNode) { + for _, n := range nodes { + n.Layer = c.classifyOne(n) + } +} + +// classifyOne returns the Layer for a single node. Exported as lowercase +// because callers should go through Classify; exposed package-internally so +// tests can exercise individual rules without a slice. +func (c *LayerClassifier) classifyOne(n *model.CodeNode) model.Layer { + // 1. Node kind rules. + if _, ok := frontendKinds[n.Kind]; ok { + return model.LayerFrontend + } + if _, ok := backendKinds[n.Kind]; ok { + return model.LayerBackend + } + if _, ok := infraKinds[n.Kind]; ok { + return model.LayerInfra + } + + // 2. Language rules. + if lang, _ := n.Properties["language"].(string); lang != "" { + if _, ok := infraLangs[lang]; ok { + return model.LayerInfra + } + } + + // 3. File path rules. + if n.FilePath != "" { + if frontendExtRE.MatchString(n.FilePath) { + return model.LayerFrontend + } + if frontendPathRE.MatchString(n.FilePath) { + return model.LayerFrontend + } + if backendPathRE.MatchString(n.FilePath) { + return model.LayerBackend + } + } + + // 4. Framework rules. + if fw, _ := n.Properties["framework"].(string); fw != "" { + if _, ok := frontendFrameworks[fw]; ok { + return model.LayerFrontend + } + if _, ok := backendFrameworks[fw]; ok { + return model.LayerBackend + } + } + + // 5. 
Shared node kinds. + if _, ok := sharedKinds[n.Kind]; ok { + return model.LayerShared + } + + // 6. Fallback: package-name / path-pattern heuristics over both file path + // and node ID (the ID often carries package info for JVM-style IDs). + combined := n.FilePath + "|" + n.ID + if frontendPkgRE.MatchString(combined) { + return model.LayerFrontend + } + if backendPkgRE.MatchString(combined) { + return model.LayerBackend + } + if sharedPkgRE.MatchString(combined) { + return model.LayerShared + } + + // 7. Java-family final fallback: files under src/main/java or + // src/main/kotlin in standard Spring/Java layouts are virtually always + // backend code. + if strings.HasSuffix(n.FilePath, ".java") || + strings.HasSuffix(n.FilePath, ".kt") || + strings.HasSuffix(n.FilePath, ".scala") { + if strings.Contains(n.FilePath, "src/main/java/") || + strings.Contains(n.FilePath, "src/main/kotlin/") { + return model.LayerBackend + } + } + + return model.LayerUnknown +} diff --git a/go/internal/analyzer/layer_classifier_test.go b/go/internal/analyzer/layer_classifier_test.go new file mode 100644 index 00000000..60cb41f0 --- /dev/null +++ b/go/internal/analyzer/layer_classifier_test.go @@ -0,0 +1,152 @@ +package analyzer + +import ( + "testing" + + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// TestLayerClassifierRules covers one positive case per priority rule: +// kind → language → file path → framework → shared → fallback path. 
+func TestLayerClassifierRules(t *testing.T) { + lc := &LayerClassifier{} + + cases := []struct { + name string + node *model.CodeNode + want model.Layer + }{ + { + name: "frontend node kind (component)", + node: &model.CodeNode{ + Kind: model.NodeComponent, + Properties: map[string]any{}, + }, + want: model.LayerFrontend, + }, + { + name: "backend node kind (endpoint)", + node: &model.CodeNode{ + Kind: model.NodeEndpoint, + Properties: map[string]any{}, + }, + want: model.LayerBackend, + }, + { + name: "infra by language (terraform)", + node: &model.CodeNode{ + Kind: model.NodeModule, + Properties: map[string]any{"language": "terraform"}, + }, + want: model.LayerInfra, + }, + { + name: "file extension .tsx → frontend", + node: &model.CodeNode{ + Kind: model.NodeClass, + FilePath: "src/foo/Bar.tsx", + Properties: map[string]any{}, + }, + want: model.LayerFrontend, + }, + { + name: "file path /server/ → backend", + node: &model.CodeNode{ + Kind: model.NodeClass, + FilePath: "src/server/handler.go", + Properties: map[string]any{}, + }, + want: model.LayerBackend, + }, + { + name: "framework=react → frontend", + node: &model.CodeNode{ + Kind: model.NodeClass, + FilePath: "some/unrelated/path.js", + Properties: map[string]any{"framework": "react"}, + }, + want: model.LayerFrontend, + }, + { + name: "shared node kind (config_file)", + node: &model.CodeNode{ + Kind: model.NodeConfigFile, + Properties: map[string]any{}, + }, + want: model.LayerShared, + }, + { + name: "Java path fallback (src/main/java/...) 
→ backend", + node: &model.CodeNode{ + Kind: model.NodeClass, + FilePath: "myapp/src/main/java/com/example/Greeter.java", + Properties: map[string]any{}, + }, + want: model.LayerBackend, + }, + { + name: "fully unknown fallback", + node: &model.CodeNode{ + Kind: model.NodeClass, + FilePath: "random/path/file.txt", + ID: "rand:thing", + Properties: map[string]any{}, + }, + want: model.LayerUnknown, + }, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + got := lc.classifyOne(tc.node) + if got != tc.want { + t.Fatalf("classifyOne(%s) = %s, want %s", tc.name, got, tc.want) + } + }) + } +} + +// TestLayerClassifierClassifyMutates verifies Classify writes Layer on every node. +func TestLayerClassifierClassifyMutates(t *testing.T) { + lc := &LayerClassifier{} + nodes := []*model.CodeNode{ + {Kind: model.NodeComponent, Properties: map[string]any{}}, + {Kind: model.NodeEndpoint, Properties: map[string]any{}}, + {Kind: model.NodeClass, FilePath: "x.txt", Properties: map[string]any{}}, + } + lc.Classify(nodes) + want := []model.Layer{model.LayerFrontend, model.LayerBackend, model.LayerUnknown} + for i, n := range nodes { + if n.Layer != want[i] { + t.Fatalf("node[%d].Layer = %s, want %s", i, n.Layer, want[i]) + } + } +} + +// TestLayerClassifierDeterminism runs the same input twice and asserts identical +// output — guards against accidental map iteration or non-deterministic logic. 
+func TestLayerClassifierDeterminism(t *testing.T) { + lc := &LayerClassifier{} + build := func() []*model.CodeNode { + return []*model.CodeNode{ + {Kind: model.NodeComponent, Properties: map[string]any{}}, + {Kind: model.NodeEndpoint, Properties: map[string]any{}}, + {Kind: model.NodeModule, Properties: map[string]any{"language": "terraform"}}, + {Kind: model.NodeClass, FilePath: "src/foo/Bar.tsx", Properties: map[string]any{}}, + {Kind: model.NodeClass, FilePath: "src/server/handler.go", Properties: map[string]any{}}, + {Kind: model.NodeClass, FilePath: "x.js", Properties: map[string]any{"framework": "react"}}, + {Kind: model.NodeConfigFile, Properties: map[string]any{}}, + {Kind: model.NodeClass, FilePath: "myapp/src/main/java/com/Greeter.java", Properties: map[string]any{}}, + {Kind: model.NodeClass, FilePath: "random/path.txt", ID: "z", Properties: map[string]any{}}, + } + } + a := build() + b := build() + lc.Classify(a) + lc.Classify(b) + for i := range a { + if a[i].Layer != b[i].Layer { + t.Fatalf("non-deterministic Layer at index %d: %s vs %s", i, a[i].Layer, b[i].Layer) + } + } +} diff --git a/go/internal/analyzer/linker/determinism_test.go b/go/internal/analyzer/linker/determinism_test.go new file mode 100644 index 00000000..8af6481e --- /dev/null +++ b/go/internal/analyzer/linker/determinism_test.go @@ -0,0 +1,75 @@ +package linker + +import ( + "math/rand" + "reflect" + "testing" + + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// TestLinkerDeterminism_ShuffledInput — Plan §1.6. +// Same set of nodes/edges in two different orders must produce identical +// sorted output through Sorted(). 
+func TestLinkerDeterminism_ShuffledInput(t *testing.T) { + build := func(seed int64) Result { + nodes := []*model.CodeNode{ + model.NewCodeNode("c", model.NodeClass, "c"), + model.NewCodeNode("a", model.NodeClass, "a"), + model.NewCodeNode("b", model.NodeClass, "b"), + } + edges := []*model.CodeEdge{ + model.NewCodeEdge("e3", model.EdgeCalls, "c", "a"), + model.NewCodeEdge("e1", model.EdgeCalls, "a", "b"), + model.NewCodeEdge("e2", model.EdgeCalls, "b", "c"), + } + r := rand.New(rand.NewSource(seed)) + r.Shuffle(len(nodes), func(i, j int) { nodes[i], nodes[j] = nodes[j], nodes[i] }) + r.Shuffle(len(edges), func(i, j int) { edges[i], edges[j] = edges[j], edges[i] }) + return Result{Nodes: nodes, Edges: edges}.Sorted() + } + + r1 := build(1) + r2 := build(2) + if !sameNodeIDs(r1.Nodes, r2.Nodes) { + t.Errorf("node order non-deterministic: %v vs %v", nodeIDs(r1.Nodes), nodeIDs(r2.Nodes)) + } + if !sameEdgeIDs(r1.Edges, r2.Edges) { + t.Errorf("edge order non-deterministic") + } + if !reflect.DeepEqual(nodeIDs(r1.Nodes), []string{"a", "b", "c"}) { + t.Errorf("sort order wrong: %v", nodeIDs(r1.Nodes)) + } +} + +func nodeIDs(ns []*model.CodeNode) []string { + out := make([]string, len(ns)) + for i, n := range ns { + out[i] = n.ID + } + return out +} + +func sameNodeIDs(a, b []*model.CodeNode) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i].ID != b[i].ID { + return false + } + } + return true +} + +func sameEdgeIDs(a, b []*model.CodeEdge) bool { + if len(a) != len(b) { + return false + } + for i := range a { + if a[i].ID != b[i].ID { + return false + } + } + return true +} diff --git a/go/internal/analyzer/linker/entity_linker.go b/go/internal/analyzer/linker/entity_linker.go new file mode 100644 index 00000000..9e599675 --- /dev/null +++ b/go/internal/analyzer/linker/entity_linker.go @@ -0,0 +1,95 @@ +package linker + +import ( + "fmt" + "sort" + "strings" + + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// repoSuffixes is 
the ordered list of suffixes matched on REPOSITORY labels. +// First match wins, so the order matters: `Repository` before `Repo` so that +// `UserRepository` strips → `User` (not `UserRepository` minus `Repo` → +// `UserRepository`). +var repoSuffixes = []string{"Repository", "Repo", "Dao", "DAO"} + +// EntityLinker emits QUERIES edges from REPOSITORY nodes to the ENTITY nodes +// they manage, matched by naming convention (e.g. `UserRepository` → +// `User`, `OrderDao` → `Order`). +// +// Mirrors src/main/java/io/github/randomcodespace/iq/analyzer/linker/EntityLinker.java +// (lines 33-98). +type EntityLinker struct{} + +// NewEntityLinker returns a stateless linker. +func NewEntityLinker() *EntityLinker { return &EntityLinker{} } + +// Link iterates repositories and matches them to entities by simple-name +// (case-insensitive) after stripping the longest recognised suffix. Skips +// repositories that already have an outbound QUERIES edge to the candidate +// entity to avoid duplicates with what detectors emitted. 
+func (l *EntityLinker) Link(nodes []*model.CodeNode, edges []*model.CodeEdge) Result { + var entities, repositories []*model.CodeNode + for _, n := range nodes { + switch n.Kind { + case model.NodeEntity: + entities = append(entities, n) + case model.NodeRepository: + repositories = append(repositories, n) + } + } + if len(entities) == 0 || len(repositories) == 0 { + return Result{} + } + + entityByName := make(map[string]*model.CodeNode) + for _, e := range entities { + entityByName[strings.ToLower(e.Label)] = e + if e.FQN != "" { + simple := e.FQN + if idx := strings.LastIndex(simple, "."); idx >= 0 { + simple = simple[idx+1:] + } + entityByName[strings.ToLower(simple)] = e + } + } + + existing := map[string]struct{}{} + for _, e := range edges { + if e.Kind == model.EdgeQueries { + existing[e.SourceID+"->"+e.TargetID] = struct{}{} + } + } + + // Iterate repositories in ID order for determinism (Java side relies on + // the GraphBuilder snapshot already being sorted; we don't, so sort here). 
+ sort.Slice(repositories, func(i, j int) bool { return repositories[i].ID < repositories[j].ID }) + + var newEdges []*model.CodeEdge + for _, repo := range repositories { + for _, suf := range repoSuffixes { + if !strings.HasSuffix(repo.Label, suf) { + continue + } + base := strings.ToLower(repo.Label[:len(repo.Label)-len(suf)]) + ent, ok := entityByName[base] + if !ok { + break // first matching suffix wins, even if entity missing + } + key := repo.ID + "->" + ent.ID + if _, dup := existing[key]; dup { + break + } + newEdges = append(newEdges, &model.CodeEdge{ + ID: fmt.Sprintf("entity-link:%s->%s", repo.ID, ent.ID), + Kind: model.EdgeQueries, + SourceID: repo.ID, + TargetID: ent.ID, + Properties: map[string]any{"inferred": true}, + }) + break + } + } + return Result{Edges: newEdges} +} diff --git a/go/internal/analyzer/linker/entity_linker_test.go b/go/internal/analyzer/linker/entity_linker_test.go new file mode 100644 index 00000000..bae744a3 --- /dev/null +++ b/go/internal/analyzer/linker/entity_linker_test.go @@ -0,0 +1,136 @@ +package linker_test + +import ( + "testing" + + "github.com/randomcodespace/codeiq/go/internal/analyzer/linker" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +func TestEntityLinkerMatchesUserRepositoryToUser(t *testing.T) { + nodes := []*model.CodeNode{ + {ID: "entity:User", Kind: model.NodeEntity, Label: "User"}, + {ID: "repo:UserRepository", Kind: model.NodeRepository, Label: "UserRepository"}, + } + r := linker.NewEntityLinker().Link(nodes, nil) + if len(r.Edges) != 1 { + t.Fatalf("want 1 edge, got %d", len(r.Edges)) + } + got := r.Edges[0] + if got.Kind != model.EdgeQueries { + t.Fatalf("want QUERIES kind, got %s", got.Kind) + } + if got.SourceID != "repo:UserRepository" || got.TargetID != "entity:User" { + t.Fatalf("bad source/target: %s -> %s", got.SourceID, got.TargetID) + } + if got.ID != "entity-link:repo:UserRepository->entity:User" { + t.Fatalf("bad id: %q", got.ID) + } + if got.Properties["inferred"] != true 
{ + t.Fatalf("missing inferred=true") + } +} + +func TestEntityLinkerSupportsAllSuffixVariants(t *testing.T) { + cases := []struct { + repoLabel string + entityID string + }{ + {"OrderRepository", "entity:Order"}, + {"ItemRepo", "entity:Item"}, + {"ProductDao", "entity:Product"}, + {"CustomerDAO", "entity:Customer"}, + } + nodes := []*model.CodeNode{ + {ID: "entity:Order", Kind: model.NodeEntity, Label: "Order"}, + {ID: "entity:Item", Kind: model.NodeEntity, Label: "Item"}, + {ID: "entity:Product", Kind: model.NodeEntity, Label: "Product"}, + {ID: "entity:Customer", Kind: model.NodeEntity, Label: "Customer"}, + } + for _, c := range cases { + repo := &model.CodeNode{ID: "repo:" + c.repoLabel, Kind: model.NodeRepository, Label: c.repoLabel} + all := append([]*model.CodeNode{}, nodes...) + all = append(all, repo) + r := linker.NewEntityLinker().Link(all, nil) + if len(r.Edges) != 1 { + t.Fatalf("suffix %q: want 1 edge, got %d", c.repoLabel, len(r.Edges)) + } + if r.Edges[0].TargetID != c.entityID { + t.Fatalf("suffix %q: want target %s, got %s", c.repoLabel, c.entityID, r.Edges[0].TargetID) + } + } +} + +func TestEntityLinkerSkipsWhenQueriesEdgeAlreadyExists(t *testing.T) { + nodes := []*model.CodeNode{ + {ID: "entity:User", Kind: model.NodeEntity, Label: "User"}, + {ID: "repo:UserRepository", Kind: model.NodeRepository, Label: "UserRepository"}, + } + edges := []*model.CodeEdge{ + {ID: "existing", Kind: model.EdgeQueries, SourceID: "repo:UserRepository", TargetID: "entity:User"}, + } + r := linker.NewEntityLinker().Link(nodes, edges) + if len(r.Edges) != 0 { + t.Fatalf("want 0 edges (existing QUERIES suppresses), got %d", len(r.Edges)) + } +} + +func TestEntityLinkerSkipsUnrecognisedSuffix(t *testing.T) { + nodes := []*model.CodeNode{ + {ID: "entity:User", Kind: model.NodeEntity, Label: "User"}, + {ID: "svc:UserService", Kind: model.NodeRepository, Label: "UserService"}, + } + r := linker.NewEntityLinker().Link(nodes, nil) + if len(r.Edges) != 0 { + t.Fatalf("want 0 
edges (no recognised suffix), got %d", len(r.Edges)) + } +} + +func TestEntityLinkerSkipsWhenEntityMissing(t *testing.T) { + nodes := []*model.CodeNode{ + {ID: "repo:UserRepository", Kind: model.NodeRepository, Label: "UserRepository"}, + } + r := linker.NewEntityLinker().Link(nodes, nil) + if len(r.Edges) != 0 { + t.Fatalf("want 0 edges (no entity), got %d", len(r.Edges)) + } +} + +func TestEntityLinkerCaseInsensitiveMatch(t *testing.T) { + // Repository label suffix is stripped, then lower-cased; entity is keyed + // by lower-cased label. So `userrepository` strips → `user` → matches + // `User`. + nodes := []*model.CodeNode{ + {ID: "entity:User", Kind: model.NodeEntity, Label: "User"}, + {ID: "repo:userRepository", Kind: model.NodeRepository, Label: "userRepository"}, + } + r := linker.NewEntityLinker().Link(nodes, nil) + if len(r.Edges) != 1 { + t.Fatalf("want 1 edge (case-insensitive), got %d", len(r.Edges)) + } +} + +func TestEntityLinkerMatchesByFQNSimpleName(t *testing.T) { + // Entity has FQN; repository label matches the simple name from the FQN. + nodes := []*model.CodeNode{ + {ID: "entity:com.acme.User", Kind: model.NodeEntity, Label: "User", FQN: "com.acme.User"}, + {ID: "repo:UserRepository", Kind: model.NodeRepository, Label: "UserRepository"}, + } + r := linker.NewEntityLinker().Link(nodes, nil) + if len(r.Edges) != 1 { + t.Fatalf("want 1 edge (FQN simple-name match), got %d", len(r.Edges)) + } +} + +func TestEntityLinkerOnlyFirstSuffixWins(t *testing.T) { + // "UserRepo" — `Repo` matches before `Dao`/`DAO`. Make sure we don't + // emit duplicate edges by also trying later suffixes. 
+ nodes := []*model.CodeNode{ + {ID: "entity:User", Kind: model.NodeEntity, Label: "User"}, + {ID: "repo:UserRepo", Kind: model.NodeRepository, Label: "UserRepo"}, + } + r := linker.NewEntityLinker().Link(nodes, nil) + if len(r.Edges) != 1 { + t.Fatalf("want exactly 1 edge (first suffix wins), got %d", len(r.Edges)) + } +} diff --git a/go/internal/analyzer/linker/linker.go b/go/internal/analyzer/linker/linker.go new file mode 100644 index 00000000..5501f492 --- /dev/null +++ b/go/internal/analyzer/linker/linker.go @@ -0,0 +1,36 @@ +// Package linker contains cross-file enrichers that run after detectors during +// `codeiq enrich`. Linkers walk the deterministic GraphBuilder snapshot and +// emit additional nodes/edges that span files (e.g. producer→consumer links +// via a shared topic, repository→entity QUERIES edges). +// +// Mirrors src/main/java/io/github/randomcodespace/iq/analyzer/linker/. +package linker + +import ( + "sort" + + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// Result is the bag of new nodes + edges a linker contributes. +type Result struct { + Nodes []*model.CodeNode + Edges []*model.CodeEdge +} + +// Sorted returns r with Nodes and Edges sorted by ID. Plan §1.4 — a +// defensive wrapper applied at the linker boundary so a future linker +// change can't re-introduce drift even if its internal map-iteration +// order shifts. +func (r Result) Sorted() Result { + sort.SliceStable(r.Nodes, func(i, j int) bool { return r.Nodes[i].ID < r.Nodes[j].ID }) + sort.SliceStable(r.Edges, func(i, j int) bool { return r.Edges[i].ID < r.Edges[j].ID }) + return r +} + +// Linker mirrors the Java Linker interface. Implementations MUST be +// deterministic — same input slices in must produce identical output every +// time (sort any map iteration before emitting). 
+type Linker interface { + Link(nodes []*model.CodeNode, edges []*model.CodeEdge) Result +} diff --git a/go/internal/analyzer/linker/module_containment_linker.go b/go/internal/analyzer/linker/module_containment_linker.go new file mode 100644 index 00000000..7143504f --- /dev/null +++ b/go/internal/analyzer/linker/module_containment_linker.go @@ -0,0 +1,90 @@ +package linker + +import ( + "fmt" + "sort" + + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// ModuleContainmentLinker groups nodes by their Module field and emits MODULE +// nodes plus CONTAINS edges pointing at each member. +// +// Mirrors src/main/java/io/github/randomcodespace/iq/analyzer/linker/ModuleContainmentLinker.java +// (lines 30-97). MODULE-kind nodes are excluded from membership grouping so a +// module never contains itself; duplicate CONTAINS edges are suppressed. +type ModuleContainmentLinker struct{} + +// NewModuleContainmentLinker returns a stateless linker. +func NewModuleContainmentLinker() *ModuleContainmentLinker { + return &ModuleContainmentLinker{} +} + +// Link emits the new MODULE nodes and CONTAINS edges. Modules iterate in +// alphabetical order; members within a module iterate in ID order — making +// the output stable across runs. 
+func (l *ModuleContainmentLinker) Link(nodes []*model.CodeNode, edges []*model.CodeEdge) Result { + existingModules := map[string]struct{}{} + for _, n := range nodes { + if n.Kind == model.NodeModule { + existingModules[n.ID] = struct{}{} + } + } + + byModule := map[string][]*model.CodeNode{} + for _, n := range nodes { + if n.Kind == model.NodeModule || n.Module == "" { + continue + } + byModule[n.Module] = append(byModule[n.Module], n) + } + if len(byModule) == 0 { + return Result{} + } + + existingContains := map[string]struct{}{} + for _, e := range edges { + if e.Kind == model.EdgeContains { + existingContains[e.SourceID+"->"+e.TargetID] = struct{}{} + } + } + + moduleNames := make([]string, 0, len(byModule)) + for m := range byModule { + moduleNames = append(moduleNames, m) + } + sort.Strings(moduleNames) + + var newNodes []*model.CodeNode + var newEdges []*model.CodeEdge + for _, m := range moduleNames { + moduleID := "module:" + m + if _, ok := existingModules[moduleID]; !ok { + newNodes = append(newNodes, &model.CodeNode{ + ID: moduleID, + Kind: model.NodeModule, + Label: m, + FQN: m, + Module: m, + }) + existingModules[moduleID] = struct{}{} + } + members := byModule[m] + sort.Slice(members, func(i, j int) bool { return members[i].ID < members[j].ID }) + for _, mem := range members { + key := moduleID + "->" + mem.ID + if _, ok := existingContains[key]; ok { + continue + } + newEdges = append(newEdges, &model.CodeEdge{ + ID: fmt.Sprintf("module-link:%s->%s", moduleID, mem.ID), + Kind: model.EdgeContains, + SourceID: moduleID, + TargetID: mem.ID, + Properties: map[string]any{"inferred": true}, + }) + existingContains[key] = struct{}{} + } + } + return Result{Nodes: newNodes, Edges: newEdges} +} diff --git a/go/internal/analyzer/linker/module_containment_linker_test.go b/go/internal/analyzer/linker/module_containment_linker_test.go new file mode 100644 index 00000000..12d2ff26 --- /dev/null +++ 
b/go/internal/analyzer/linker/module_containment_linker_test.go @@ -0,0 +1,163 @@ +package linker_test + +import ( + "sort" + "testing" + + "github.com/randomcodespace/codeiq/go/internal/analyzer/linker" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +func TestModuleContainmentLinkerCreatesModuleNodeAndContainsEdges(t *testing.T) { + nodes := []*model.CodeNode{ + {ID: "class:A", Kind: model.NodeClass, Label: "A", Module: "com.acme.core"}, + {ID: "class:B", Kind: model.NodeClass, Label: "B", Module: "com.acme.core"}, + } + r := linker.NewModuleContainmentLinker().Link(nodes, nil) + if len(r.Nodes) != 1 { + t.Fatalf("want 1 new module node, got %d", len(r.Nodes)) + } + mod := r.Nodes[0] + if mod.ID != "module:com.acme.core" || mod.Kind != model.NodeModule { + t.Fatalf("bad module node: %+v", mod) + } + if mod.Label != "com.acme.core" || mod.FQN != "com.acme.core" || mod.Module != "com.acme.core" { + t.Fatalf("module name fields not set: label=%q fqn=%q module=%q", mod.Label, mod.FQN, mod.Module) + } + if len(r.Edges) != 2 { + t.Fatalf("want 2 CONTAINS edges, got %d", len(r.Edges)) + } + for _, e := range r.Edges { + if e.Kind != model.EdgeContains { + t.Fatalf("want CONTAINS, got %s", e.Kind) + } + if e.SourceID != "module:com.acme.core" { + t.Fatalf("bad source: %s", e.SourceID) + } + if e.Properties["inferred"] != true { + t.Fatalf("missing inferred=true") + } + } +} + +func TestModuleContainmentLinkerReusesExistingModuleNode(t *testing.T) { + nodes := []*model.CodeNode{ + {ID: "module:com.acme.core", Kind: model.NodeModule, Label: "com.acme.core"}, + {ID: "class:A", Kind: model.NodeClass, Label: "A", Module: "com.acme.core"}, + } + r := linker.NewModuleContainmentLinker().Link(nodes, nil) + if len(r.Nodes) != 0 { + t.Fatalf("want 0 new module nodes (existing reused), got %d", len(r.Nodes)) + } + if len(r.Edges) != 1 { + t.Fatalf("want 1 CONTAINS edge, got %d", len(r.Edges)) + } + if r.Edges[0].SourceID != "module:com.acme.core" || r.Edges[0].TargetID 
!= "class:A" { + t.Fatalf("bad edge: %+v", r.Edges[0]) + } +} + +func TestModuleContainmentLinkerSkipsExistingContainsEdge(t *testing.T) { + nodes := []*model.CodeNode{ + {ID: "module:com.acme.core", Kind: model.NodeModule, Label: "com.acme.core"}, + {ID: "class:A", Kind: model.NodeClass, Label: "A", Module: "com.acme.core"}, + } + edges := []*model.CodeEdge{ + {ID: "pre", Kind: model.EdgeContains, SourceID: "module:com.acme.core", TargetID: "class:A"}, + } + r := linker.NewModuleContainmentLinker().Link(nodes, edges) + if len(r.Edges) != 0 { + t.Fatalf("want 0 new edges (duplicate suppressed), got %d", len(r.Edges)) + } +} + +func TestModuleContainmentLinkerSkipsNodesWithEmptyModule(t *testing.T) { + nodes := []*model.CodeNode{ + {ID: "class:A", Kind: model.NodeClass, Label: "A"}, + {ID: "class:B", Kind: model.NodeClass, Label: "B", Module: ""}, + } + r := linker.NewModuleContainmentLinker().Link(nodes, nil) + if len(r.Nodes) != 0 || len(r.Edges) != 0 { + t.Fatalf("want empty result for nodes with empty module, got %d nodes, %d edges", len(r.Nodes), len(r.Edges)) + } +} + +func TestModuleContainmentLinkerSkipsModuleKindNodesWithSelfModule(t *testing.T) { + // MODULE-kind nodes are excluded from membership grouping even if their + // own Module field is set — they can't contain themselves. 
+ nodes := []*model.CodeNode{ + {ID: "module:com.acme.core", Kind: model.NodeModule, Label: "com.acme.core", Module: "com.acme.core"}, + } + r := linker.NewModuleContainmentLinker().Link(nodes, nil) + if len(r.Nodes) != 0 || len(r.Edges) != 0 { + t.Fatalf("want empty result; module shouldn't contain itself, got %d nodes, %d edges", len(r.Nodes), len(r.Edges)) + } +} + +func TestModuleContainmentLinkerDeterministic(t *testing.T) { + nodes := []*model.CodeNode{ + {ID: "class:Z", Kind: model.NodeClass, Label: "Z", Module: "mod.b"}, + {ID: "class:A", Kind: model.NodeClass, Label: "A", Module: "mod.a"}, + {ID: "class:M", Kind: model.NodeClass, Label: "M", Module: "mod.a"}, + {ID: "class:N", Kind: model.NodeClass, Label: "N", Module: "mod.b"}, + } + var firstNodeIDs, firstEdgeIDs []string + for i := 0; i < 5; i++ { + r := linker.NewModuleContainmentLinker().Link(nodes, nil) + + nIDs := make([]string, 0, len(r.Nodes)) + for _, n := range r.Nodes { + nIDs = append(nIDs, n.ID) + } + sort.Strings(nIDs) + + eIDs := make([]string, 0, len(r.Edges)) + for _, e := range r.Edges { + eIDs = append(eIDs, e.ID) + } + sort.Strings(eIDs) + + if firstNodeIDs == nil { + firstNodeIDs = nIDs + firstEdgeIDs = eIDs + continue + } + if len(firstNodeIDs) != len(nIDs) || len(firstEdgeIDs) != len(eIDs) { + t.Fatalf("non-deterministic count") + } + for j := range nIDs { + if firstNodeIDs[j] != nIDs[j] { + t.Fatalf("non-deterministic node ids") + } + } + for j := range eIDs { + if firstEdgeIDs[j] != eIDs[j] { + t.Fatalf("non-deterministic edge ids") + } + } + } +} + +func TestModuleContainmentLinkerEmitsEdgesInModuleThenMemberOrder(t *testing.T) { + // Spec from the plan: emit CONTAINS edges sorted by module then by + // member ID. So `mod.a` members (sorted) come before `mod.b` members. 
+ nodes := []*model.CodeNode{ + {ID: "class:b_member", Kind: model.NodeClass, Label: "b_member", Module: "mod.b"}, + {ID: "class:a_member", Kind: model.NodeClass, Label: "a_member", Module: "mod.a"}, + {ID: "class:a_member2", Kind: model.NodeClass, Label: "a_member2", Module: "mod.a"}, + } + r := linker.NewModuleContainmentLinker().Link(nodes, nil) + if len(r.Edges) != 3 { + t.Fatalf("want 3 edges, got %d", len(r.Edges)) + } + wantOrder := []string{ + "module-link:module:mod.a->class:a_member", + "module-link:module:mod.a->class:a_member2", + "module-link:module:mod.b->class:b_member", + } + for i, e := range r.Edges { + if e.ID != wantOrder[i] { + t.Fatalf("edge[%d]: want %q, got %q", i, wantOrder[i], e.ID) + } + } +} diff --git a/go/internal/analyzer/linker/topic_linker.go b/go/internal/analyzer/linker/topic_linker.go new file mode 100644 index 00000000..4513fe4d --- /dev/null +++ b/go/internal/analyzer/linker/topic_linker.go @@ -0,0 +1,115 @@ +package linker + +import ( + "fmt" + "sort" + + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// TopicLinker pairs messaging producers with consumers that share a +// topic/queue/event/message-queue node, emitting direct CALLS edges. +// +// Mirrors src/main/java/io/github/randomcodespace/iq/analyzer/linker/TopicLinker.java +// (lines 34-115). Supports Kafka, RabbitMQ, TIBCO EMS, IBM MQ, Azure Service +// Bus, Spring application events, and other enterprise messaging patterns. +type TopicLinker struct{} + +// NewTopicLinker returns a stateless linker. 
+func NewTopicLinker() *TopicLinker { return &TopicLinker{} } + +var ( + producerEdgeKinds = map[model.EdgeKind]struct{}{ + model.EdgeProduces: {}, + model.EdgeSendsTo: {}, + model.EdgePublishes: {}, + } + consumerEdgeKinds = map[model.EdgeKind]struct{}{ + model.EdgeConsumes: {}, + model.EdgeReceivesFrom: {}, + model.EdgeListens: {}, + } + topicNodeKinds = map[model.NodeKind]struct{}{ + model.NodeTopic: {}, + model.NodeQueue: {}, + model.NodeEvent: {}, + model.NodeMessageQueue: {}, + } +) + +// Link scans nodes for topic-like kinds and edges for producer/consumer kinds, +// then emits a CALLS edge from each producer to each non-self consumer that +// share a topic label. +func (l *TopicLinker) Link(nodes []*model.CodeNode, edges []*model.CodeEdge) Result { + topicIDsByLabel := make(map[string][]string) + for _, n := range nodes { + if _, ok := topicNodeKinds[n.Kind]; ok { + topicIDsByLabel[n.Label] = append(topicIDsByLabel[n.Label], n.ID) + } + } + if len(topicIDsByLabel) == 0 { + return Result{} + } + + producersByTopic := map[string][]string{} + consumersByTopic := map[string][]string{} + for _, e := range edges { + if _, ok := producerEdgeKinds[e.Kind]; ok { + producersByTopic[e.TargetID] = append(producersByTopic[e.TargetID], e.SourceID) + } else if _, ok := consumerEdgeKinds[e.Kind]; ok { + consumersByTopic[e.TargetID] = append(consumersByTopic[e.TargetID], e.SourceID) + } + } + + // Deterministic iteration: walk labels alphabetically. 
+ labels := make([]string, 0, len(topicIDsByLabel)) + for k := range topicIDsByLabel { + labels = append(labels, k) + } + sort.Strings(labels) + + var newEdges []*model.CodeEdge + for _, label := range labels { + topicIDs := topicIDsByLabel[label] + prodSet := map[string]struct{}{} + consSet := map[string]struct{}{} + for _, tid := range topicIDs { + for _, p := range producersByTopic[tid] { + prodSet[p] = struct{}{} + } + for _, c := range consumersByTopic[tid] { + consSet[c] = struct{}{} + } + } + prods := sortedKeys(prodSet) + cons := sortedKeys(consSet) + for _, p := range prods { + for _, c := range cons { + if p == c { + continue + } + newEdges = append(newEdges, &model.CodeEdge{ + ID: fmt.Sprintf("topic-link:%s->%s", p, c), + Kind: model.EdgeCalls, + SourceID: p, + TargetID: c, + Properties: map[string]any{ + "inferred": true, + "topic": label, + }, + }) + } + } + } + return Result{Edges: newEdges} +} + +// sortedKeys returns the keys of a string set in ascending order. +func sortedKeys(m map[string]struct{}) []string { + out := make([]string, 0, len(m)) + for k := range m { + out = append(out, k) + } + sort.Strings(out) + return out +} diff --git a/go/internal/analyzer/linker/topic_linker_test.go b/go/internal/analyzer/linker/topic_linker_test.go new file mode 100644 index 00000000..b37881ed --- /dev/null +++ b/go/internal/analyzer/linker/topic_linker_test.go @@ -0,0 +1,140 @@ +package linker_test + +import ( + "sort" + "testing" + + "github.com/randomcodespace/codeiq/go/internal/analyzer/linker" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +func TestTopicLinkerPairsProducerToConsumer(t *testing.T) { + nodes := []*model.CodeNode{ + {ID: "topic:orders", Kind: model.NodeTopic, Label: "orders"}, + {ID: "svc:checkout", Kind: model.NodeService, Label: "checkout"}, + {ID: "svc:fulfilment", Kind: model.NodeService, Label: "fulfilment"}, + } + edges := []*model.CodeEdge{ + {ID: "p1", Kind: model.EdgeProduces, SourceID: "svc:checkout", TargetID: 
"topic:orders"}, + {ID: "c1", Kind: model.EdgeConsumes, SourceID: "svc:fulfilment", TargetID: "topic:orders"}, + } + r := linker.NewTopicLinker().Link(nodes, edges) + if len(r.Edges) != 1 { + t.Fatalf("want 1 edge, got %d", len(r.Edges)) + } + got := r.Edges[0] + if got.SourceID != "svc:checkout" || got.TargetID != "svc:fulfilment" || got.Kind != model.EdgeCalls { + t.Fatalf("bad edge: %+v", got) + } + if got.ID != "topic-link:svc:checkout->svc:fulfilment" { + t.Fatalf("bad id: %q", got.ID) + } + if got.Properties["inferred"] != true { + t.Fatalf("missing inferred=true") + } + if got.Properties["topic"] != "orders" { + t.Fatalf("missing topic=orders, got %v", got.Properties["topic"]) + } +} + +func TestTopicLinkerDeterministic(t *testing.T) { + nodes := []*model.CodeNode{ + {ID: "topic:t1", Kind: model.NodeTopic, Label: "t1"}, + {ID: "p1", Kind: model.NodeService, Label: "p1"}, + {ID: "c1", Kind: model.NodeService, Label: "c1"}, + {ID: "c2", Kind: model.NodeService, Label: "c2"}, + } + edges := []*model.CodeEdge{ + {ID: "e1", Kind: model.EdgeProduces, SourceID: "p1", TargetID: "topic:t1"}, + {ID: "e2", Kind: model.EdgeConsumes, SourceID: "c1", TargetID: "topic:t1"}, + {ID: "e3", Kind: model.EdgeConsumes, SourceID: "c2", TargetID: "topic:t1"}, + } + var firstIDs []string + for i := 0; i < 5; i++ { + r := linker.NewTopicLinker().Link(nodes, edges) + ids := make([]string, 0, len(r.Edges)) + for _, e := range r.Edges { + ids = append(ids, e.ID) + } + sort.Strings(ids) + if firstIDs == nil { + firstIDs = ids + } else if len(firstIDs) != len(ids) { + t.Fatalf("non-deterministic count") + } else { + for j := range ids { + if firstIDs[j] != ids[j] { + t.Fatalf("non-deterministic ids") + } + } + } + } +} + +func TestTopicLinkerSupportsAllProducerConsumerKinds(t *testing.T) { + nodes := []*model.CodeNode{ + {ID: "topic:q1", Kind: model.NodeQueue, Label: "q1"}, + {ID: "topic:e1", Kind: model.NodeEvent, Label: "e1"}, + {ID: "topic:m1", Kind: model.NodeMessageQueue, Label: 
"m1"}, + {ID: "p1", Kind: model.NodeService, Label: "p1"}, + {ID: "p2", Kind: model.NodeService, Label: "p2"}, + {ID: "p3", Kind: model.NodeService, Label: "p3"}, + {ID: "c1", Kind: model.NodeService, Label: "c1"}, + {ID: "c2", Kind: model.NodeService, Label: "c2"}, + {ID: "c3", Kind: model.NodeService, Label: "c3"}, + } + edges := []*model.CodeEdge{ + {ID: "e1", Kind: model.EdgeSendsTo, SourceID: "p1", TargetID: "topic:q1"}, + {ID: "e2", Kind: model.EdgeReceivesFrom, SourceID: "c1", TargetID: "topic:q1"}, + {ID: "e3", Kind: model.EdgePublishes, SourceID: "p2", TargetID: "topic:e1"}, + {ID: "e4", Kind: model.EdgeListens, SourceID: "c2", TargetID: "topic:e1"}, + {ID: "e5", Kind: model.EdgeProduces, SourceID: "p3", TargetID: "topic:m1"}, + {ID: "e6", Kind: model.EdgeConsumes, SourceID: "c3", TargetID: "topic:m1"}, + } + r := linker.NewTopicLinker().Link(nodes, edges) + if len(r.Edges) != 3 { + t.Fatalf("want 3 edges (one per topic), got %d", len(r.Edges)) + } +} + +func TestTopicLinkerSkipsSelfLoops(t *testing.T) { + nodes := []*model.CodeNode{ + {ID: "topic:t1", Kind: model.NodeTopic, Label: "t1"}, + {ID: "svc:a", Kind: model.NodeService, Label: "a"}, + } + edges := []*model.CodeEdge{ + {ID: "p", Kind: model.EdgeProduces, SourceID: "svc:a", TargetID: "topic:t1"}, + {ID: "c", Kind: model.EdgeConsumes, SourceID: "svc:a", TargetID: "topic:t1"}, + } + r := linker.NewTopicLinker().Link(nodes, edges) + if len(r.Edges) != 0 { + t.Fatalf("want 0 edges (self-loop suppressed), got %d", len(r.Edges)) + } +} + +func TestTopicLinkerNoTopicsReturnsEmpty(t *testing.T) { + nodes := []*model.CodeNode{{ID: "svc:a", Kind: model.NodeService, Label: "a"}} + r := linker.NewTopicLinker().Link(nodes, nil) + if len(r.Edges) != 0 || len(r.Nodes) != 0 { + t.Fatalf("expected empty result") + } +} + +func TestTopicLinkerMergesTopicsBySharedLabel(t *testing.T) { + // Two topic nodes with the same label (e.g. 
defined in two files) should + // be merged: producer on one node, consumer on the other, must still link. + nodes := []*model.CodeNode{ + {ID: "topic:a:orders", Kind: model.NodeTopic, Label: "orders"}, + {ID: "topic:b:orders", Kind: model.NodeTopic, Label: "orders"}, + {ID: "svc:p", Kind: model.NodeService, Label: "p"}, + {ID: "svc:c", Kind: model.NodeService, Label: "c"}, + } + edges := []*model.CodeEdge{ + {ID: "p", Kind: model.EdgeProduces, SourceID: "svc:p", TargetID: "topic:a:orders"}, + {ID: "c", Kind: model.EdgeConsumes, SourceID: "svc:c", TargetID: "topic:b:orders"}, + } + r := linker.NewTopicLinker().Link(nodes, edges) + if len(r.Edges) != 1 { + t.Fatalf("want 1 edge after label merge, got %d", len(r.Edges)) + } +} diff --git a/go/internal/analyzer/merger.go b/go/internal/analyzer/merger.go new file mode 100644 index 00000000..3b0d41b1 --- /dev/null +++ b/go/internal/analyzer/merger.go @@ -0,0 +1,135 @@ +package analyzer + +import ( + "sort" + + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// mergeNode merges incoming into existing, picking the higher-confidence +// node as the survivor, then filling gaps and unioning properties / +// annotations. Returns the survivor (which is mutated in place). +// +// Plan §1.1 — semantics: +// - Higher Confidence wins; ties keep existing. +// - Non-empty FQN / Module / FilePath / LineStart / LineEnd / Layer +// fill in from whichever side has them. +// - Properties: incoming wins per-key only when existing's value is nil +// or missing (do not clobber framework/auth_type already stamped by a +// higher-confidence detector). +// - Annotations are unioned and sorted for determinism. 
+func mergeNode(existing, incoming *model.CodeNode) *model.CodeNode { + if existing == nil { + return incoming + } + if incoming == nil { + return existing + } + + survivor := existing + donor := incoming + if incoming.Confidence > existing.Confidence { + survivor = incoming + donor = existing + } + + // Gap-fill scalar fields from the donor when the survivor has none. + if survivor.FQN == "" && donor.FQN != "" { + survivor.FQN = donor.FQN + } + if survivor.Module == "" && donor.Module != "" { + survivor.Module = donor.Module + } + if survivor.FilePath == "" && donor.FilePath != "" { + survivor.FilePath = donor.FilePath + } + if survivor.LineStart == 0 && donor.LineStart != 0 { + survivor.LineStart = donor.LineStart + } + if survivor.LineEnd == 0 && donor.LineEnd != 0 { + survivor.LineEnd = donor.LineEnd + } + if survivor.Layer == model.LayerUnknown && donor.Layer != model.LayerUnknown { + survivor.Layer = donor.Layer + } + if survivor.Source == "" && donor.Source != "" { + survivor.Source = donor.Source + } + + // Property union: donor fills missing keys; never clobbers existing. + if survivor.Properties == nil { + survivor.Properties = map[string]any{} + } + for k, v := range donor.Properties { + if _, exists := survivor.Properties[k]; exists { + continue + } + survivor.Properties[k] = v + } + + // Annotation union — dedup + sort for determinism. + survivor.Annotations = unionSorted(survivor.Annotations, donor.Annotations) + + return survivor +} + +// mergeEdge merges two edges with the same EdgeKey (src, tgt, kind). +// Higher-confidence wins; ties keep existing. Properties unioned with +// non-clobber semantics. 
+func mergeEdge(existing, incoming *model.CodeEdge) *model.CodeEdge { + if existing == nil { + return incoming + } + if incoming == nil { + return existing + } + + survivor := existing + donor := incoming + if incoming.Confidence > existing.Confidence { + survivor = incoming + donor = existing + } + if survivor.Source == "" && donor.Source != "" { + survivor.Source = donor.Source + } + if survivor.Properties == nil { + survivor.Properties = map[string]any{} + } + for k, v := range donor.Properties { + if _, exists := survivor.Properties[k]; exists { + continue + } + survivor.Properties[k] = v + } + return survivor +} + +func unionSorted(a, b []string) []string { + seen := make(map[string]struct{}, len(a)+len(b)) + for _, s := range a { + seen[s] = struct{}{} + } + for _, s := range b { + seen[s] = struct{}{} + } + out := make([]string, 0, len(seen)) + for s := range seen { + out = append(out, s) + } + sort.Strings(out) + return out +} + +// edgeKey is the canonical key used to dedupe edges. Two edges with the +// same (source, target, kind) are considered the same edge regardless of +// detector-assigned ID strings. +type edgeKey struct { + source string + target string + kind model.EdgeKind +} + +func makeEdgeKey(e *model.CodeEdge) edgeKey { + return edgeKey{source: e.SourceID, target: e.TargetID, kind: e.Kind} +} diff --git a/go/internal/analyzer/merger_test.go b/go/internal/analyzer/merger_test.go new file mode 100644 index 00000000..6998344d --- /dev/null +++ b/go/internal/analyzer/merger_test.go @@ -0,0 +1,206 @@ +package analyzer + +import ( + "reflect" + "sort" + "testing" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// TestGraphBuilderDedup_HigherConfidenceWins — Phase 1 plan §1.6. +// Two detectors emit the same node ID with different Confidence levels. +// The merged node keeps the higher-confidence one's Confidence + Source. 
+func TestGraphBuilderDedup_HigherConfidenceWins(t *testing.T) { + gb := NewGraphBuilder() + + lex := model.NewCodeNode("class:Foo", model.NodeClass, "Foo") + lex.Confidence = model.ConfidenceLexical + lex.Source = "ClassHierarchyDetector" + lex.FQN = "" // missing, should not clobber the SYNTACTIC one + + syn := model.NewCodeNode("class:Foo", model.NodeClass, "Foo") + syn.Confidence = model.ConfidenceSyntactic + syn.Source = "SpringRestDetector" + syn.FQN = "com.example.Foo" + syn.Properties["framework"] = "spring_boot" + + // Order: low-confidence first, then high. Merger must pick high. + gb.Add(&detector.Result{Nodes: []*model.CodeNode{lex}}) + gb.Add(&detector.Result{Nodes: []*model.CodeNode{syn}}) + + snap := gb.Snapshot() + if len(snap.Nodes) != 1 { + t.Fatalf("expected 1 deduped node, got %d", len(snap.Nodes)) + } + got := snap.Nodes[0] + if got.Confidence != model.ConfidenceSyntactic { + t.Errorf("confidence = %v, want SYNTACTIC", got.Confidence) + } + if got.Source != "SpringRestDetector" { + t.Errorf("source = %q, want SpringRestDetector (higher-confidence)", got.Source) + } + if got.FQN != "com.example.Foo" { + t.Errorf("fqn = %q, want com.example.Foo (filled from higher-confidence)", got.FQN) + } + if got.Properties["framework"] != "spring_boot" { + t.Errorf("framework property dropped: %v", got.Properties) + } +} + +// TestGraphBuilderDedup_AnnotationsUnioned — annotations from both emissions +// merge and sort deterministically. 
+func TestGraphBuilderDedup_AnnotationsUnioned(t *testing.T) { + gb := NewGraphBuilder() + a := model.NewCodeNode("svc:X", model.NodeClass, "X") + a.Annotations = []string{"@Service", "@Transactional"} + b := model.NewCodeNode("svc:X", model.NodeClass, "X") + b.Annotations = []string{"@RestController", "@Service"} // overlap on @Service + + gb.Add(&detector.Result{Nodes: []*model.CodeNode{a}}) + gb.Add(&detector.Result{Nodes: []*model.CodeNode{b}}) + + snap := gb.Snapshot() + if len(snap.Nodes) != 1 { + t.Fatalf("expected 1 node, got %d", len(snap.Nodes)) + } + got := snap.Nodes[0].Annotations + want := []string{"@RestController", "@Service", "@Transactional"} + sort.Strings(got) + if !reflect.DeepEqual(got, want) { + t.Errorf("annotations = %v, want %v", got, want) + } +} + +// TestGraphBuilderDedup_PropertiesMergeNonClobber — incoming wins only when +// existing's value is nil/missing. +func TestGraphBuilderDedup_PropertiesMergeNonClobber(t *testing.T) { + gb := NewGraphBuilder() + hi := model.NewCodeNode("n", model.NodeClass, "n") + hi.Confidence = model.ConfidenceSyntactic + hi.Properties["framework"] = "spring_boot" + hi.Properties["only_on_hi"] = "v1" + + lo := model.NewCodeNode("n", model.NodeClass, "n") + lo.Confidence = model.ConfidenceLexical + lo.Properties["framework"] = "WRONG_GUESS" + lo.Properties["only_on_lo"] = "v2" + + gb.Add(&detector.Result{Nodes: []*model.CodeNode{hi}}) + gb.Add(&detector.Result{Nodes: []*model.CodeNode{lo}}) + + got := gb.Snapshot().Nodes[0].Properties + if got["framework"] != "spring_boot" { + t.Errorf("framework clobbered: %v", got["framework"]) + } + if got["only_on_hi"] != "v1" || got["only_on_lo"] != "v2" { + t.Errorf("union failed: %v", got) + } +} + +// TestGraphBuilderEdgeDedup_ByKey — Phase 1 plan §1.2. +// Same (sourceID, targetID, kind) emitted twice via different edge IDs +// collapses to one edge in the snapshot. 
+func TestGraphBuilderEdgeDedup_ByKey(t *testing.T) { + gb := NewGraphBuilder() + n1 := model.NewCodeNode("a", model.NodeClass, "a") + n2 := model.NewCodeNode("b", model.NodeClass, "b") + + e1 := model.NewCodeEdge("e1", model.EdgeCalls, "a", "b") + e1.Confidence = model.ConfidenceLexical + e2 := model.NewCodeEdge("e2-different-id", model.EdgeCalls, "a", "b") + e2.Confidence = model.ConfidenceSyntactic + + gb.Add(&detector.Result{Nodes: []*model.CodeNode{n1, n2}, Edges: []*model.CodeEdge{e1, e2}}) + + snap := gb.Snapshot() + if len(snap.Edges) != 1 { + t.Fatalf("expected 1 edge after (src,tgt,kind) dedup, got %d", len(snap.Edges)) + } + if snap.Edges[0].Confidence != model.ConfidenceSyntactic { + t.Errorf("dedup picked lower-confidence edge: %v", snap.Edges[0].Confidence) + } +} + +// TestGraphBuilderEdgeDedup_DifferentKindKept — same (src,tgt) but different +// EdgeKind must stay separate. +func TestGraphBuilderEdgeDedup_DifferentKindKept(t *testing.T) { + gb := NewGraphBuilder() + n1 := model.NewCodeNode("a", model.NodeClass, "a") + n2 := model.NewCodeNode("b", model.NodeClass, "b") + + e1 := model.NewCodeEdge("e1", model.EdgeCalls, "a", "b") + e2 := model.NewCodeEdge("e2", model.EdgeImports, "a", "b") + + gb.Add(&detector.Result{Nodes: []*model.CodeNode{n1, n2}, Edges: []*model.CodeEdge{e1, e2}}) + + snap := gb.Snapshot() + if len(snap.Edges) != 2 { + t.Fatalf("expected 2 edges (different kinds), got %d", len(snap.Edges)) + } +} + +// TestGraphBuilderEdgeDedup_PropertiesUnioned — properties from both emissions +// merge with non-clobber semantics. 
+func TestGraphBuilderEdgeDedup_PropertiesUnioned(t *testing.T) { + gb := NewGraphBuilder() + n1 := model.NewCodeNode("a", model.NodeClass, "a") + n2 := model.NewCodeNode("b", model.NodeClass, "b") + + e1 := model.NewCodeEdge("e1", model.EdgeCalls, "a", "b") + e1.Confidence = model.ConfidenceSyntactic + e1.Properties["a_only"] = 1 + e2 := model.NewCodeEdge("e2", model.EdgeCalls, "a", "b") + e2.Properties["a_only"] = "WRONG" + e2.Properties["b_only"] = 2 + + gb.Add(&detector.Result{Nodes: []*model.CodeNode{n1, n2}, Edges: []*model.CodeEdge{e1, e2}}) + + snap := gb.Snapshot() + got := snap.Edges[0].Properties + if got["a_only"] != 1 { + t.Errorf("a_only clobbered: %v", got["a_only"]) + } + if got["b_only"] != 2 { + t.Errorf("b_only not unioned: %v", got["b_only"]) + } +} + +// TestGraphBuilderStats_DedupAndDropCounts — Phase 1 plan §1.5. +// Stats expose how many duplicate nodes/edges collapsed and how many +// phantom edges (missing endpoints) were dropped. +func TestGraphBuilderStats_DedupAndDropCounts(t *testing.T) { + gb := NewGraphBuilder() + // Two emissions of the same node → 1 deduped node. + a := model.NewCodeNode("a", model.NodeClass, "a") + a2 := model.NewCodeNode("a", model.NodeClass, "a") + b := model.NewCodeNode("b", model.NodeClass, "b") + // Two emissions of the same edge → 1 deduped edge. + e1 := model.NewCodeEdge("e1", model.EdgeCalls, "a", "b") + e2 := model.NewCodeEdge("e2", model.EdgeCalls, "a", "b") // same (src,tgt,kind) + // One phantom edge → target "z" never added. 
+ ePhantom := model.NewCodeEdge("p", model.EdgeCalls, "a", "z") + + gb.Add(&detector.Result{ + Nodes: []*model.CodeNode{a, a2, b}, + Edges: []*model.CodeEdge{e1, e2, ePhantom}, + }) + + snap := gb.Snapshot() + if snap.DedupedNodes != 1 { + t.Errorf("DedupedNodes = %d, want 1", snap.DedupedNodes) + } + if snap.DedupedEdges != 1 { + t.Errorf("DedupedEdges = %d, want 1", snap.DedupedEdges) + } + if snap.DroppedEdges != 1 { + t.Errorf("DroppedEdges = %d, want 1", snap.DroppedEdges) + } + if len(snap.Nodes) != 2 { + t.Errorf("Nodes = %d, want 2", len(snap.Nodes)) + } + if len(snap.Edges) != 1 { + t.Errorf("Edges = %d, want 1", len(snap.Edges)) + } +} diff --git a/go/internal/analyzer/service_detector.go b/go/internal/analyzer/service_detector.go new file mode 100644 index 00000000..842b7316 --- /dev/null +++ b/go/internal/analyzer/service_detector.go @@ -0,0 +1,443 @@ +package analyzer + +import ( + "encoding/json" + "fmt" + "io/fs" + "os" + "path/filepath" + "regexp" + "sort" + "strings" + + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// ServiceDetector walks the filesystem for build files (30+ build systems) +// and emits SERVICE nodes with CONTAINS edges to their child nodes. Mirrors +// src/main/java/io/github/randomcodespace/iq/analyzer/ServiceDetector.java. +// +// Filesystem-driven by design — not all build files produce CodeNodes during +// index, so we cannot rely on the node list alone. +type ServiceDetector struct{} + +// ServiceDetectionResult holds the new SERVICE nodes and the CONTAINS edges +// produced by a Detect call. The Detect call also mutates the incoming +// `nodes` slice in place by stamping each node's `service` property. +type ServiceDetectionResult struct { + Nodes []*model.CodeNode + Edges []*model.CodeEdge +} + +// buildFiles maps exact build-file filenames to their build tool name. +// Mirrors BUILD_FILES in ServiceDetector.java lines 60-120. 
// buildFiles maps an exact build-file basename to the build tool it signals.
// Lookups are case-sensitive; project files that are identified by extension
// rather than by a fixed name live in suffixBuildFiles.
var buildFiles = map[string]string{
	// Java/JVM
	"pom.xml":             "maven",
	"build.gradle":        "gradle",
	"build.gradle.kts":    "gradle",
	"settings.gradle":     "gradle",
	"settings.gradle.kts": "gradle",
	"build.xml":           "ant",
	"build.sbt":           "sbt",
	"project.clj":         "leiningen",
	// JS/TS
	"package.json": "npm",
	"deno.json":    "deno",
	"deno.jsonc":   "deno",
	// Go
	"go.mod": "go",
	// Rust
	"Cargo.toml": "cargo",
	// Python
	"pyproject.toml":   "python",
	"setup.py":         "python",
	"setup.cfg":        "python",
	"Pipfile":          "python",
	"requirements.txt": "python",
	"manage.py":        "django",
	// Ruby
	"Gemfile": "ruby",
	// PHP
	"composer.json": "php",
	// .NET (csproj etc. handled by suffix below)
	"Directory.Build.props": "dotnet",
	// Swift
	"Package.swift": "swift",
	// Elixir
	"mix.exs": "elixir",
	// Dart / Flutter
	"pubspec.yaml": "dart",
	// Haskell
	"stack.yaml": "haskell",
	// Zig
	"build.zig": "zig",
	// OCaml
	"dune-project": "ocaml",
	// R
	"DESCRIPTION": "r",
	// Bazel
	"BUILD":       "bazel",
	"BUILD.bazel": "bazel",
	// Mono-repo orchestrators (supplemental, like Docker)
	"nx.json":    "nx",
	"lerna.json": "lerna",
	"turbo.json": "turbo",
	"rush.json":  "rush",
	// Docker (supplemental — doesn't override real build tools)
	"Dockerfile":          "docker",
	"docker-compose.yml":  "docker",
	"docker-compose.yaml": "docker",
	"compose.yml":         "docker",
	"compose.yaml":        "docker",
}

// suffixBuildFiles lists build files recognised by filename suffix rather
// than by exact name (e.g. MyApp.csproj). The order of the table does not
// matter: no filename can end with two of these suffixes, so at most one row
// matches a given file. How a match interacts with a module already
// registered for the same directory is decided by walkFilesystem.
var suffixBuildFiles = []struct {
	suffix, tool string
}{
	{".csproj", "dotnet"},
	{".fsproj", "dotnet"},
	{".vbproj", "dotnet"},
	{".gemspec", "ruby"},
	{".cabal", "haskell"},
	{".nimble", "nim"},
}
+var supplementalTools = map[string]struct{}{ + "docker": {}, "nx": {}, "lerna": {}, "turbo": {}, "rush": {}, +} + +// pythonBuildFiles is the priority order: index 0 wins. +// pyproject.toml > setup.py > requirements.txt > manage.py. +var pythonBuildFiles = []string{ + "pyproject.toml", "setup.py", "requirements.txt", "manage.py", +} + +// skipDirs are directory names pruned entirely during the filesystem walk. +var skipDirs = map[string]struct{}{ + "node_modules": {}, ".git": {}, "target": {}, "build": {}, + "dist": {}, ".gradle": {}, ".idea": {}, ".vscode": {}, + "__pycache__": {}, ".tox": {}, ".eggs": {}, "venv": {}, + ".venv": {}, "vendor": {}, ".bundle": {}, "_build": {}, "deps": {}, +} + +// moduleInfo is per-directory build-file bookkeeping. +type moduleInfo struct{ dir, tool, file string } + +// Detect walks `projectRoot`, identifies module boundaries, creates SERVICE +// nodes and CONTAINS edges. `projectDir` is used as the fallback service +// name for the root module when no name can be extracted from the build +// file. +// +// As a side effect, each node in `nodes` whose filePath falls under a +// detected module has its `service` property set to that service's label. +func (sd *ServiceDetector) Detect(nodes []*model.CodeNode, edges []*model.CodeEdge, + projectDir string, projectRoot string) ServiceDetectionResult { + modules := map[string]moduleInfo{} + if projectRoot != "" { + sd.walkFilesystem(projectRoot, modules) + } + if len(modules) == 0 { + modules[""] = moduleInfo{dir: "", tool: "unknown", file: ""} + } + + // Sort dirs deepest-first so longer prefixes match before their parent + // modules during child assignment. 
+ dirs := make([]string, 0, len(modules)) + for k := range modules { + dirs = append(dirs, k) + } + sort.Slice(dirs, func(i, j int) bool { + if len(dirs[i]) != len(dirs[j]) { + return len(dirs[i]) > len(dirs[j]) + } + return dirs[i] < dirs[j] + }) + + serviceNodes := make([]*model.CodeNode, 0, len(dirs)) + serviceByDir := map[string]*model.CodeNode{} + for _, dir := range dirs { + info := modules[dir] + name := sd.extractServiceName(dir, info, projectDir, projectRoot) + sn := &model.CodeNode{ + ID: "service:" + name, + Kind: model.NodeService, + Label: name, + FilePath: ifBlank(dir, "."), + Layer: model.LayerBackend, + Confidence: model.ConfidenceLexical, + Annotations: []string{}, + Properties: map[string]any{ + "build_tool": info.tool, + "detected_from": info.file, + "endpoint_count": 0, + "entity_count": 0, + }, + } + serviceNodes = append(serviceNodes, sn) + serviceByDir[dir] = sn + } + + endpointCounts := map[string]int{} + entityCounts := map[string]int{} + var newEdges []*model.CodeEdge + for _, n := range nodes { + p := n.FilePath + var matchDir string + found := false + for _, dir := range dirs { + if dir == "" || strings.HasPrefix(p, dir+"/") || p == dir { + matchDir = dir + found = true + break + } + } + if !found { + if _, ok := modules[""]; ok { + matchDir = "" + } else { + continue + } + } + sn := serviceByDir[matchDir] + if sn == nil { + continue + } + if n.Properties == nil { + n.Properties = map[string]any{} + } + n.Properties["service"] = sn.Label + newEdges = append(newEdges, &model.CodeEdge{ + ID: fmt.Sprintf("edge:service:%s:contains:%s", sn.Label, n.ID), + Kind: model.EdgeContains, + SourceID: sn.ID, + TargetID: n.ID, + Confidence: model.ConfidenceLexical, + Properties: map[string]any{}, + }) + switch n.Kind { + case model.NodeEndpoint: + endpointCounts[sn.Label]++ + case model.NodeEntity: + entityCounts[sn.Label]++ + } + } + for _, sn := range serviceNodes { + sn.Properties["endpoint_count"] = endpointCounts[sn.Label] + 
sn.Properties["entity_count"] = entityCounts[sn.Label] + } + return ServiceDetectionResult{Nodes: serviceNodes, Edges: newEdges} +} + +func ifBlank(v, fallback string) string { + if v == "" { + return fallback + } + return v +} + +// walkFilesystem traverses `root` and registers a moduleInfo per directory +// that has a recognised build file. Skipped directories (skipDirs) are +// pruned via fs.SkipDir. +func (sd *ServiceDetector) walkFilesystem(root string, modules map[string]moduleInfo) { + _ = filepath.WalkDir(root, func(p string, ent fs.DirEntry, err error) error { + if err != nil { + return nil + } + if ent.IsDir() { + // Don't prune the root itself — its name might match a skipDir + // (e.g. someone running on /tmp/.venv) but we still want to + // scan it. + if p == root { + return nil + } + if _, skip := skipDirs[ent.Name()]; skip { + return fs.SkipDir + } + return nil + } + rel, err := filepath.Rel(root, filepath.Dir(p)) + if err != nil { + return nil + } + rel = filepath.ToSlash(rel) + if rel == "." { + rel = "" + } + name := ent.Name() + // Suffix-based first (csproj etc.) + for _, s := range suffixBuildFiles { + if strings.HasSuffix(name, s.suffix) { + if _, present := modules[rel]; !present { + modules[rel] = moduleInfo{dir: rel, tool: s.tool, file: name} + } + return nil + } + } + tool, ok := buildFiles[name] + if !ok { + return nil + } + sd.registerModule(modules, rel, tool, name) + return nil + }) +} + +// registerModule mirrors the priority rules at ServiceDetector.java lines +// 391-416: supplemental tools don't override real ones; python files have a +// strict priority order; gradle settings.* doesn't override build.gradle. 
+func (sd *ServiceDetector) registerModule(modules map[string]moduleInfo, dir, tool, file string) { + existing, present := modules[dir] + if _, suppl := supplementalTools[tool]; suppl && present { + return + } + if present && isPython(tool) && !isPython(existing.tool) { + return + } + if present && isPython(tool) && isPython(existing.tool) { + if pythonPriority(file) >= pythonPriority(existing.file) { + return + } + } + if tool == "gradle" && present && existing.tool == "gradle" && + strings.HasPrefix(file, "settings.") { + return + } + modules[dir] = moduleInfo{dir: dir, tool: tool, file: file} +} + +func isPython(t string) bool { return t == "python" || t == "django" } + +func pythonPriority(file string) int { + for i, f := range pythonBuildFiles { + if f == file { + return i + } + } + return len(pythonBuildFiles) +} + +// extractServiceName tries the build file content first, then falls back to +// directory-based naming. Matches Java extractServiceName. +func (sd *ServiceDetector) extractServiceName(dir string, info moduleInfo, + projectDir, projectRoot string) string { + if projectRoot != "" && info.file != "" { + if name := sd.readNameFromBuildFile(projectRoot, dir, info); name != "" { + return name + } + } + if dir == "" { + if projectDir != "" { + return projectDir + } + return "root" + } + if idx := strings.LastIndex(dir, "/"); idx >= 0 { + return dir[idx+1:] + } + return dir +} + +// readNameFromBuildFile reads `projectRoot/dir/info.file` and runs the +// per-tool extractor. Returns "" on read failure or no match. 
+func (sd *ServiceDetector) readNameFromBuildFile(root, dir string, info moduleInfo) string { + full := filepath.Join(root, dir, info.file) + content, err := os.ReadFile(full) + if err != nil { + return "" + } + s := string(content) + switch info.tool { + case "maven": + return extractFromPom(s) + case "npm": + return extractFromPackageJSON(s) + case "go": + return extractFromGoMod(s) + case "cargo": + return matchFirst(reCargoName, s) + case "python": + if info.file == "pyproject.toml" { + return matchFirst(rePyProjectName, s) + } + if info.file == "setup.py" { + return matchFirst(reSetupPyName, s) + } + return "" + case "gradle": + if strings.HasPrefix(info.file, "settings.") { + return matchFirst(reGradleSettingsName, s) + } + return "" + case "sbt": + return matchFirst(reSbtName, s) + case "php": + name := matchFirst(reComposerName, s) + if i := strings.LastIndex(name, "/"); i >= 0 { + name = name[i+1:] + } + return name + case "elixir": + return matchFirst(reMixAppName, s) + case "dart": + return matchFirst(rePubspecName, s) + } + return "" +} + +var ( + rePomArtifactID = regexp.MustCompile(`\s*([^<]+?)\s*`) + rePackageJSONName = regexp.MustCompile(`"name"\s*:\s*"([^"]+)"`) + reGoModModule = regexp.MustCompile(`(?m)^module\s+(\S+)`) + reCargoName = regexp.MustCompile(`(?m)^name\s*=\s*"([^"]+)"`) + rePyProjectName = regexp.MustCompile(`(?m)^name\s*=\s*"([^"]+)"`) + reSetupPyName = regexp.MustCompile(`name\s*=\s*['"]([^'"]+)['"]`) + reGradleSettingsName = regexp.MustCompile(`rootProject\.name\s*=\s*['"]([^'"]+)['"]`) + reSbtName = regexp.MustCompile(`name\s*:=\s*"([^"]+)"`) + reComposerName = regexp.MustCompile(`"name"\s*:\s*"([^"]+)"`) + reMixAppName = regexp.MustCompile(`app:\s*:([\w]+)`) + rePubspecName = regexp.MustCompile(`(?m)^name:\s*(\S+)`) +) + +func extractFromPom(s string) string { + search := s + if idx := strings.Index(s, ""); idx > 0 { + search = s[idx:] + } + return matchFirst(rePomArtifactID, search) +} + +func extractFromPackageJSON(s string) 
string { + name := matchFirst(rePackageJSONName, s) + if name == "" { + return "" + } + // Validate as JSON before trusting (cheap, gives same result on bad input). + var m map[string]any + _ = json.Unmarshal([]byte(s), &m) + if i := strings.LastIndex(name, "/"); i >= 0 { + name = name[i+1:] + } + return name +} + +func extractFromGoMod(s string) string { + mod := matchFirst(reGoModModule, s) + if i := strings.LastIndex(mod, "/"); i >= 0 { + mod = mod[i+1:] + } + return mod +} + +func matchFirst(re *regexp.Regexp, s string) string { + m := re.FindStringSubmatch(s) + if len(m) < 2 { + return "" + } + return strings.TrimSpace(m[1]) +} diff --git a/go/internal/analyzer/service_detector_test.go b/go/internal/analyzer/service_detector_test.go new file mode 100644 index 00000000..8453214a --- /dev/null +++ b/go/internal/analyzer/service_detector_test.go @@ -0,0 +1,317 @@ +package analyzer + +import ( + "os" + "path/filepath" + "sort" + "testing" + + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// writeFile is a tiny helper for these tests — writes content to dir/relPath, +// creating parent directories. +func writeFile(t *testing.T, root, rel, content string) { + t.Helper() + full := filepath.Join(root, rel) + if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil { + t.Fatalf("mkdir: %v", err) + } + if err := os.WriteFile(full, []byte(content), 0o644); err != nil { + t.Fatalf("write %s: %v", rel, err) + } +} + +// serviceByLabel finds a SERVICE node in a result by its label. +func serviceByLabel(t *testing.T, nodes []*model.CodeNode, label string) *model.CodeNode { + t.Helper() + for _, n := range nodes { + if n.Kind == model.NodeService && n.Label == label { + return n + } + } + t.Fatalf("no service node with label %q (have %d nodes)", label, len(nodes)) + return nil +} + +// TestServiceDetectorTwoModules: pom.xml at root + package.json under api/ → +// 2 SERVICE nodes; root extracted from artifactId; api extracted from name. 
+func TestServiceDetectorTwoModules(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "pom.xml", ` + my-java-app +`) + writeFile(t, root, "api/package.json", `{"name":"api-server"}`) + + d := &ServiceDetector{} + r := d.Detect(nil, nil, "projectfallback", root) + + if len(r.Nodes) != 2 { + labels := make([]string, 0, len(r.Nodes)) + for _, n := range r.Nodes { + labels = append(labels, n.Label) + } + sort.Strings(labels) + t.Fatalf("want 2 service nodes, got %d: %v", len(r.Nodes), labels) + } + mavenSvc := serviceByLabel(t, r.Nodes, "my-java-app") + if got := mavenSvc.Properties["build_tool"]; got != "maven" { + t.Fatalf("maven svc build_tool = %v, want maven", got) + } + if got := mavenSvc.Properties["detected_from"]; got != "pom.xml" { + t.Fatalf("maven svc detected_from = %v, want pom.xml", got) + } + if mavenSvc.Layer != model.LayerBackend { + t.Fatalf("maven svc layer = %v, want backend", mavenSvc.Layer) + } + if mavenSvc.ID != "service:my-java-app" { + t.Fatalf("maven svc id = %q, want service:my-java-app", mavenSvc.ID) + } + + npmSvc := serviceByLabel(t, r.Nodes, "api-server") + if got := npmSvc.Properties["build_tool"]; got != "npm" { + t.Fatalf("npm svc build_tool = %v, want npm", got) + } +} + +// TestServiceDetectorDirectoryFallback: build file with no extractable name → +// service name falls back to directory (or projectDir for root). +func TestServiceDetectorDirectoryFallback(t *testing.T) { + root := t.TempDir() + // requirements.txt has no name extractor — falls back to directory. 
+ writeFile(t, root, "services/payment/requirements.txt", "flask==2.0\n") + + d := &ServiceDetector{} + r := d.Detect(nil, nil, "rootproj", root) + + if len(r.Nodes) != 1 { + t.Fatalf("want 1 node, got %d", len(r.Nodes)) + } + if r.Nodes[0].Label != "payment" { + t.Fatalf("label = %q, want payment", r.Nodes[0].Label) + } +} + +// TestServiceDetectorRootProjectDirFallback: a build file in the project root +// with no extractable name falls back to projectDir, not "". +func TestServiceDetectorRootProjectDirFallback(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "requirements.txt", "flask\n") + + d := &ServiceDetector{} + r := d.Detect(nil, nil, "topproj", root) + + if len(r.Nodes) != 1 { + t.Fatalf("want 1 node, got %d", len(r.Nodes)) + } + if r.Nodes[0].Label != "topproj" { + t.Fatalf("label = %q, want topproj", r.Nodes[0].Label) + } +} + +// TestServiceDetectorPythonPriority: pyproject.toml beats setup.py beats +// requirements.txt beats manage.py in the same directory. +func TestServiceDetectorPythonPriority(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "svc/pyproject.toml", `[project] +name = "winning-name" +`) + writeFile(t, root, "svc/setup.py", `setup(name="loser1")`) + writeFile(t, root, "svc/requirements.txt", "flask\n") + writeFile(t, root, "svc/manage.py", `# django entry`) + + d := &ServiceDetector{} + r := d.Detect(nil, nil, "p", root) + + if len(r.Nodes) != 1 { + t.Fatalf("want 1 node, got %d", len(r.Nodes)) + } + sn := r.Nodes[0] + if sn.Label != "winning-name" { + t.Fatalf("label = %q, want winning-name", sn.Label) + } + if got := sn.Properties["detected_from"]; got != "pyproject.toml" { + t.Fatalf("detected_from = %v, want pyproject.toml", got) + } +} + +// TestServiceDetectorSupplementalDoesNotOverride: a Dockerfile next to a +// pom.xml does NOT downgrade the build_tool to "docker". 
+func TestServiceDetectorSupplementalDoesNotOverride(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "Dockerfile", "FROM eclipse-temurin:25\n") + writeFile(t, root, "pom.xml", `real-app`) + + d := &ServiceDetector{} + r := d.Detect(nil, nil, "p", root) + + if len(r.Nodes) != 1 { + t.Fatalf("want 1 node, got %d", len(r.Nodes)) + } + sn := r.Nodes[0] + if sn.Label != "real-app" { + t.Fatalf("label = %q, want real-app", sn.Label) + } + if got := sn.Properties["build_tool"]; got != "maven" { + t.Fatalf("build_tool = %v, want maven (not docker)", got) + } +} + +// TestServiceDetectorSkipsBlacklistedDirs: build files inside node_modules, +// .git, target, build, dist, .venv, vendor MUST be ignored. +func TestServiceDetectorSkipsBlacklistedDirs(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "pom.xml", `top`) + // Each of these should be skipped: + writeFile(t, root, "node_modules/some-pkg/package.json", `{"name":"nope"}`) + writeFile(t, root, ".git/hooks/package.json", `{"name":"git-nope"}`) + writeFile(t, root, "target/embedded/pom.xml", `tgt-nope`) + writeFile(t, root, "build/output/package.json", `{"name":"build-nope"}`) + writeFile(t, root, "dist/output/package.json", `{"name":"dist-nope"}`) + writeFile(t, root, ".venv/lib/pyproject.toml", `name = "venv-nope"`) + writeFile(t, root, "vendor/something/go.mod", "module foo.example/nope\n") + + d := &ServiceDetector{} + r := d.Detect(nil, nil, "p", root) + + if len(r.Nodes) != 1 { + labels := make([]string, 0, len(r.Nodes)) + for _, n := range r.Nodes { + labels = append(labels, n.Label) + } + sort.Strings(labels) + t.Fatalf("want 1 node (only root pom), got %d: %v", len(r.Nodes), labels) + } + if r.Nodes[0].Label != "top" { + t.Fatalf("label = %q, want top", r.Nodes[0].Label) + } +} + +// TestServiceDetectorCsprojSuffix: a *.csproj file triggers the dotnet module +// even though "X.csproj" is not in the exact-filename map. 
+func TestServiceDetectorCsprojSuffix(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "MyApp/MyApp.csproj", ``) + + d := &ServiceDetector{} + r := d.Detect(nil, nil, "p", root) + + if len(r.Nodes) != 1 { + t.Fatalf("want 1 node, got %d", len(r.Nodes)) + } + sn := r.Nodes[0] + if got := sn.Properties["build_tool"]; got != "dotnet" { + t.Fatalf("build_tool = %v, want dotnet", got) + } + if got := sn.Properties["detected_from"]; got != "MyApp.csproj" { + t.Fatalf("detected_from = %v, want MyApp.csproj", got) + } + // Directory-based name fallback (no extractor for .csproj). + if sn.Label != "MyApp" { + t.Fatalf("label = %q, want MyApp", sn.Label) + } +} + +// TestServiceDetectorAssignsChildrenAndContainsEdges: nodes get a service +// property + a CONTAINS edge from the deepest matching service. +func TestServiceDetectorAssignsChildrenAndContainsEdges(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "pom.xml", `top`) + writeFile(t, root, "api/package.json", `{"name":"api"}`) + + nodes := []*model.CodeNode{ + {ID: "n:1", Kind: model.NodeClass, FilePath: "src/main/java/X.java"}, + {ID: "n:2", Kind: model.NodeEndpoint, FilePath: "api/routes/users.ts"}, + {ID: "n:3", Kind: model.NodeEntity, FilePath: "api/models/user.ts"}, + } + + d := &ServiceDetector{} + r := d.Detect(nodes, nil, "p", root) + + // 2 services + 3 contains edges. + if len(r.Nodes) != 2 { + t.Fatalf("want 2 services, got %d", len(r.Nodes)) + } + if len(r.Edges) != 3 { + t.Fatalf("want 3 contains edges, got %d", len(r.Edges)) + } + // Deepest match: nodes 2+3 land on "api", node 1 lands on "top". + got := map[string]string{} + for _, n := range nodes { + got[n.ID], _ = n.Properties["service"].(string) + } + if got["n:1"] != "top" { + t.Fatalf("n:1 service = %q, want top", got["n:1"]) + } + if got["n:2"] != "api" { + t.Fatalf("n:2 service = %q, want api", got["n:2"]) + } + if got["n:3"] != "api" { + t.Fatalf("n:3 service = %q, want api", got["n:3"]) + } + + // Counts on services. 
+ apiSvc := serviceByLabel(t, r.Nodes, "api") + if got := apiSvc.Properties["endpoint_count"]; got != 1 { + t.Fatalf("api endpoint_count = %v, want 1", got) + } + if got := apiSvc.Properties["entity_count"]; got != 1 { + t.Fatalf("api entity_count = %v, want 1", got) + } + topSvc := serviceByLabel(t, r.Nodes, "top") + if got := topSvc.Properties["endpoint_count"]; got != 0 { + t.Fatalf("top endpoint_count = %v, want 0", got) + } +} + +// TestServiceDetectorNoBuildFilesEmitsSingleUnknown: empty repo (no build +// files) → one synthesised "unknown" service using projectDir as the label. +func TestServiceDetectorNoBuildFilesEmitsSingleUnknown(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "README.md", "# nothing here\n") + + d := &ServiceDetector{} + r := d.Detect(nil, nil, "lonely", root) + + if len(r.Nodes) != 1 { + t.Fatalf("want 1 node, got %d", len(r.Nodes)) + } + sn := r.Nodes[0] + if sn.Label != "lonely" { + t.Fatalf("label = %q, want lonely", sn.Label) + } + if got := sn.Properties["build_tool"]; got != "unknown" { + t.Fatalf("build_tool = %v, want unknown", got) + } +} + +// TestServiceDetectorDeterminism: two identical runs over the same tree +// produce service node lists with identical labels (order may differ between +// runs but membership and metadata must match). 
+func TestServiceDetectorDeterminism(t *testing.T) { + root := t.TempDir() + writeFile(t, root, "pom.xml", `a`) + writeFile(t, root, "svc1/package.json", `{"name":"b"}`) + writeFile(t, root, "svc2/go.mod", "module example.com/c\n") + + d := &ServiceDetector{} + collect := func() []string { + r := d.Detect(nil, nil, "p", root) + out := make([]string, 0, len(r.Nodes)) + for _, n := range r.Nodes { + out = append(out, n.Label+"|"+n.Properties["build_tool"].(string)) + } + sort.Strings(out) + return out + } + a := collect() + b := collect() + if len(a) != len(b) { + t.Fatalf("len mismatch %d vs %d", len(a), len(b)) + } + for i := range a { + if a[i] != b[i] { + t.Fatalf("determinism broken at %d: %q vs %q", i, a[i], b[i]) + } + } +} diff --git a/go/internal/buildinfo/buildinfo.go b/go/internal/buildinfo/buildinfo.go new file mode 100644 index 00000000..c975c885 --- /dev/null +++ b/go/internal/buildinfo/buildinfo.go @@ -0,0 +1,42 @@ +// Package buildinfo exposes version/commit/date/dirty strings that the release +// pipeline injects via -ldflags -X. When no ldflags are set (e.g. local +// `go build` or `go test`), the defaults below are used. None of the functions +// here panic; --version is required to succeed in all build modes (spec §7.1). +package buildinfo + +import "runtime" + +// Injected at link time via goreleaser: +// +// -X 'github.com/randomcodespace/codeiq/go/internal/buildinfo.Version={{.Version}}' +// -X 'github.com/randomcodespace/codeiq/go/internal/buildinfo.Commit={{.ShortCommit}}' +// -X 'github.com/randomcodespace/codeiq/go/internal/buildinfo.Date={{.Date}}' +// -X 'github.com/randomcodespace/codeiq/go/internal/buildinfo.Dirty={{.IsGitDirty}}' +var ( + Version = "dev" + Commit = "unknown" + Date = "unknown" + Dirty = "false" +) + +// Platform returns "/", e.g. "linux/amd64". +func Platform() string { + return runtime.GOOS + "/" + runtime.GOARCH +} + +// GoVersion returns the Go toolchain version the binary was built with. 
+func GoVersion() string { + return runtime.Version() +} + +// DirtyBool parses Dirty ("true"/"false") into a bool. Anything not "true" +// (case-sensitive) is false. +func DirtyBool() bool { + return Dirty == "true" +} + +// Features returns the compile-time feature flags. "kuzu" joined the list in +// phase 2 with the Kuzu wrapper landing under internal/graph. +func Features() []string { + return []string{"cgo", "kuzu", "sqlite", "tree-sitter"} +} diff --git a/go/internal/buildinfo/buildinfo_test.go b/go/internal/buildinfo/buildinfo_test.go new file mode 100644 index 00000000..f675d74b --- /dev/null +++ b/go/internal/buildinfo/buildinfo_test.go @@ -0,0 +1,61 @@ +package buildinfo + +import ( + "runtime" + "strings" + "testing" +) + +func TestDefaultsWithoutLdflags(t *testing.T) { + if Version != "dev" { + t.Fatalf("default Version = %q, want \"dev\"", Version) + } + if Commit != "unknown" { + t.Fatalf("default Commit = %q, want \"unknown\"", Commit) + } + if Date != "unknown" { + t.Fatalf("default Date = %q, want \"unknown\"", Date) + } + if Dirty != "false" { + t.Fatalf("default Dirty = %q, want \"false\"", Dirty) + } +} + +func TestPlatform(t *testing.T) { + got := Platform() + want := runtime.GOOS + "/" + runtime.GOARCH + if got != want { + t.Fatalf("Platform() = %q, want %q", got, want) + } +} + +func TestGoVersion(t *testing.T) { + if !strings.HasPrefix(GoVersion(), "go") { + t.Fatalf("GoVersion() = %q, want prefix \"go\"", GoVersion()) + } +} + +func TestFeatures(t *testing.T) { + f := Features() + wantContains := []string{"cgo", "kuzu", "sqlite", "tree-sitter"} + for _, w := range wantContains { + found := false + for _, got := range f { + if got == w { + found = true + break + } + } + if !found { + t.Fatalf("Features() = %v, missing %q", f, w) + } + } +} + +func TestDirtyBool(t *testing.T) { + Dirty = "true" + t.Cleanup(func() { Dirty = "false" }) + if !DirtyBool() { + t.Fatal("DirtyBool() = false, want true when Dirty == \"true\"") + } +} diff --git 
a/go/internal/cache/cache.go b/go/internal/cache/cache.go new file mode 100644 index 00000000..c026d20d --- /dev/null +++ b/go/internal/cache/cache.go @@ -0,0 +1,213 @@ +package cache + +import ( + "database/sql" + "encoding/json" + "errors" + "fmt" + + _ "github.com/mattn/go-sqlite3" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// ErrNotFound is returned by Get when no row matches the content hash. +var ErrNotFound = errors.New("cache: not found") + +// Entry is a single file's cached detector results, keyed by content hash. +type Entry struct { + ContentHash string + Path string + Language string + ParsedAt string // RFC3339 + Nodes []*model.CodeNode + Edges []*model.CodeEdge +} + +// Cache is a SQLite-backed analysis cache. Safe for concurrent reads. +// Writes serialize via SQLite's WAL mode + busy_timeout. +type Cache struct { + db *sql.DB +} + +// Open opens or creates the cache file at path. Applies schema + WAL pragmas +// + stamps CacheVersion into cache_meta on first open. +func Open(path string) (*Cache, error) { + dsn := fmt.Sprintf("file:%s?_journal=WAL&_busy_timeout=5000&_fk=1", path) + db, err := sql.Open("sqlite3", dsn) + if err != nil { + return nil, fmt.Errorf("cache open: %w", err) + } + if _, err := db.Exec(pragmasDDL); err != nil { + db.Close() + return nil, fmt.Errorf("cache pragmas: %w", err) + } + if _, err := db.Exec(schemaDDL); err != nil { + db.Close() + return nil, fmt.Errorf("cache schema: %w", err) + } + c := &Cache{db: db} + if err := c.stampVersion(); err != nil { + db.Close() + return nil, err + } + return c, nil +} + +// Close releases the underlying database handle. +func (c *Cache) Close() error { return c.db.Close() } + +func (c *Cache) stampVersion() error { + _, err := c.db.Exec( + `INSERT INTO cache_meta(meta_key, meta_value) VALUES('version', ?) 
+ ON CONFLICT(meta_key) DO UPDATE SET meta_value = excluded.meta_value`, + fmt.Sprintf("%d", CacheVersion), + ) + return err +} + +// Version reads the cache_version row. +func (c *Cache) Version() (int, error) { + var s string + err := c.db.QueryRow(`SELECT meta_value FROM cache_meta WHERE meta_key='version'`).Scan(&s) + if err != nil { + return 0, err + } + var v int + if _, err := fmt.Sscanf(s, "%d", &v); err != nil { + return 0, err + } + return v, nil +} + +// Has reports whether an entry for contentHash exists. +func (c *Cache) Has(contentHash string) bool { + var n int + _ = c.db.QueryRow(`SELECT COUNT(*) FROM files WHERE content_hash=?`, contentHash).Scan(&n) + return n > 0 +} + +// Put stores or replaces the cache entry. Atomic — all rows for the hash are +// wiped first then re-inserted in a single transaction. +func (c *Cache) Put(e *Entry) error { + tx, err := c.db.Begin() + if err != nil { + return err + } + defer tx.Rollback() + + if _, err := tx.Exec(`DELETE FROM nodes WHERE content_hash=?`, e.ContentHash); err != nil { + return err + } + if _, err := tx.Exec(`DELETE FROM edges WHERE content_hash=?`, e.ContentHash); err != nil { + return err + } + if _, err := tx.Exec(`DELETE FROM files WHERE content_hash=?`, e.ContentHash); err != nil { + return err + } + if _, err := tx.Exec( + `INSERT INTO files(content_hash, path, language, parsed_at) VALUES(?,?,?,?)`, + e.ContentHash, e.Path, e.Language, e.ParsedAt, + ); err != nil { + return err + } + for _, n := range e.Nodes { + data, err := json.Marshal(n) + if err != nil { + return err + } + if _, err := tx.Exec( + `INSERT INTO nodes(id, content_hash, kind, data) VALUES(?,?,?,?)`, + n.ID, e.ContentHash, n.Kind.String(), string(data), + ); err != nil { + return err + } + } + for _, ed := range e.Edges { + data, err := json.Marshal(ed) + if err != nil { + return err + } + if _, err := tx.Exec( + `INSERT INTO edges(source, target, content_hash, kind, data) VALUES(?,?,?,?,?)`, + ed.SourceID, ed.TargetID, 
e.ContentHash, ed.Kind.String(), string(data), + ); err != nil { + return err + } + } + return tx.Commit() +} + +// Get fetches the cache entry by content hash. Returns ErrNotFound if absent. +func (c *Cache) Get(contentHash string) (*Entry, error) { + var e Entry + e.ContentHash = contentHash + err := c.db.QueryRow( + `SELECT path, language, parsed_at FROM files WHERE content_hash=?`, + contentHash, + ).Scan(&e.Path, &e.Language, &e.ParsedAt) + if err == sql.ErrNoRows { + return nil, ErrNotFound + } + if err != nil { + return nil, err + } + rows, err := c.db.Query(`SELECT data FROM nodes WHERE content_hash=? ORDER BY row_id`, contentHash) + if err != nil { + return nil, err + } + defer rows.Close() + for rows.Next() { + var data string + if err := rows.Scan(&data); err != nil { + return nil, err + } + var n model.CodeNode + if err := json.Unmarshal([]byte(data), &n); err != nil { + return nil, err + } + e.Nodes = append(e.Nodes, &n) + } + erows, err := c.db.Query(`SELECT data FROM edges WHERE content_hash=?`, contentHash) + if err != nil { + return nil, err + } + defer erows.Close() + for erows.Next() { + var data string + if err := erows.Scan(&data); err != nil { + return nil, err + } + var ed model.CodeEdge + if err := json.Unmarshal([]byte(data), &ed); err != nil { + return nil, err + } + e.Edges = append(e.Edges, &ed) + } + return &e, nil +} + +// IterateAll yields every cached entry in deterministic order (sorted by +// path then content_hash) — used by phase 2's enrich. 
+func (c *Cache) IterateAll(fn func(*Entry) error) error { + rows, err := c.db.Query( + `SELECT content_hash FROM files ORDER BY path, content_hash`, + ) + if err != nil { + return err + } + defer rows.Close() + for rows.Next() { + var h string + if err := rows.Scan(&h); err != nil { + return err + } + e, err := c.Get(h) + if err != nil { + return err + } + if err := fn(e); err != nil { + return err + } + } + return nil +} diff --git a/go/internal/cache/cache_test.go b/go/internal/cache/cache_test.go new file mode 100644 index 00000000..aefd429e --- /dev/null +++ b/go/internal/cache/cache_test.go @@ -0,0 +1,84 @@ +package cache + +import ( + "path/filepath" + "testing" + "time" + + "github.com/randomcodespace/codeiq/go/internal/model" +) + +func TestCacheRoundTrip(t *testing.T) { + dir := t.TempDir() + c, err := Open(filepath.Join(dir, "test.sqlite")) + if err != nil { + t.Fatal(err) + } + defer c.Close() + + hash := "deadbeef" + nodes := []*model.CodeNode{ + model.NewCodeNode("file.java:Foo", model.NodeClass, "Foo"), + } + nodes[0].FilePath = "file.java" + nodes[0].Source = "SpringRestDetector" + + edges := []*model.CodeEdge{ + model.NewCodeEdge("file.java:Foo->Bar", model.EdgeCalls, + "file.java:Foo", "file.java:Bar"), + } + + entry := &Entry{ + ContentHash: hash, + Path: "file.java", + Language: "java", + ParsedAt: time.Now().UTC().Format(time.RFC3339), + Nodes: nodes, + Edges: edges, + } + if err := c.Put(entry); err != nil { + t.Fatal(err) + } + if !c.Has(hash) { + t.Fatal("Has should return true after Put") + } + got, err := c.Get(hash) + if err != nil { + t.Fatal(err) + } + if got.Path != entry.Path || got.Language != entry.Language { + t.Fatalf("metadata mismatch: %+v", got) + } + if len(got.Nodes) != 1 || got.Nodes[0].ID != "file.java:Foo" { + t.Fatalf("node round-trip: %+v", got.Nodes) + } + if len(got.Edges) != 1 || got.Edges[0].Kind != model.EdgeCalls { + t.Fatalf("edge round-trip: %+v", got.Edges) + } +} + +func TestCacheVersionStamped(t *testing.T) { 
+ dir := t.TempDir() + c, err := Open(filepath.Join(dir, "v.sqlite")) + if err != nil { + t.Fatal(err) + } + defer c.Close() + v, err := c.Version() + if err != nil { + t.Fatal(err) + } + if v != CacheVersion { + t.Fatalf("Version() = %d, want %d", v, CacheVersion) + } +} + +func TestCacheMissReturnsErrNotFound(t *testing.T) { + dir := t.TempDir() + c, _ := Open(filepath.Join(dir, "m.sqlite")) + defer c.Close() + _, err := c.Get("nope") + if err != ErrNotFound { + t.Fatalf("Get(missing) err = %v, want ErrNotFound", err) + } +} diff --git a/go/internal/cache/hasher.go b/go/internal/cache/hasher.go new file mode 100644 index 00000000..8e9b68b1 --- /dev/null +++ b/go/internal/cache/hasher.go @@ -0,0 +1,31 @@ +package cache + +import ( + "crypto/sha256" + "encoding/hex" + "io" + "os" +) + +// HashFile returns the lowercase hex SHA-256 digest of the file at path. +// Output matches Java io.github.randomcodespace.iq.cache.FileHasher.hash — +// 64 hex chars, lowercase, SHA-256. +func HashFile(path string) (string, error) { + f, err := os.Open(path) + if err != nil { + return "", err + } + defer f.Close() + h := sha256.New() + if _, err := io.Copy(h, f); err != nil { + return "", err + } + return hex.EncodeToString(h.Sum(nil)), nil +} + +// HashString returns the lowercase hex SHA-256 of s (UTF-8 bytes). +// Mirrors Java FileHasher.hashString. 
+func HashString(s string) string { + h := sha256.Sum256([]byte(s)) + return hex.EncodeToString(h[:]) +} diff --git a/go/internal/cache/hasher_test.go b/go/internal/cache/hasher_test.go new file mode 100644 index 00000000..b3dda5b5 --- /dev/null +++ b/go/internal/cache/hasher_test.go @@ -0,0 +1,54 @@ +package cache + +import ( + "os" + "path/filepath" + "strings" + "testing" +) + +func TestHashStringKnownVector(t *testing.T) { + // "hello" → SHA-256: 2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824 + got := HashString("hello") + want := "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824" + if got != want { + t.Fatalf("HashString(\"hello\") = %q, want %q", got, want) + } + if len(got) != 64 { + t.Fatalf("expected 64 hex chars, got %d", len(got)) + } + if strings.ToLower(got) != got { + t.Fatal("hash must be lowercase") + } +} + +func TestHashFile(t *testing.T) { + dir := t.TempDir() + f := filepath.Join(dir, "x.txt") + if err := os.WriteFile(f, []byte("hello"), 0644); err != nil { + t.Fatal(err) + } + got, err := HashFile(f) + if err != nil { + t.Fatal(err) + } + if got != "2cf24dba5fb0a30e26e83b2ac5b9e29e1b161e5c1fa7425e73043362938b9824" { + t.Fatalf("HashFile = %q", got) + } +} + +func TestHashFileMissingReturnsError(t *testing.T) { + _, err := HashFile("/nonexistent/path/zzzz") + if err == nil { + t.Fatal("expected error on missing file") + } +} + +func TestHashEmpty(t *testing.T) { + // SHA-256("") = e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 + got := HashString("") + want := "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855" + if got != want { + t.Fatalf("HashString(\"\") = %q, want %q", got, want) + } +} diff --git a/go/internal/cache/inspect.go b/go/internal/cache/inspect.go new file mode 100644 index 00000000..cfc075af --- /dev/null +++ b/go/internal/cache/inspect.go @@ -0,0 +1,145 @@ +package cache + +import ( + "database/sql" + "fmt" + "os" +) + +// Stats summarises the cache contents — 
used by `codeiq cache info`. +type Stats struct { + FileCount int `json:"file_count"` + NodeCount int `json:"node_count"` + EdgeCount int `json:"edge_count"` + SizeBytes int64 `json:"size_bytes"` + Version int `json:"version"` +} + +// Stats returns the row counts and file-size of the cache database. +func (c *Cache) Stats() (Stats, error) { + var s Stats + if err := c.db.QueryRow(`SELECT COUNT(*) FROM files`).Scan(&s.FileCount); err != nil { + return s, fmt.Errorf("cache: count files: %w", err) + } + if err := c.db.QueryRow(`SELECT COUNT(*) FROM nodes`).Scan(&s.NodeCount); err != nil { + return s, fmt.Errorf("cache: count nodes: %w", err) + } + if err := c.db.QueryRow(`SELECT COUNT(*) FROM edges`).Scan(&s.EdgeCount); err != nil { + return s, fmt.Errorf("cache: count edges: %w", err) + } + v, err := c.Version() + if err == nil { + s.Version = v + } + return s, nil +} + +// FileSize returns the cache file size in bytes; 0 when the file does not +// exist. Wrap of os.Stat — exposed as a method so callers don't need to +// know the cache path. +func FileSize(path string) int64 { + st, err := os.Stat(path) + if err != nil { + return 0 + } + return st.Size() +} + +// ListEntry is one summarised row for `codeiq cache list`. +type ListEntry struct { + ContentHash string `json:"content_hash"` + Path string `json:"path"` + Language string `json:"language"` + ParsedAt string `json:"parsed_at"` + NodeCount int `json:"node_count"` + EdgeCount int `json:"edge_count"` +} + +// List returns up to `limit` summarised cache entries ordered by path. Use +// offset to page through the cache. Pass limit <= 0 for an unbounded scan +// (use carefully — large caches can have tens of thousands of rows). 
+func (c *Cache) List(limit, offset int) ([]ListEntry, error) { + q := ` + SELECT f.content_hash, f.path, f.language, f.parsed_at, + (SELECT COUNT(*) FROM nodes n WHERE n.content_hash = f.content_hash) AS node_count, + (SELECT COUNT(*) FROM edges e WHERE e.content_hash = f.content_hash) AS edge_count + FROM files f + ORDER BY f.path` + if limit > 0 { + q += fmt.Sprintf(" LIMIT %d OFFSET %d", limit, offset) + } + rows, err := c.db.Query(q) + if err != nil { + return nil, fmt.Errorf("cache: list: %w", err) + } + defer rows.Close() + var out []ListEntry + for rows.Next() { + var e ListEntry + if err := rows.Scan(&e.ContentHash, &e.Path, &e.Language, &e.ParsedAt, &e.NodeCount, &e.EdgeCount); err != nil { + return nil, fmt.Errorf("cache: scan: %w", err) + } + out = append(out, e) + } + return out, rows.Err() +} + +// Clear truncates every row from files / nodes / edges. The cache_meta +// row (cache version) is preserved so re-opens don't trigger a version +// mismatch. Returns the number of rows deleted from `files` so callers +// can report progress. +func (c *Cache) Clear() (int64, error) { + tx, err := c.db.Begin() + if err != nil { + return 0, err + } + defer tx.Rollback() + if _, err := tx.Exec(`DELETE FROM edges`); err != nil { + return 0, err + } + if _, err := tx.Exec(`DELETE FROM nodes`); err != nil { + return 0, err + } + res, err := tx.Exec(`DELETE FROM files`) + if err != nil { + return 0, err + } + if _, err := tx.Exec(`DELETE FROM analysis_runs`); err != nil { + return 0, err + } + n, err := res.RowsAffected() + if err != nil { + return 0, err + } + if err := tx.Commit(); err != nil { + return 0, err + } + return n, nil +} + +// LookupByHashOrPath resolves a query against the cache: tries content +// hash first, then file path (full match), then file path suffix. Returns +// the full Entry. When no match is found, returns (nil, sql.ErrNoRows) +// so callers can detect not-found explicitly. 
+func (c *Cache) LookupByHashOrPath(query string) (*Entry, error) { + // 1. Exact content hash. + if c.Has(query) { + return c.Get(query) + } + // 2. Exact path. + var hash string + err := c.db.QueryRow(`SELECT content_hash FROM files WHERE path = ? LIMIT 1`, query).Scan(&hash) + if err == nil { + return c.Get(hash) + } + if err != sql.ErrNoRows { + return nil, err + } + // 3. Path suffix (handy when callers pass a relative path). + err = c.db.QueryRow(`SELECT content_hash FROM files WHERE path LIKE ? ORDER BY path LIMIT 1`, + "%"+query).Scan(&hash) + if err == nil { + return c.Get(hash) + } + return nil, sql.ErrNoRows +} diff --git a/go/internal/cache/schema.go b/go/internal/cache/schema.go new file mode 100644 index 00000000..5f0ec3a9 --- /dev/null +++ b/go/internal/cache/schema.go @@ -0,0 +1,67 @@ +package cache + +// CacheVersion is bumped whenever the hash algorithm, schema, or any field +// shape changes. Java side is currently version 5. Go side starts at 6 to +// force a rebuild on first run. +const CacheVersion = 6 + +// schemaDDL mirrors Java AnalysisCache SCHEMA_SQL, ported from H2 to SQLite. +// Differences: +// - H2 BIGINT AUTO_INCREMENT → SQLite INTEGER PRIMARY KEY AUTOINCREMENT +// - H2 VARCHAR (unbounded) → SQLite TEXT +// - H2 INTEGER → SQLite INTEGER +// - "key" / "value" reserved-word workaround stays as meta_key/meta_value +// even though SQLite doesn't reserve them — keeps parity dumps identical. 
const schemaDDL = `
CREATE TABLE IF NOT EXISTS cache_meta (
    meta_key TEXT PRIMARY KEY,
    meta_value TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS files (
    content_hash TEXT PRIMARY KEY,
    path TEXT NOT NULL,
    language TEXT NOT NULL,
    parsed_at TEXT NOT NULL,
    status TEXT DEFAULT 'DETECTED',
    detection_method TEXT DEFAULT 'tree-sitter',
    file_type TEXT DEFAULT 'source',
    snippet TEXT
);

CREATE TABLE IF NOT EXISTS nodes (
    row_id INTEGER PRIMARY KEY AUTOINCREMENT,
    id TEXT NOT NULL,
    content_hash TEXT NOT NULL,
    kind TEXT NOT NULL,
    data TEXT NOT NULL,
    FOREIGN KEY (content_hash) REFERENCES files(content_hash)
);

CREATE TABLE IF NOT EXISTS edges (
    source TEXT NOT NULL,
    target TEXT NOT NULL,
    content_hash TEXT NOT NULL,
    kind TEXT NOT NULL,
    data TEXT NOT NULL
);

CREATE TABLE IF NOT EXISTS analysis_runs (
    run_id TEXT PRIMARY KEY,
    commit_sha TEXT,
    timestamp TEXT NOT NULL,
    file_count INTEGER NOT NULL
);

CREATE INDEX IF NOT EXISTS idx_nodes_content_hash ON nodes(content_hash);
CREATE INDEX IF NOT EXISTS idx_edges_content_hash ON edges(content_hash);
CREATE INDEX IF NOT EXISTS idx_analysis_runs_timestamp ON analysis_runs(timestamp);
`

// NOTE(review): nodes declares a foreign key to files(content_hash) but edges
// does not, and files has no index on path — presumably both mirror the Java
// schema; confirm before changing either.

// pragmasDDL holds the connection PRAGMAs: WAL journalling, NORMAL
// synchronous mode, foreign-key enforcement and a 5000 ms busy timeout.
// Intended to be applied at open time.
+const pragmasDDL = ` +PRAGMA journal_mode = WAL; +PRAGMA synchronous = NORMAL; +PRAGMA foreign_keys = ON; +PRAGMA busy_timeout = 5000; +` diff --git a/go/internal/cache/schema_test.go b/go/internal/cache/schema_test.go new file mode 100644 index 00000000..9b2d4af9 --- /dev/null +++ b/go/internal/cache/schema_test.go @@ -0,0 +1,95 @@ +package cache + +import ( + "database/sql" + "path/filepath" + "strings" + "testing" + + _ "github.com/mattn/go-sqlite3" +) + +func TestCacheVersionConstant(t *testing.T) { + if CacheVersion != 6 { + t.Fatalf("CacheVersion = %d, want 6 (Java is 5; Go starts at 6 to force rebuild)", CacheVersion) + } +} + +func TestSchemaDDLContainsExpectedTables(t *testing.T) { + wantTables := []string{ + "cache_meta", + "files", + "nodes", + "edges", + "analysis_runs", + } + for _, tbl := range wantTables { + if !strings.Contains(schemaDDL, "CREATE TABLE IF NOT EXISTS "+tbl) { + t.Errorf("schemaDDL missing CREATE TABLE for %q", tbl) + } + } +} + +func TestSchemaDDLPreservesH2ReservedWordWorkaround(t *testing.T) { + // Parity with Java AnalysisCache — meta_key / meta_value (not key/value). + if !strings.Contains(schemaDDL, "meta_key") { + t.Error("schemaDDL must use meta_key (H2 reserved-word workaround, kept for parity)") + } + if !strings.Contains(schemaDDL, "meta_value") { + t.Error("schemaDDL must use meta_value (H2 reserved-word workaround, kept for parity)") + } +} + +func TestPragmasDDLEnablesWAL(t *testing.T) { + wantPragmas := []string{ + "journal_mode = WAL", + "synchronous = NORMAL", + "foreign_keys = ON", + "busy_timeout = 5000", + } + for _, p := range wantPragmas { + if !strings.Contains(pragmasDDL, p) { + t.Errorf("pragmasDDL missing %q", p) + } + } +} + +func TestSchemaDDLAppliesCleanlyToSQLite(t *testing.T) { + // The real contract: SQLite must accept the DDL as-is. This catches + // H2-isms (AUTO_INCREMENT vs AUTOINCREMENT, VARCHAR-without-length, etc.). 
+ dbPath := filepath.Join(t.TempDir(), "schema.db") + db, err := sql.Open("sqlite3", dbPath) + if err != nil { + t.Fatalf("open sqlite: %v", err) + } + defer db.Close() + + if _, err := db.Exec(schemaDDL); err != nil { + t.Fatalf("schemaDDL failed to apply: %v", err) + } + + // Sanity: all five tables and three indexes must exist. + wantObjects := map[string]string{ + "cache_meta": "table", + "files": "table", + "nodes": "table", + "edges": "table", + "analysis_runs": "table", + "idx_nodes_content_hash": "index", + "idx_edges_content_hash": "index", + "idx_analysis_runs_timestamp": "index", + } + for name, typ := range wantObjects { + var got string + err := db.QueryRow( + `SELECT type FROM sqlite_master WHERE name = ?`, name, + ).Scan(&got) + if err != nil { + t.Errorf("missing %s %q: %v", typ, name, err) + continue + } + if got != typ { + t.Errorf("object %q has type %q, want %q", name, got, typ) + } + } +} diff --git a/go/internal/cli/cache.go b/go/internal/cli/cache.go new file mode 100644 index 00000000..83168a62 --- /dev/null +++ b/go/internal/cli/cache.go @@ -0,0 +1,263 @@ +package cli + +import ( + "database/sql" + "encoding/json" + "fmt" + "io" + "path/filepath" + "strings" + "text/tabwriter" + + "github.com/randomcodespace/codeiq/go/internal/cache" + "github.com/spf13/cobra" +) + +func init() { + registerSubcommand(newCacheCommand) +} + +// newCacheCommand assembles `codeiq cache` and its four subcommands — +// `info`, `list`, `inspect`, `clear`. The parent prints help with no args. +func newCacheCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "cache ", + Short: "Inspect or manage the analysis cache (SQLite).", + Long: `Inspect or manage the SQLite analysis cache that ` + "`codeiq index`" + ` +writes to. The cache is keyed by SHA-256 content hash so subsequent runs +reuse detector results for unchanged files. + +Subcommands: + info Print row counts, version, and on-disk size. + list Page through cached file entries. 
+ inspect Print the deserialised nodes + edges for one entry. + clear Wipe every file / node / edge row (preserves the version).`, + Example: ` codeiq cache info + codeiq cache list --limit 20 + codeiq cache inspect path/to/UserController.java + codeiq cache clear --yes`, + RunE: func(c *cobra.Command, _ []string) error { return c.Help() }, + } + cmd.AddCommand(newCacheInfoCommand()) + cmd.AddCommand(newCacheListCommand()) + cmd.AddCommand(newCacheInspectCommand()) + cmd.AddCommand(newCacheClearCommand()) + return cmd +} + +func newCacheInfoCommand() *cobra.Command { + var cachePath string + cmd := &cobra.Command{ + Use: "info [path]", + Short: "Print summary stats about the analysis cache.", + Long: `Print row counts, cache version, and on-disk size of the +SQLite analysis cache. Use ` + "`--cache-path`" + ` to point at a different +file (default: /.codeiq/cache/codeiq.sqlite).`, + Example: ` codeiq cache info + codeiq cache info /repo + codeiq cache info --cache-path /tmp/scratch.sqlite`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + dbPath, err := resolveCachePath(args, cachePath) + if err != nil { + return err + } + c, err := cache.Open(dbPath) + if err != nil { + return fmt.Errorf("open cache %s: %w", dbPath, err) + } + defer c.Close() + stats, err := c.Stats() + if err != nil { + return err + } + stats.SizeBytes = cache.FileSize(dbPath) + out := map[string]any{ + "path": dbPath, + "size_bytes": stats.SizeBytes, + "version": stats.Version, + "file_count": stats.FileCount, + "node_count": stats.NodeCount, + "edge_count": stats.EdgeCount, + } + return jsonOut(cmd.OutOrStdout(), out) + }, + } + cmd.Flags().StringVar(&cachePath, "cache-path", "", + "Path to the SQLite cache file (default: /.codeiq/cache/codeiq.sqlite).") + return cmd +} + +func newCacheListCommand() *cobra.Command { + var ( + cachePath string + limit int + offset int + asJSON bool + ) + cmd := &cobra.Command{ + Use: "list [path]", + Short: "Page through 
cached file entries.", + Long: `Page through cached file entries ordered by path. Default +output is a tab-aligned table; pass ` + "`--json`" + ` for a machine-parseable +JSON array.`, + Example: ` codeiq cache list + codeiq cache list --limit 20 + codeiq cache list --json --limit 5`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + dbPath, err := resolveCachePath(args, cachePath) + if err != nil { + return err + } + c, err := cache.Open(dbPath) + if err != nil { + return fmt.Errorf("open cache %s: %w", dbPath, err) + } + defer c.Close() + entries, err := c.List(limit, offset) + if err != nil { + return err + } + if asJSON { + return jsonOut(cmd.OutOrStdout(), entries) + } + return printCacheListTable(cmd.OutOrStdout(), entries) + }, + } + cmd.Flags().StringVar(&cachePath, "cache-path", "", + "Path to the SQLite cache file (default: /.codeiq/cache/codeiq.sqlite).") + cmd.Flags().IntVar(&limit, "limit", 100, + "Maximum number of entries to return (default: 100, 0 for unlimited).") + cmd.Flags().IntVar(&offset, "offset", 0, + "Skip the first N entries (default: 0).") + cmd.Flags().BoolVar(&asJSON, "json", false, + "Emit entries as a JSON array instead of a table.") + return cmd +} + +func newCacheInspectCommand() *cobra.Command { + var cachePath string + cmd := &cobra.Command{ + Use: "inspect [path]", + Short: "Print the deserialised nodes/edges for one cached entry.", + Long: `Print the cached entry for the given content hash or file +path. The lookup tries (in order): exact content hash, exact file path, +then path-suffix match — useful when you only remember the relative path.`, + Example: ` codeiq cache inspect path/to/User.java + codeiq cache inspect abc123def456... 
+ codeiq cache inspect User.java /repo`, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + query := args[0] + dbPath, err := resolveCachePath(args[1:], cachePath) + if err != nil { + return err + } + c, err := cache.Open(dbPath) + if err != nil { + return fmt.Errorf("open cache %s: %w", dbPath, err) + } + defer c.Close() + entry, err := c.LookupByHashOrPath(query) + if err != nil { + if err == sql.ErrNoRows { + return fmt.Errorf("no cache entry matched %q", query) + } + return err + } + return jsonOut(cmd.OutOrStdout(), entry) + }, + } + cmd.Flags().StringVar(&cachePath, "cache-path", "", + "Path to the SQLite cache file (default: /.codeiq/cache/codeiq.sqlite).") + return cmd +} + +func newCacheClearCommand() *cobra.Command { + var ( + cachePath string + yes bool + ) + cmd := &cobra.Command{ + Use: "clear [path]", + Short: "Wipe every cached file / node / edge entry.", + Long: `Remove every cached row from files / nodes / edges / +analysis_runs. The cache version is preserved so the next ` + "`codeiq index`" + ` +does not trigger a version-mismatch rebuild prompt. + +This is a destructive operation. 
` + "`--yes`" + ` is required to confirm — +no interactive prompt; CI-friendly.`, + Example: ` codeiq cache clear --yes + codeiq cache clear --yes /repo + codeiq cache clear --yes --cache-path /tmp/scratch.sqlite`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + if !yes { + return newUsageError("cache clear is destructive; re-run with --yes to confirm") + } + dbPath, err := resolveCachePath(args, cachePath) + if err != nil { + return err + } + c, err := cache.Open(dbPath) + if err != nil { + return fmt.Errorf("open cache %s: %w", dbPath, err) + } + defer c.Close() + n, err := c.Clear() + if err != nil { + return err + } + fmt.Fprintf(cmd.OutOrStdout(), "cleared %d cache entries from %s\n", n, dbPath) + return nil + }, + } + cmd.Flags().StringVar(&cachePath, "cache-path", "", + "Path to the SQLite cache file (default: /.codeiq/cache/codeiq.sqlite).") + cmd.Flags().BoolVar(&yes, "yes", false, + "Confirm the destructive operation (required for clear to proceed).") + return cmd +} + +// --- helpers --- + +// resolveCachePath returns the SQLite cache path. Explicit --cache-path +// wins; otherwise the standard /.codeiq/cache/codeiq.sqlite. +func resolveCachePath(args []string, override string) (string, error) { + if override != "" { + return override, nil + } + root, err := resolvePath(args) + if err != nil { + return "", err + } + return filepath.Join(root, ".codeiq", "cache", "codeiq.sqlite"), nil +} + +// jsonOut writes v as indented JSON to w with a trailing newline. +func jsonOut(w io.Writer, v any) error { + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + return enc.Encode(v) +} + +// printCacheListTable renders cache entries as a column-aligned table. 
+func printCacheListTable(w io.Writer, entries []cache.ListEntry) error { + tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) + fmt.Fprintln(tw, "PATH\tLANGUAGE\tNODES\tEDGES\tHASH") + for _, e := range entries { + fmt.Fprintf(tw, "%s\t%s\t%d\t%d\t%s\n", + e.Path, e.Language, e.NodeCount, e.EdgeCount, truncateHash(e.ContentHash)) + } + return tw.Flush() +} + +// truncateHash returns the first 12 chars of a hash for compact rendering. +func truncateHash(h string) string { + if len(h) <= 12 { + return h + } + return strings.ToLower(h[:12]) +} + diff --git a/go/internal/cli/cache_test.go b/go/internal/cli/cache_test.go new file mode 100644 index 00000000..8eff4d75 --- /dev/null +++ b/go/internal/cli/cache_test.go @@ -0,0 +1,181 @@ +package cli + +import ( + "bytes" + "encoding/json" + "path/filepath" + "strings" + "testing" +) + +// TestCacheInfoCommand asserts `codeiq cache info` prints all canonical +// summary keys (path, size_bytes, version, file_count, node_count, +// edge_count). +func TestCacheInfoCommand(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "cache", "info", + "--cache-path", filepath.Join(dir, "cache.sqlite"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("cache info: %v\n%s", err, out.String()) + } + var got map[string]any + if err := json.Unmarshal(out.Bytes(), &got); err != nil { + t.Fatalf("cache info JSON invalid: %v\n%s", err, out.String()) + } + for _, k := range []string{"path", "size_bytes", "version", "file_count", "node_count", "edge_count"} { + if _, ok := got[k]; !ok { + t.Errorf("cache info missing %q", k) + } + } +} + +// TestCacheListCommandTable asserts the default table output begins with +// the PATH / LANGUAGE / NODES / EDGES / HASH column header. 
+func TestCacheListCommandTable(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "cache", "list", + "--cache-path", filepath.Join(dir, "cache.sqlite"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("cache list: %v\n%s", err, out.String()) + } + if !strings.Contains(out.String(), "PATH") { + t.Fatalf("cache list missing PATH column header:\n%s", out.String()) + } + if !strings.Contains(out.String(), "LANGUAGE") { + t.Errorf("cache list missing LANGUAGE column:\n%s", out.String()) + } +} + +// TestCacheListCommandJSON asserts the --json flag produces a JSON array. +func TestCacheListCommandJSON(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "cache", "list", "--json", + "--cache-path", filepath.Join(dir, "cache.sqlite"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("cache list --json: %v\n%s", err, out.String()) + } + var arr []any + if err := json.Unmarshal(out.Bytes(), &arr); err != nil { + t.Fatalf("cache list --json invalid JSON: %v\n%s", err, out.String()) + } +} + +// TestCacheClearRequiresYes asserts the clear subcommand bails without +// `--yes`. +func TestCacheClearRequiresYes(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "cache", "clear", + "--cache-path", filepath.Join(dir, "cache.sqlite"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err == nil { + t.Fatalf("expected error without --yes, got success:\n%s", out.String()) + } else if !strings.Contains(err.Error(), "yes") { + t.Errorf("error must mention --yes: %v", err) + } +} + +// TestCacheClearWipesEntries asserts `codeiq cache clear --yes` empties +// the entries table. 
+func TestCacheClearWipesEntries(t *testing.T) { + dir := statsFixtureDir(t) + // Sanity: pre-clear there is at least one entry. + root := NewRootCommand() + root.SetArgs([]string{ + "cache", "list", "--json", + "--cache-path", filepath.Join(dir, "cache.sqlite"), + dir, + }) + var listOut bytes.Buffer + root.SetOut(&listOut) + root.SetErr(&listOut) + if err := root.Execute(); err != nil { + t.Fatalf("pre-clear list: %v", err) + } + + // Clear. + root = NewRootCommand() + root.SetArgs([]string{ + "cache", "clear", "--yes", + "--cache-path", filepath.Join(dir, "cache.sqlite"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("cache clear: %v\n%s", err, out.String()) + } + if !strings.Contains(out.String(), "cleared") { + t.Errorf("clear output must mention `cleared`: %s", out.String()) + } + + // Post-clear: list must be empty. + root = NewRootCommand() + root.SetArgs([]string{ + "cache", "list", "--json", + "--cache-path", filepath.Join(dir, "cache.sqlite"), + dir, + }) + var afterList bytes.Buffer + root.SetOut(&afterList) + root.SetErr(&afterList) + if err := root.Execute(); err != nil { + t.Fatalf("post-clear list: %v", err) + } + trimmed := strings.TrimSpace(afterList.String()) + if trimmed != "null" && trimmed != "[]" { + t.Errorf("expected empty list after clear, got: %q", trimmed) + } +} + +// TestCacheInspectByPath asserts a cache.inspect call against a relative +// path returns a non-empty entry. 
+func TestCacheInspectByPath(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "cache", "inspect", "User.java", + "--cache-path", filepath.Join(dir, "cache.sqlite"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("cache inspect: %v\n%s", err, out.String()) + } + var got map[string]any + if err := json.Unmarshal(out.Bytes(), &got); err != nil { + t.Fatalf("cache inspect JSON invalid: %v\n%s", err, out.String()) + } + if got["ContentHash"] == "" && got["content_hash"] == "" { + t.Errorf("cache inspect missing ContentHash:\n%s", out.String()) + } +} diff --git a/go/internal/cli/cypher.go b/go/internal/cli/cypher.go new file mode 100644 index 00000000..2e97a664 --- /dev/null +++ b/go/internal/cli/cypher.go @@ -0,0 +1,137 @@ +package cli + +import ( + "encoding/json" + "fmt" + "io" + "path/filepath" + "sort" + "strings" + "text/tabwriter" + "time" + + "github.com/randomcodespace/codeiq/go/internal/graph" + "github.com/spf13/cobra" +) + +func init() { + registerSubcommand(newCypherCommand) +} + +// newCypherCommand assembles `codeiq cypher` — the actually-implemented Go +// port of `cypher` (the Java side is a stub since commit 81b645c). Runs a +// read-only Cypher query against the Kuzu store and prints rows as JSON +// (default) or a column-aligned table. +// +// Per the read-only contract, mutation keywords (CREATE / DELETE / SET / +// MERGE / DROP / CALL non-readonly-procs) are rejected before execution +// by the OpenReadOnly + MutationKeyword gate in internal/graph. 
+func newCypherCommand() *cobra.Command { + var ( + graphDir string + asTable bool + maxResults int + queryTimeout time.Duration + ) + cmd := &cobra.Command{ + Use: "cypher [path]", + Short: "Execute a raw read-only Cypher query against the Kuzu graph.", + Long: `Execute a single read-only Cypher query against the Kuzu graph and +print the result rows to stdout as JSON (default) or a column-aligned table. + +The Kuzu store is opened read-only. Mutation keywords (CREATE, DELETE, +SET, MERGE, REMOVE, DETACH, DROP, FOREACH, LOAD CSV, COPY, and CALL of +non-readonly procedures) are rejected before execution. Result rows are +capped at --max-results; the response carries a "truncated" flag when +the cap is hit so the caller can re-run with a tighter query. + +Note: the Java side ` + "`cypher`" + ` command has been a stub since commit +81b645c — the Go port actually wires this through to graph.CypherRows().`, + Example: ` codeiq cypher "MATCH (n) RETURN count(n) AS c" + codeiq cypher "MATCH (n:CodeNode) RETURN n.label LIMIT 5" --table + codeiq cypher 'MATCH (n) RETURN n.kind, count(n) ORDER BY count(n) DESC' --max-results 50`, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + query := args[0] + root, err := resolvePath(args[1:]) + if err != nil { + return err + } + gdir := graphDir + if gdir == "" { + gdir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + // Cheap early-out: surface the blocked keyword before opening + // Kuzu so the read-only gate's error message reaches the user + // quickly. The graph layer will re-check after open. 
// printCypherTable renders rows as a column-aligned table using
// text/tabwriter.
//
// The header is the sorted union of the keys of every row, so the column
// order is stable regardless of map iteration order. A row that lacks one of
// the columns renders an empty cell for it; a column that is present with a
// nil value is still formatted with %v. Empty input is a no-op and writes
// nothing.
//
// The returned error is whatever tabwriter's Flush reports for w.
func printCypherTable(w io.Writer, rows []map[string]any) error {
	if len(rows) == 0 {
		return nil
	}
	// Stable column order: the union of all row keys, sorted.
	keySet := make(map[string]struct{})
	for _, r := range rows {
		for k := range r {
			keySet[k] = struct{}{}
		}
	}
	cols := make([]string, 0, len(keySet))
	for k := range keySet {
		cols = append(cols, k)
	}
	sort.Strings(cols)

	tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0)
	fmt.Fprintln(tw, strings.Join(cols, "\t"))
	// One scratch slice is reused for every row; each slot is overwritten
	// on every iteration so no stale value can leak between rows.
	cells := make([]string, len(cols))
	for _, r := range rows {
		for i, c := range cols {
			v, ok := r[c]
			if !ok {
				// Absent key: formatting the zero interface would print
				// "<nil>", so emit an empty cell instead.
				cells[i] = ""
				continue
			}
			cells[i] = fmt.Sprintf("%v", v)
		}
		fmt.Fprintln(tw, strings.Join(cells, "\t"))
	}
	return tw.Flush()
}
asserts a CREATE statement is rejected +// at the mutation gate before reaching Kuzu. +func TestCypherCommandRejectsMutation(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "cypher", + "CREATE (:CodeNode {id: 'x'})", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + err := root.Execute() + if err == nil { + t.Fatalf("expected mutation rejection, got success:\n%s", out.String()) + } + if !strings.Contains(err.Error(), "read-only") && !strings.Contains(err.Error(), "CREATE") { + t.Fatalf("error must mention read-only / CREATE: %v", err) + } +} + +// TestCypherCommandTable asserts the --table flag renders an aligned table +// with the column header on the first line. +func TestCypherCommandTable(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "cypher", + "MATCH (n:CodeNode) RETURN count(n) AS c", + "--table", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("cypher: %v\n%s", err, out.String()) + } + lines := strings.Split(strings.TrimSpace(out.String()), "\n") + if len(lines) < 2 { + t.Fatalf("expected header + at least one row, got:\n%s", out.String()) + } + if !strings.Contains(lines[0], "c") { + t.Errorf("first line must contain column header `c`, got %q", lines[0]) + } +} diff --git a/go/internal/cli/detectors_register.go b/go/internal/cli/detectors_register.go new file mode 100644 index 00000000..56c1e359 --- /dev/null +++ b/go/internal/cli/detectors_register.go @@ -0,0 +1,28 @@ +package cli + +// Side-effect imports: each detector package's init() registers itself with +// the process-wide Default registry. Without these imports the linker would +// drop the packages and the CLI binary would ship with the registry empty. 
+// +// Keep this list flat (leaf packages only) and exhaustive — any detector +// package added under internal/detector/ must land here too. +import ( + _ "github.com/randomcodespace/codeiq/go/internal/detector/auth" + _ "github.com/randomcodespace/codeiq/go/internal/detector/csharp" + _ "github.com/randomcodespace/codeiq/go/internal/detector/frontend" + _ "github.com/randomcodespace/codeiq/go/internal/detector/generic" + _ "github.com/randomcodespace/codeiq/go/internal/detector/golang" + _ "github.com/randomcodespace/codeiq/go/internal/detector/iac" + _ "github.com/randomcodespace/codeiq/go/internal/detector/jvm/java" + _ "github.com/randomcodespace/codeiq/go/internal/detector/jvm/kotlin" + _ "github.com/randomcodespace/codeiq/go/internal/detector/jvm/scala" + _ "github.com/randomcodespace/codeiq/go/internal/detector/markup" + _ "github.com/randomcodespace/codeiq/go/internal/detector/proto" + _ "github.com/randomcodespace/codeiq/go/internal/detector/python" + _ "github.com/randomcodespace/codeiq/go/internal/detector/script/shell" + _ "github.com/randomcodespace/codeiq/go/internal/detector/sql" + _ "github.com/randomcodespace/codeiq/go/internal/detector/structured" + _ "github.com/randomcodespace/codeiq/go/internal/detector/systems/cpp" + _ "github.com/randomcodespace/codeiq/go/internal/detector/systems/rust" + _ "github.com/randomcodespace/codeiq/go/internal/detector/typescript" +) diff --git a/go/internal/cli/docs_test.go b/go/internal/cli/docs_test.go new file mode 100644 index 00000000..e029f282 --- /dev/null +++ b/go/internal/cli/docs_test.go @@ -0,0 +1,68 @@ +package cli + +import ( + "strings" + "testing" + + "github.com/spf13/cobra" + "github.com/spf13/pflag" +) + +// TestEverySubcommandIsDocumented asserts the §7.1 contract: every Cobra +// subcommand (including nested subcommands like `query consumers`) has Use, +// Short, Long, Example, and RunE populated; every flag has Usage text. A +// subcommand or flag that lacks docs fails the build. 
+func TestEverySubcommandIsDocumented(t *testing.T) { + root := NewRootCommand() + var walk func(parent string, cmd *cobra.Command) + walk = func(parent string, cmd *cobra.Command) { + // Skip Cobra auto-generated children (help / completion). + if cmd.Hidden || cmd.Name() == "help" || cmd.Name() == "completion" { + return + } + full := cmd.Name() + if parent != "" { + full = parent + " " + full + } + if cmd.Use == "" { + t.Errorf("%s: Use is empty", full) + } + if cmd.Short == "" { + t.Errorf("%s: Short is empty", full) + } + if cmd.Long == "" { + t.Errorf("%s: Long is empty", full) + } + if cmd.Example == "" { + t.Errorf("%s: Example is empty", full) + } else if lines := strings.Split(cmd.Example, "\n"); len(lines) < 3 { + t.Errorf("%s: Example must have >= 3 lines, got %d", full, len(lines)) + } + if cmd.RunE == nil { + t.Errorf("%s: must use RunE (returns error), not Run", full) + } + cmd.Flags().VisitAll(func(f *pflag.Flag) { + if f.Usage == "" { + t.Errorf("%s --%s: Usage is empty", full, f.Name) + } + }) + for _, child := range cmd.Commands() { + walk(full, child) + } + } + for _, cmd := range root.Commands() { + walk("", cmd) + } +} + +// TestRootCommandPersistentFlagsDocumented ensures the global flags themselves +// are documented — they're inherited by every subcommand so a missing Usage +// there pollutes every help screen. 
+func TestRootCommandPersistentFlagsDocumented(t *testing.T) { + root := NewRootCommand() + root.PersistentFlags().VisitAll(func(f *pflag.Flag) { + if f.Usage == "" { + t.Errorf("persistent flag --%s: Usage is empty", f.Name) + } + }) +} diff --git a/go/internal/cli/enrich.go b/go/internal/cli/enrich.go new file mode 100644 index 00000000..4aba1aac --- /dev/null +++ b/go/internal/cli/enrich.go @@ -0,0 +1,65 @@ +package cli + +import ( + "fmt" + "path/filepath" + + "github.com/randomcodespace/codeiq/go/internal/analyzer" + "github.com/randomcodespace/codeiq/go/internal/cache" + "github.com/spf13/cobra" +) + +func init() { + registerSubcommand(func() *cobra.Command { + var graphDir string + cmd := &cobra.Command{ + Use: "enrich [path]", + Short: "Load the SQLite cache into Kuzu and run linkers, classifiers, intelligence.", + Long: `Enrich the analysis cache into a Kuzu graph store. + +Reads the SQLite cache previously written by ` + "`codeiq index`" + ` and runs +the in-memory enrichment passes -- linkers (TopicLinker, EntityLinker, +ModuleContainmentLinker), the layer classifier, the lexical enricher +(doc comments + config keys), per-language extractors (Java, TypeScript, +Python, Go), and the filesystem-driven service detector. The resulting +node + edge set is bulk-loaded into a Kuzu database at +` + "`.codeiq/graph/codeiq.kuzu/`" + ` and indexed for fast read queries. + +This is the second step of the pipeline ` + "`index -> enrich -> mcp`" + `. +After enrich, read-side commands (` + "`stats`, `query`, `find`, `topology`" + `) +become available and the stdio MCP server can serve clients.`, + Example: ` # Enrich the current directory using the cache written by index + codeiq enrich . 
// findKindSpec describes one preset finder. The entries of findKindSpecs
// are written as positional literals, so the field order here (name, kind,
// short, long) must not change.
type findKindSpec struct {
	// name is the subcommand name; it also prefixes the Use line.
	name string
	// kind is the node-kind value handed to the graph query.
	kind string
	// short becomes the subcommand's Short help.
	short string
	// long becomes the subcommand's Long help.
	long string
}
Endpoints are produced by detectors such as Spring REST, +Flask / FastAPI / Django routes, Express controllers, gRPC server stubs, and +the Kafka @KafkaListener family.`, + }, + { + "guards", "guard", + "List GUARD nodes (auth filters, route guards) from the graph.", + `Return all GUARD nodes from the enriched graph. Guards represent auth +filters / route guards / middleware-style gatekeepers — Spring Security +filters, FastAPI Depends, Angular route guards, etc.`, + }, + { + "entities", "entity", + "List ENTITY nodes (JPA / ORM entities) from the graph.", + `Return all persisted ENTITY nodes from the enriched graph. Entities +are produced by ORM detectors (JPA, EF Core, Django models, SQLAlchemy, +Sequelize, TypeORM, GORM, ...).`, + }, + { + "topics", "topic", + "List TOPIC nodes (Kafka, RabbitMQ, Redis Streams, ...) from the graph.", + `Return all messaging TOPIC nodes from the enriched graph. Topics are +emitted by messaging detectors — Kafka @KafkaListener / @SendTo, Spring +Cloud Stream bindings, NestJS @MessagePattern, Rust lapin queues, etc.`, + }, + { + "queues", "queue", + "List QUEUE nodes from the graph.", + `Return all messaging QUEUE nodes from the enriched graph. Queues are +detected separately from topics — JMS / SQS / RabbitMQ direct queues live +here, while pub-sub topics live under ` + "`find topics`" + `.`, + }, + { + "services", "service", + "List SERVICE nodes (module/service boundaries) from the graph.", + `Return all SERVICE nodes from the enriched graph. SERVICE nodes are +synthesised by ServiceDetector from build files (pom.xml, package.json, +Cargo.toml, ...) and represent module / service boundaries.`, + }, + { + "databases", "database_connection", + "List DATABASE_CONNECTION nodes from the graph.", + `Return all DATABASE_CONNECTION nodes from the enriched graph. 
These +are detected from JDBC URLs, application-yml datasource blocks, EF Core +DbContext configurations, Sequelize / TypeORM connection options, ...`, + }, + { + "components", "component", + "List COMPONENT nodes (frontend components) from the graph.", + `Return all frontend COMPONENT nodes from the enriched graph — +React / Vue / Angular / Svelte component declarations detected by the +frontend extractor family.`, + }, +} + +// newFindCommand assembles the `find` parent and one finder subcommand per +// entry in findKindSpecs. +func newFindCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "find [path]", + Short: "Preset finders for common node kinds (endpoints, guards, entities, ...).", + Long: `Preset finders return paginated lists of nodes of a given kind from +the enriched graph. Higher-level than ` + "`codeiq query`" + `, which operates on +individual node ids; ` + "`codeiq find`" + ` returns whole categories. + +Each finder accepts ` + "`--limit`" + ` / ` + "`--offset`" + ` for paging and produces +tab-separated ` + "`id\\tlabel`" + ` rows ordered by id.`, + Example: ` codeiq find endpoints + codeiq find entities --limit 50 + codeiq find services /repo --graph-dir /tmp/scratch.kuzu`, + RunE: func(c *cobra.Command, _ []string) error { return c.Help() }, + } + for _, spec := range findKindSpecs { + cmd.AddCommand(newFindKindCommand(spec)) + } + return cmd +} + +// newFindKindCommand returns one finder subcommand for the given spec. The +// shared body resolves the path / graph-dir, opens the store, calls +// FindByKindPaginated, and prints `id\tlabel` rows. 
+func newFindKindCommand(spec findKindSpec) *cobra.Command { + var ( + graphDir string + limit int + offset int + ) + cmd := &cobra.Command{ + Use: spec.name + " [path]", + Short: spec.short, + Long: spec.long, + Example: fmt.Sprintf(` codeiq find %s + codeiq find %s --limit 200 + codeiq find %s /repo`, spec.name, spec.name, spec.name), + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + root, err := resolvePath(args) + if err != nil { + return err + } + gdir := graphDir + if gdir == "" { + gdir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + store, err := graph.Open(gdir) + if err != nil { + return fmt.Errorf("open graph %s: %w", gdir, err) + } + defer store.Close() + nodes, err := store.FindByKindPaginated(spec.kind, offset, limit) + if err != nil { + return err + } + for _, n := range nodes { + fmt.Fprintf(cmd.OutOrStdout(), "%s\t%s\n", n.ID, n.Label) + } + return nil + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + cmd.Flags().IntVar(&limit, "limit", 100, + "Maximum number of rows to return (default: 100).") + cmd.Flags().IntVar(&offset, "offset", 0, + "Skip the first N rows (default: 0).") + return cmd +} diff --git a/go/internal/cli/find_test.go b/go/internal/cli/find_test.go new file mode 100644 index 00000000..c8048923 --- /dev/null +++ b/go/internal/cli/find_test.go @@ -0,0 +1,90 @@ +package cli + +import ( + "bytes" + "path/filepath" + "strings" + "testing" +) + +// TestFindSubcommandsRegistered runs each finder against fixture-minimal, +// asserts exit 0 and no panic. The fixture has 1 service / 2 endpoints / 1 +// entity (per the index of UserController + User + models.py) so each +// finder produces non-empty output for at least `endpoints` and `entities`. 
+func TestFindSubcommandsRegistered(t *testing.T) { + dir := statsFixtureDir(t) + cases := []struct { + sub string + want []string // labels that should appear; empty = any output OK + }{ + {"endpoints", nil}, + {"guards", nil}, + {"entities", nil}, + {"topics", nil}, + {"queues", nil}, + {"services", nil}, + {"databases", nil}, + {"components", nil}, + } + for _, tc := range cases { + t.Run(tc.sub, func(t *testing.T) { + root := NewRootCommand() + root.SetArgs([]string{ + "find", tc.sub, + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("find %s: %v\n%s", tc.sub, err, out.String()) + } + }) + } +} + +// TestFindEndpointsReturnsRows asserts that running `find endpoints` +// against fixture-minimal lists the controller endpoints — fixture-minimal +// has 3 GET/POST endpoints on /api/users. +func TestFindEndpointsReturnsRows(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "find", "endpoints", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("find endpoints: %v\n%s", err, out.String()) + } + // fixture-minimal has 5 endpoints (3 Java + 2 Python). Assert at least + // 3 tab-separated rows and that one of the controller methods appears + // in the output. + rows := strings.Split(strings.TrimSpace(out.String()), "\n") + if len(rows) < 3 { + t.Fatalf("find endpoints returned %d rows, want >=3:\n%s", len(rows), out.String()) + } + if !strings.Contains(out.String(), "createUser") { + t.Fatalf("find endpoints missing createUser:\n%s", out.String()) + } +} + +// TestFindParentHelp asserts that running `codeiq find` without a +// subcommand renders the help text. 
+func TestFindParentHelp(t *testing.T) { + root := NewRootCommand() + root.SetArgs([]string{"find"}) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("find parent: %v", err) + } + if !strings.Contains(out.String(), "Available Commands") { + t.Fatalf("find parent did not print help:\n%s", out.String()) + } +} diff --git a/go/internal/cli/flow.go b/go/internal/cli/flow.go new file mode 100644 index 00000000..526a7a7b --- /dev/null +++ b/go/internal/cli/flow.go @@ -0,0 +1,105 @@ +package cli + +import ( + "context" + "fmt" + "io" + "os" + "path/filepath" + "strings" + "time" + + "github.com/randomcodespace/codeiq/go/internal/flow" + "github.com/randomcodespace/codeiq/go/internal/graph" + "github.com/spf13/cobra" +) + +func init() { + registerSubcommand(newFlowCommand) +} + +// newFlowCommand assembles `codeiq flow` — generates an architecture flow +// diagram for one of the five canonical views. +func newFlowCommand() *cobra.Command { + var ( + graphDir string + format string + outPath string + queryTimeout time.Duration + ) + cmd := &cobra.Command{ + Use: "flow [path]", + Short: "Generate an architecture flow diagram (overview / ci / deploy / runtime / auth).", + Long: `Generate an architecture flow diagram for the analyzed codebase. + +Five views ship out of the box: + overview The high-level system view (CI + Infra + App + Security). + ci CI/CD pipeline detail (workflows, jobs, triggers). + deploy Deployment topology (K8s, Docker, Terraform). + runtime Runtime architecture grouped by layer. + auth Auth / security view with protection coverage. + +Output formats: json (default), mermaid, dot, yaml. Use --out to write to +a file instead of stdout. 
The renderer is deterministic — nodes within +each subgraph and edges are sorted by ID before emission.`, + Example: ` codeiq flow overview + codeiq flow runtime --format mermaid > runtime.mmd + codeiq flow auth --format dot --out auth.dot + codeiq flow deploy --format yaml /repo`, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + view := args[0] + if !flow.IsKnownView(view) { + return newUsageError( + "unknown view %q; valid: overview, ci, deploy, runtime, auth", view) + } + format = strings.ToLower(strings.TrimSpace(format)) + root, err := resolvePath(args[1:]) + if err != nil { + return err + } + gdir := graphDir + if gdir == "" { + gdir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + store, err := graph.OpenReadOnly(gdir, queryTimeout) + if err != nil { + return fmt.Errorf("open graph %s: %w", gdir, err) + } + defer store.Close() + + engine := flow.NewEngine(store) + diag, err := engine.Generate(context.Background(), flow.View(view)) + if err != nil { + return err + } + rendered, err := flow.Render(diag, format) + if err != nil { + return err + } + return writeFlowOutput(cmd.OutOrStdout(), rendered, outPath) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + cmd.Flags().StringVar(&format, "format", "json", + "Output format: json, mermaid, dot, yaml.") + cmd.Flags().StringVar(&outPath, "out", "", + "Write the rendered diagram to this file instead of stdout.") + cmd.Flags().DurationVar(&queryTimeout, "query-timeout", graph.DefaultQueryTimeout, + "Per-query wall-clock timeout (default: 30s).") + return cmd +} + +// writeFlowOutput emits content to outPath (when non-empty) or to w. +// Always terminates with a trailing newline if the content lacks one. 
func writeFlowOutput(w io.Writer, content, outPath string) error {
	// Guarantee exactly one terminating newline is present; content that
	// already ends in one is passed through untouched.
	payload := content
	if !strings.HasSuffix(payload, "\n") {
		payload += "\n"
	}

	// A non-empty outPath redirects the payload to that file; w is not
	// written in that case.
	if outPath != "" {
		return os.WriteFile(outPath, []byte(payload), 0o644)
	}

	if _, err := io.WriteString(w, payload); err != nil {
		return err
	}
	return nil
}
+func TestFlowCommandJSON(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "flow", "runtime", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("flow: %v\n%s", err, out.String()) + } + var got map[string]any + if err := json.Unmarshal(out.Bytes(), &got); err != nil { + t.Fatalf("flow JSON is invalid: %v\n%s", err, out.String()) + } + if got["view"] != "runtime" { + t.Errorf("view = %v, want runtime", got["view"]) + } +} + +// TestFlowCommandRejectsUnknownView asserts the CLI surfaces an unknown +// view as a usage error. +func TestFlowCommandRejectsUnknownView(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "flow", "bogus", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err == nil { + t.Fatalf("expected error for unknown view, got success:\n%s", out.String()) + } +} + +// TestFlowCommandAllFiveViews asserts every documented view succeeds +// against the fixture. 
+func TestFlowCommandAllFiveViews(t *testing.T) { + dir := statsFixtureDir(t) + for _, view := range []string{"overview", "ci", "deploy", "runtime", "auth"} { + t.Run(view, func(t *testing.T) { + root := NewRootCommand() + root.SetArgs([]string{ + "flow", view, + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("flow %s: %v\n%s", view, err, out.String()) + } + if out.Len() == 0 { + t.Fatalf("flow %s produced empty output", view) + } + }) + } +} diff --git a/go/internal/cli/graph_cmd.go b/go/internal/cli/graph_cmd.go new file mode 100644 index 00000000..31bbdd35 --- /dev/null +++ b/go/internal/cli/graph_cmd.go @@ -0,0 +1,206 @@ +package cli + +import ( + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + "sort" + "strings" + "time" + + "github.com/randomcodespace/codeiq/go/internal/flow" + "github.com/randomcodespace/codeiq/go/internal/graph" + "github.com/randomcodespace/codeiq/go/internal/model" + "github.com/spf13/cobra" + "gopkg.in/yaml.v3" +) + +func init() { + registerSubcommand(newGraphCommand) +} + +// newGraphCommand assembles `codeiq graph` — full graph export in JSON, +// YAML, Mermaid, or DOT. +func newGraphCommand() *cobra.Command { + var ( + graphDir string + format string + outPath string + queryTimeout time.Duration + ) + cmd := &cobra.Command{ + Use: "graph [path]", + Short: "Export the full graph in JSON, YAML, Mermaid, or DOT.", + Long: `Export every node and edge from the analyzed graph in a single +file. Useful for parity diffs, off-line analysis, and feeding the graph +into other tools. + +JSON / YAML emit a {nodes, edges, stats} object with the full hydrated +properties for every node and edge. 
Mermaid and DOT collapse the data +into a renderable diagram — large graphs (>500 nodes) are truncated to +keep the output legible; use JSON / YAML for the complete view.`, + Example: ` codeiq graph --format json > graph.json + codeiq graph --format mermaid | head -20 + codeiq graph --format dot --out graph.dot + codeiq graph --format yaml /repo`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + format = strings.ToLower(strings.TrimSpace(format)) + root, err := resolvePath(args) + if err != nil { + return err + } + gdir := graphDir + if gdir == "" { + gdir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + store, err := graph.OpenReadOnly(gdir, queryTimeout) + if err != nil { + return fmt.Errorf("open graph %s: %w", gdir, err) + } + defer store.Close() + + nodes, err := store.LoadAllNodes() + if err != nil { + return fmt.Errorf("load nodes: %w", err) + } + edges, err := store.LoadAllEdges() + if err != nil { + return fmt.Errorf("load edges: %w", err) + } + body, err := renderGraphExport(format, nodes, edges) + if err != nil { + return err + } + return writeGraphOutput(cmd.OutOrStdout(), body, outPath) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + cmd.Flags().StringVarP(&format, "format", "f", "json", + "Output format: json, yaml, mermaid, dot.") + cmd.Flags().StringVar(&outPath, "out", "", + "Write the exported graph to this file instead of stdout.") + cmd.Flags().DurationVar(&queryTimeout, "query-timeout", graph.DefaultQueryTimeout, + "Per-query wall-clock timeout (default: 30s).") + return cmd +} + +// renderGraphExport dispatches the format. JSON / YAML emit the full +// (nodes, edges) payload; Mermaid / DOT delegate to the flow renderer +// after projecting the graph into a flow.Diagram. 
+func renderGraphExport(format string, nodes []*model.CodeNode, edges []*model.CodeEdge) (string, error) { + switch format { + case "", "json": + return renderGraphJSON(nodes, edges) + case "yaml", "yml": + return renderGraphYAML(nodes, edges) + case "mermaid": + return flow.RenderMermaid(graphToDiagram(nodes, edges)), nil + case "dot": + return flow.RenderDOT(graphToDiagram(nodes, edges)), nil + default: + return "", fmt.Errorf("graph: unknown format %q (valid: json, yaml, mermaid, dot)", format) + } +} + +func renderGraphJSON(nodes []*model.CodeNode, edges []*model.CodeEdge) (string, error) { + body, err := json.MarshalIndent(graphExportPayload(nodes, edges), "", " ") + if err != nil { + return "", fmt.Errorf("graph: marshal json: %w", err) + } + return string(body), nil +} + +func renderGraphYAML(nodes []*model.CodeNode, edges []*model.CodeEdge) (string, error) { + body, err := yaml.Marshal(graphExportPayload(nodes, edges)) + if err != nil { + return "", fmt.Errorf("graph: marshal yaml: %w", err) + } + return string(body), nil +} + +// graphExportPayload assembles the canonical {nodes, edges, stats} +// envelope used by JSON and YAML exports. +func graphExportPayload(nodes []*model.CodeNode, edges []*model.CodeEdge) map[string]any { + return map[string]any{ + "nodes": nodes, + "edges": edges, + "stats": map[string]any{ + "node_count": len(nodes), + "edge_count": len(edges), + }, + } +} + +// graphToDiagram projects the raw graph into a flow.Diagram so the Mermaid +// / DOT renderers can render it. Nodes are emitted as loose nodes (no +// subgraph grouping) and edges as flow edges. To keep the rendered output +// legible, the projection truncates at 500 nodes — large graphs should be +// exported as JSON / YAML. 
+const graphExportMermaidLimit = 500 + +func graphToDiagram(nodes []*model.CodeNode, edges []*model.CodeEdge) *flow.Diagram { + d := flow.NewDiagram("Full Graph", "graph") + limit := len(nodes) + if limit > graphExportMermaidLimit { + limit = graphExportMermaidLimit + } + // Deterministic sort by ID. + sorted := append([]*model.CodeNode(nil), nodes...) + sort.Slice(sorted, func(i, j int) bool { return sorted[i].ID < sorted[j].ID }) + for i := 0; i < limit; i++ { + n := sorted[i] + d.LooseNodes = append(d.LooseNodes, flow.NewNode(n.ID, n.Label, flowKindFor(n.Kind))) + } + for _, e := range edges { + d.Edges = append(d.Edges, flow.NewLabelEdge(e.SourceID, e.TargetID, e.Kind.String())) + } + d.Stats = map[string]any{ + "node_count": len(nodes), + "edge_count": len(edges), + "rendered_nodes": limit, + "truncated": len(nodes) > limit, + } + return d +} + +// flowKindFor maps a NodeKind onto the kind label flow.renderer uses for +// bracket / shape lookup. Falls back to "code" for kinds without a custom +// shape. 
// writeGraphOutput emits the exported graph either to the file at outPath
// (when non-empty) or to w. A trailing newline is appended when content
// does not already end with one.
func writeGraphOutput(w io.Writer, content, outPath string) error {
	text := content
	if endsWithNewline := strings.HasSuffix(text, "\n"); !endsWithNewline {
		text = text + "\n"
	}

	switch outPath {
	case "":
		// No destination file requested: stream to the supplied writer.
		_, err := io.WriteString(w, text)
		return err
	default:
		return os.WriteFile(outPath, []byte(text), 0o644)
	}
}
+func TestGraphCommandJSON(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "graph", "--format", "json", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("graph: %v\n%s", err, out.String()) + } + var got map[string]any + if err := json.Unmarshal(out.Bytes(), &got); err != nil { + t.Fatalf("graph JSON invalid: %v\n%s", err, out.String()) + } + for _, k := range []string{"nodes", "edges", "stats"} { + if _, ok := got[k]; !ok { + t.Errorf("graph JSON missing %q", k) + } + } +} + +// TestGraphCommandYAML asserts the YAML export is parseable and contains +// the canonical top-level keys. +func TestGraphCommandYAML(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "graph", "-f", "yaml", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("graph yaml: %v\n%s", err, out.String()) + } + for _, k := range []string{"nodes:", "edges:", "stats:"} { + if !strings.Contains(out.String(), k) { + t.Errorf("graph yaml missing %q\n%s", k, out.String()) + } + } +} + +// TestGraphCommandMermaid asserts the mermaid export starts with `graph LR`. +func TestGraphCommandMermaid(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "graph", "-f", "mermaid", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("graph mermaid: %v\n%s", err, out.String()) + } + if !strings.HasPrefix(out.String(), "graph LR\n") { + t.Fatalf("graph mermaid must start with `graph LR`, got:\n%s", out.String()) + } +} + +// TestGraphCommandDOT asserts the dot export is well-formed. 
+func TestGraphCommandDOT(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "graph", "-f", "dot", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("graph dot: %v\n%s", err, out.String()) + } + if !strings.HasPrefix(out.String(), "digraph G {") { + t.Fatalf("graph dot must start with `digraph G {`, got:\n%s", out.String()) + } +} + +// TestGraphCommandUnknownFormat asserts an unknown format is surfaced as +// an error. +func TestGraphCommandUnknownFormat(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "graph", "-f", "bogus", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err == nil { + t.Fatalf("expected error for unknown format, got success:\n%s", out.String()) + } +} diff --git a/go/internal/cli/index.go b/go/internal/cli/index.go new file mode 100644 index 00000000..20840684 --- /dev/null +++ b/go/internal/cli/index.go @@ -0,0 +1,93 @@ +package cli + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/randomcodespace/codeiq/go/internal/analyzer" + "github.com/randomcodespace/codeiq/go/internal/cache" + "github.com/randomcodespace/codeiq/go/internal/detector" + + // Blank imports register all phase-1 detectors with detector.Default. 
+ _ "github.com/randomcodespace/codeiq/go/internal/detector/generic" + _ "github.com/randomcodespace/codeiq/go/internal/detector/jvm/java" + _ "github.com/randomcodespace/codeiq/go/internal/detector/python" + + "github.com/spf13/cobra" +) + +func init() { + registerSubcommand(func() *cobra.Command { + var ( + batchSize int + workers int + ) + cmd := &cobra.Command{ + Use: "index [path]", + Short: "Scan a codebase into the analysis cache (write path).", + Long: `Scan the source tree at [path] and write detector results into +the SQLite analysis cache at /.codeiq/cache/codeiq.sqlite. The cache is +keyed by SHA-256 file content hash so subsequent runs reuse cached results +for unchanged files. After indexing, run "codeiq enrich" to load the cache +into the Kuzu graph store (phase 2). + +Phase 1 ships 5 detectors -- Spring REST controllers, JPA entities, Django +models, Flask routes, and a generic-imports detector. Languages covered: +Java and Python.`, + Example: ` codeiq index . + codeiq index /path/to/repo --batch-size 1000 --workers 8 + codeiq index . + # -> Files: 12 Nodes: 47 Edges: 23 Cache: ./.codeiq/cache/codeiq.sqlite`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + path := "." 
+ if len(args) == 1 { + path = args[0] + } + abs, err := filepath.Abs(path) + if err != nil { + return err + } + if st, err := os.Stat(abs); err != nil || !st.IsDir() { + return newUsageError("path %q is not a directory", abs) + } + cacheDir := filepath.Join(abs, ".codeiq", "cache") + if err := os.MkdirAll(cacheDir, 0755); err != nil { + return fmt.Errorf("mkdir cache: %w", err) + } + dbPath := filepath.Join(cacheDir, "codeiq.sqlite") + c, err := cache.Open(dbPath) + if err != nil { + return err + } + defer c.Close() + + a := analyzer.NewAnalyzer(analyzer.Options{ + Cache: c, + Registry: detector.Default, + BatchSize: batchSize, + Workers: workers, + }) + stats, err := a.Run(abs) + if err != nil { + return err + } + fmt.Fprintf(cmd.OutOrStdout(), + "Files: %d Nodes: %d Edges: %d Cache: %s\n", + stats.Files, stats.Nodes, stats.Edges, dbPath) + if stats.DedupedNodes > 0 || stats.DedupedEdges > 0 || stats.DroppedEdges > 0 { + fmt.Fprintf(cmd.OutOrStdout(), + "Deduped: %d nodes, %d edges Dropped: %d phantom edges\n", + stats.DedupedNodes, stats.DedupedEdges, stats.DroppedEdges) + } + return nil + }, + } + cmd.Flags().IntVar(&batchSize, "batch-size", 500, + "Number of files processed per batch (default: 500).") + cmd.Flags().IntVarP(&workers, "workers", "w", 0, + "Worker goroutine count (default: 2 * GOMAXPROCS).") + return cmd + }) +} diff --git a/go/internal/cli/index_test.go b/go/internal/cli/index_test.go new file mode 100644 index 00000000..d02ee4a4 --- /dev/null +++ b/go/internal/cli/index_test.go @@ -0,0 +1,43 @@ +package cli + +import ( + "bytes" + "os" + "path/filepath" + "strings" + "testing" +) + +func TestIndexRejectsNonDirectory(t *testing.T) { + cmd := NewRootCommand() + cmd.SetArgs([]string{"index", "/this/path/does/not/exist"}) + var out, errBuf bytes.Buffer + cmd.SetOut(&out) + cmd.SetErr(&errBuf) + err := cmd.Execute() + if err == nil { + t.Fatal("expected error on missing path arg") + } +} + +func TestIndexSmokeRun(t *testing.T) { + dir := t.TempDir() 
+ _ = os.WriteFile(filepath.Join(dir, "a.java"), []byte("public class A {}"), 0644) + + cmd := NewRootCommand() + cmd.SetArgs([]string{"index", dir}) + var out bytes.Buffer + cmd.SetOut(&out) + cmd.SetErr(&out) + if err := cmd.Execute(); err != nil { + t.Fatalf("index errored: %v\n%s", err, out.String()) + } + if !strings.Contains(out.String(), "Files:") { + t.Fatalf("expected stats summary in output:\n%s", out.String()) + } + // Cache file should exist under /.codeiq/cache/codeiq.sqlite. + wantFile := filepath.Join(dir, ".codeiq", "cache", "codeiq.sqlite") + if _, err := os.Stat(wantFile); err != nil { + t.Fatalf("cache file missing: %v", err) + } +} diff --git a/go/internal/cli/mcp.go b/go/internal/cli/mcp.go new file mode 100644 index 00000000..98cfc964 --- /dev/null +++ b/go/internal/cli/mcp.go @@ -0,0 +1,185 @@ +package cli + +import ( + "context" + "fmt" + "os" + "os/signal" + "path/filepath" + "syscall" + "time" + + "github.com/randomcodespace/codeiq/go/internal/buildinfo" + "github.com/randomcodespace/codeiq/go/internal/flow" + "github.com/randomcodespace/codeiq/go/internal/graph" + iqquery "github.com/randomcodespace/codeiq/go/internal/intelligence/query" + "github.com/randomcodespace/codeiq/go/internal/mcp" + "github.com/randomcodespace/codeiq/go/internal/model" + "github.com/randomcodespace/codeiq/go/internal/query" + mcpsdk "github.com/modelcontextprotocol/go-sdk/mcp" + "github.com/spf13/cobra" +) + +func init() { + registerSubcommand(newMCPCommand) +} + +// newMCPCommand assembles `codeiq mcp` — runs the stdio MCP server that +// Claude Code spawns. +// +// The server opens the Kuzu graph read-only, wires every registered +// tool family (RegisterGraph today; topology/flow/intelligence land in +// follow-on phases), and runs the JSON-RPC protocol loop over stdin/ +// stdout via the official Anthropic Go SDK. +// +// Stderr is the log channel — Claude Code surfaces stderr in its MCP +// server log panel. 
The CLI does not write to stdout outside of the +// JSON-RPC stream because doing so would corrupt the protocol. +func newMCPCommand() *cobra.Command { + var ( + graphDir string + maxResults int + maxDepth int + queryTimeout time.Duration + ) + cmd := &cobra.Command{ + Use: "mcp [path]", + Short: "Run the stdio MCP server (Claude Code spawns this).", + Long: `Run a JSON-RPC MCP server over stdin / stdout. Claude Code +launches this subcommand when the project's .mcp.json registers ` + "`codeiq`" + ` +as an MCP server. + +Prerequisites: ` + "`codeiq index`" + ` and ` + "`codeiq enrich`" + ` must have been run +against the target repository so the Kuzu graph at .codeiq/graph/ is +populated. The Kuzu store is opened read-only; mutation keywords in +` + "`run_cypher`" + ` are rejected at the gate. + +Stderr is the log channel — Claude Code surfaces stderr in its MCP server +log panel. Do not write anything to stdout outside of the JSON-RPC stream +or the protocol will break. + +To register with Claude Code, add to .mcp.json at the repo root: + + { + "mcpServers": { + "code-mcp": { + "command": "codeiq", + "args": ["mcp"] + } + } + }`, + Example: ` codeiq mcp # foreground stdio server + codeiq mcp 2> /tmp/codeiq-mcp.log # capture stderr + codeiq mcp --graph-dir /tmp/scratch.kuzu # alternate graph location`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + root, err := resolvePath(args) + if err != nil { + return err + } + gdir := graphDir + if gdir == "" { + gdir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + store, err := graph.OpenReadOnly(gdir, queryTimeout) + if err != nil { + return fmt.Errorf("open graph %s: %w", gdir, err) + } + defer store.Close() + + deps := &mcp.Deps{ + Store: store, + Query: query.NewService(store), + Stats: query.NewStatsServiceFromStore(func() ([]*model.CodeNode, []*model.CodeEdge, error) { + nodes, err := store.LoadAllNodes() + if err != nil { + return nil, nil, err + } + edges, err 
:= store.LoadAllEdges() + if err != nil { + return nodes, nil, err + } + return nodes, edges, nil + }), + Topology: query.NewTopology(store), + Flow: flow.NewEngine(store), + QueryPlanner: iqquery.NewPlanner(iqquery.CapabilityMatrixFor), + // Evidence assembler + ArtifactMeta are wired by the + // intelligence/evidence loader once it lands the on-disk + // manifest format. Until then get_evidence_pack and + // get_artifact_metadata return the legacy `{"error": + // "...unavailable. Run 'enrich' first."}` envelope which + // matches the Java contract for the "no metadata yet" + // path. RegisterIntelligence registers the tools either + // way so tools/list is stable. + RootPath: root, + MaxResults: maxResults, + MaxDepth: maxDepth, + } + srv, err := mcp.NewServer(mcp.ServerOptions{ + Name: "CODE MCP", + Version: buildinfo.Version, + }) + if err != nil { + return err + } + if err := registerAllTools(srv, deps); err != nil { + return err + } + + ctx, cancel := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM) + defer cancel() + return srv.Serve(ctx, &mcpsdk.StdioTransport{}) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + cmd.Flags().IntVar(&maxResults, "max-results", 500, + "Cap on caller-supplied result counts in tools that page over rows.") + cmd.Flags().IntVar(&maxDepth, "max-depth", 10, + "Cap on caller-supplied traversal depths (ego graph / trace impact / blast radius).") + cmd.Flags().DurationVar(&queryTimeout, "query-timeout", graph.DefaultQueryTimeout, + "Per-Cypher-query wall-clock timeout (default: 30s).") + return cmd +} + +// registerAllTools wires every tool family onto srv. All four families +// land here unconditionally — graph (20) + topology (9) + flow (1) + +// intelligence (4) = 34 tools — matching the Java McpTools registration +// count. 
The `optionalRegisterHooks` slice remains for forward-compat +// with new tool families that may land in later phases (drill-down +// flows, query planner v2, etc.) without re-touching this function. +func registerAllTools(srv *mcp.Server, d *mcp.Deps) error { + if err := mcp.RegisterGraph(srv, d); err != nil { + return fmt.Errorf("register graph tools: %w", err) + } + if err := mcp.RegisterTopology(srv, d); err != nil { + return fmt.Errorf("register topology tools: %w", err) + } + if err := mcp.RegisterFlow(srv, d); err != nil { + return fmt.Errorf("register flow tools: %w", err) + } + if err := mcp.RegisterIntelligence(srv, d); err != nil { + return fmt.Errorf("register intelligence tools: %w", err) + } + // Plan §2 — consolidated tools alongside the deprecated 34. + if err := mcp.RegisterConsolidated(srv, d); err != nil { + return fmt.Errorf("register consolidated tools: %w", err) + } + for _, hook := range optionalRegisterHooks { + if hook == nil { + continue + } + if err := hook(srv, d); err != nil { + return err + } + } + return nil +} + +// optionalRegisterHooks is the registration hook list for tool families +// whose package may or may not be linked into the binary yet. Reserved +// for future tool-family extensions; the four core families +// (graph / topology / flow / intelligence) are wired unconditionally +// above. +var optionalRegisterHooks []func(*mcp.Server, *mcp.Deps) error diff --git a/go/internal/cli/mcp_test.go b/go/internal/cli/mcp_test.go new file mode 100644 index 00000000..332e76ef --- /dev/null +++ b/go/internal/cli/mcp_test.go @@ -0,0 +1,51 @@ +package cli + +import ( + "strings" + "testing" +) + +// TestMCPCommandIsRegistered asserts the `mcp` subcommand is wired into +// the root command and satisfies the docs contract. 
+func TestMCPCommandIsRegistered(t *testing.T) { + root := NewRootCommand() + var found bool + for _, c := range root.Commands() { + if c.Name() == "mcp" { + found = true + if c.Short == "" || c.Long == "" || c.Example == "" || c.RunE == nil { + t.Fatalf("mcp subcommand missing docs / RunE") + } + // Sanity: the long help mentions the read-only contract and + // the .mcp.json registration pattern. + if !strings.Contains(c.Long, "read-only") { + t.Errorf("mcp Long missing 'read-only' context: %s", c.Long) + } + if !strings.Contains(c.Long, ".mcp.json") { + t.Errorf("mcp Long missing .mcp.json registration example: %s", c.Long) + } + break + } + } + if !found { + t.Fatal("mcp subcommand not registered") + } +} + +// TestMCPCommandHasExpectedFlags asserts the canonical flags (graph-dir, +// max-results, max-depth, query-timeout) are wired onto `mcp`. +func TestMCPCommandHasExpectedFlags(t *testing.T) { + root := NewRootCommand() + for _, c := range root.Commands() { + if c.Name() != "mcp" { + continue + } + for _, name := range []string{"graph-dir", "max-results", "max-depth", "query-timeout"} { + if c.Flags().Lookup(name) == nil { + t.Errorf("mcp missing flag --%s", name) + } + } + return + } + t.Fatal("mcp subcommand not registered") +} diff --git a/go/internal/cli/plugins.go b/go/internal/cli/plugins.go new file mode 100644 index 00000000..306d0885 --- /dev/null +++ b/go/internal/cli/plugins.go @@ -0,0 +1,215 @@ +package cli + +import ( + "fmt" + "io" + "reflect" + "sort" + "strings" + "text/tabwriter" + + "github.com/randomcodespace/codeiq/go/internal/detector" + + // Blank imports register every phase-1/2 detector with detector.Default. + // Same set the `index` command pulls in — keep in sync. 
+ _ "github.com/randomcodespace/codeiq/go/internal/detector/generic" + _ "github.com/randomcodespace/codeiq/go/internal/detector/jvm/java" + _ "github.com/randomcodespace/codeiq/go/internal/detector/python" + + "github.com/spf13/cobra" +) + +func init() { + registerSubcommand(newPluginsCommand) +} + +// newPluginsCommand assembles `codeiq plugins` — list / inspect registered +// detectors. +// +// Detectors are registered at compile time via the detector.RegisterDefault +// init() pattern (Go's compile-time registry — no classpath scan, no +// reflection at runtime). The list reflects whatever was linked into the +// binary; build tags / blank imports change the set. +func newPluginsCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "plugins ", + Short: "List and inspect available detectors.", + Long: `Inspect the static detector registry. Detectors are +auto-registered by the Go compile-time detector.Default registry — no +classpath scan, no runtime reflection. Use ` + "`plugins list`" + ` for an +overview and ` + "`plugins inspect `" + ` for per-detector metadata. + +Detectors are stateless ` + "`Detector`" + ` implementations registered via +` + "`detector.RegisterDefault`" + ` from their package's ` + "`init()`" + `. The list +in this binary reflects whatever was linked in — build tags / blank +imports change the set.`, + Example: ` codeiq plugins list + codeiq plugins list --language python + codeiq plugins inspect spring_rest`, + RunE: func(c *cobra.Command, _ []string) error { return c.Help() }, + } + cmd.AddCommand(newPluginsListCommand()) + cmd.AddCommand(newPluginsInspectCommand()) + return cmd +} + +func newPluginsListCommand() *cobra.Command { + var ( + lang string + asJSON bool + ) + cmd := &cobra.Command{ + Use: "list", + Short: "List every registered detector.", + Long: `Print one row per registered detector with columns: +NAME, CATEGORY (derived from the detector's package path), and LANGUAGES. 
+ +Filter with ` + "`--language`" + ` to restrict to detectors that handle a given +language. Pass ` + "`--json`" + ` for a machine-parseable array.`, + Example: ` codeiq plugins list + codeiq plugins list --language python + codeiq plugins list --json | jq '.[] | .name'`, + RunE: func(cmd *cobra.Command, args []string) error { + dets := detector.Default.All() + if lang != "" { + dets = filterByLanguage(dets, lang) + } + rows := buildPluginRows(dets) + if asJSON { + return jsonOut(cmd.OutOrStdout(), rows) + } + return printPluginRows(cmd.OutOrStdout(), rows) + }, + } + cmd.Flags().StringVar(&lang, "language", "", + "Filter by supported language (e.g. java, python, typescript).") + cmd.Flags().BoolVar(&asJSON, "json", false, + "Emit detectors as a JSON array instead of a table.") + return cmd +} + +func newPluginsInspectCommand() *cobra.Command { + var asJSON bool + cmd := &cobra.Command{ + Use: "inspect ", + Short: "Print metadata for one detector.", + Long: `Print all registered metadata for the named detector: +category (derived from package path), supported languages, default +confidence level, and the underlying Go type. Use ` + "`plugins list`" + ` to +discover detector names.`, + Example: ` codeiq plugins inspect spring_rest + codeiq plugins inspect jpa_entity + codeiq plugins inspect django_model --json`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + name := args[0] + d := detector.Default.ByName(name) + if d == nil { + return fmt.Errorf("unknown detector %q (try `codeiq plugins list`)", name) + } + info := describeDetector(d) + if asJSON { + return jsonOut(cmd.OutOrStdout(), info) + } + return printPluginInspect(cmd.OutOrStdout(), info) + }, + } + cmd.Flags().BoolVar(&asJSON, "json", false, + "Emit detector metadata as a JSON object instead of a key:value list.") + return cmd +} + +// pluginRow is one row in `plugins list` output. 
+type pluginRow struct { + Name string `json:"name"` + Category string `json:"category"` + Languages []string `json:"languages"` + DefaultConfidence string `json:"default_confidence"` + GoType string `json:"go_type,omitempty"` +} + +// buildPluginRows converts a slice of Detectors into row structs sorted by name. +func buildPluginRows(dets []detector.Detector) []pluginRow { + rows := make([]pluginRow, 0, len(dets)) + for _, d := range dets { + rows = append(rows, describeDetector(d)) + } + sort.Slice(rows, func(i, j int) bool { return rows[i].Name < rows[j].Name }) + return rows +} + +// describeDetector packages the Detector metadata into a pluginRow. +// Category is derived from the Go package path of the underlying type — +// e.g. `.../detector/jvm/java` -> `jvm/java`. This avoids the need for a +// `Category()` method on every detector while still giving operators a +// useful grouping. +func describeDetector(d detector.Detector) pluginRow { + t := reflect.TypeOf(d) + if t.Kind() == reflect.Ptr { + t = t.Elem() + } + pkgPath := t.PkgPath() + return pluginRow{ + Name: d.Name(), + Category: categoryFromPkgPath(pkgPath), + Languages: sortedCopy(d.SupportedLanguages()), + DefaultConfidence: d.DefaultConfidence().String(), + GoType: pkgPath + "." + t.Name(), + } +} + +// categoryFromPkgPath turns a Go package path like +// `github.com/randomcodespace/codeiq/go/internal/detector/jvm/java` into +// `jvm/java`. Returns "unknown" if `detector/` is not in the path. +func categoryFromPkgPath(pkgPath string) string { + const marker = "/detector/" + idx := strings.Index(pkgPath, marker) + if idx < 0 { + return "unknown" + } + return pkgPath[idx+len(marker):] +} + +// filterByLanguage keeps only detectors that declare lang as a supported +// language. 
+func filterByLanguage(dets []detector.Detector, lang string) []detector.Detector { + out := make([]detector.Detector, 0, len(dets)) + for _, d := range dets { + for _, l := range d.SupportedLanguages() { + if l == lang { + out = append(out, d) + break + } + } + } + return out +} + +// printPluginRows renders rows as an aligned table. +func printPluginRows(w io.Writer, rows []pluginRow) error { + tw := tabwriter.NewWriter(w, 0, 0, 2, ' ', 0) + fmt.Fprintln(tw, "NAME\tCATEGORY\tLANGUAGES\tCONFIDENCE") + for _, r := range rows { + fmt.Fprintf(tw, "%s\t%s\t[%s]\t%s\n", + r.Name, r.Category, strings.Join(r.Languages, ","), r.DefaultConfidence) + } + return tw.Flush() +} + +// printPluginInspect renders a single row as a key/value block. +func printPluginInspect(w io.Writer, row pluginRow) error { + fmt.Fprintf(w, "name: %s\n", row.Name) + fmt.Fprintf(w, "category: %s\n", row.Category) + fmt.Fprintf(w, "languages: [%s]\n", strings.Join(row.Languages, ", ")) + fmt.Fprintf(w, "default_confidence: %s\n", row.DefaultConfidence) + fmt.Fprintf(w, "go_type: %s\n", row.GoType) + return nil +} + +// sortedCopy returns a defensive sorted copy of the slice. +func sortedCopy(xs []string) []string { + out := append([]string(nil), xs...) + sort.Strings(out) + return out +} + diff --git a/go/internal/cli/plugins_test.go b/go/internal/cli/plugins_test.go new file mode 100644 index 00000000..0ea8f2a4 --- /dev/null +++ b/go/internal/cli/plugins_test.go @@ -0,0 +1,136 @@ +package cli + +import ( + "bytes" + "encoding/json" + "strings" + "testing" +) + +// TestPluginsListTable asserts `codeiq plugins list` prints a table with +// at least one detector row. 
+func TestPluginsListTable(t *testing.T) { + root := NewRootCommand() + root.SetArgs([]string{"plugins", "list"}) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("plugins list: %v\n%s", err, out.String()) + } + if !strings.Contains(out.String(), "NAME") { + t.Fatalf("plugins list missing NAME column header:\n%s", out.String()) + } + if !strings.Contains(out.String(), "CATEGORY") { + t.Errorf("plugins list missing CATEGORY column header:\n%s", out.String()) + } + // Phase 1 ships spring_rest; check it's present. + if !strings.Contains(out.String(), "spring_rest") { + t.Errorf("plugins list missing spring_rest row:\n%s", out.String()) + } +} + +// TestPluginsListJSON asserts the --json flag produces a JSON array +// containing detector names and categories. +func TestPluginsListJSON(t *testing.T) { + root := NewRootCommand() + root.SetArgs([]string{"plugins", "list", "--json"}) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("plugins list --json: %v\n%s", err, out.String()) + } + var arr []map[string]any + if err := json.Unmarshal(out.Bytes(), &arr); err != nil { + t.Fatalf("plugins list --json invalid JSON: %v\n%s", err, out.String()) + } + if len(arr) == 0 { + t.Fatal("expected at least one detector in --json output") + } + for _, k := range []string{"name", "category", "languages", "default_confidence"} { + if _, ok := arr[0][k]; !ok { + t.Errorf("first detector missing %q: %v", k, arr[0]) + } + } +} + +// TestPluginsListLanguageFilter asserts --language restricts the list. 
+func TestPluginsListLanguageFilter(t *testing.T) { + root := NewRootCommand() + root.SetArgs([]string{"plugins", "list", "--language", "python", "--json"}) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("plugins list --language python: %v\n%s", err, out.String()) + } + var arr []map[string]any + if err := json.Unmarshal(out.Bytes(), &arr); err != nil { + t.Fatalf("invalid JSON: %v", err) + } + if len(arr) == 0 { + t.Fatal("expected at least one python detector") + } + for _, r := range arr { + langs, _ := r["languages"].([]any) + found := false + for _, l := range langs { + if l == "python" { + found = true + break + } + } + if !found { + t.Errorf("detector %v has no python in languages", r["name"]) + } + } +} + +// TestPluginsInspect asserts `codeiq plugins inspect ` prints the +// canonical key/value block. +func TestPluginsInspect(t *testing.T) { + root := NewRootCommand() + root.SetArgs([]string{"plugins", "inspect", "spring_rest"}) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("plugins inspect: %v\n%s", err, out.String()) + } + for _, k := range []string{"name:", "category:", "languages:", "default_confidence:", "go_type:"} { + if !strings.Contains(out.String(), k) { + t.Errorf("plugins inspect missing %q\n%s", k, out.String()) + } + } + if !strings.Contains(out.String(), "spring_rest") { + t.Errorf("plugins inspect did not name detector:\n%s", out.String()) + } +} + +// TestPluginsInspectUnknown asserts unknown detector surfaces an error. 
+func TestPluginsInspectUnknown(t *testing.T) { + root := NewRootCommand() + root.SetArgs([]string{"plugins", "inspect", "bogus_does_not_exist"}) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err == nil { + t.Fatalf("expected error for unknown detector, got:\n%s", out.String()) + } +} + +// TestCategoryFromPkgPath unit-tests the package-path -> category mapping. +func TestCategoryFromPkgPath(t *testing.T) { + cases := []struct{ pkgPath, want string }{ + {"github.com/randomcodespace/codeiq/go/internal/detector/jvm/java", "jvm/java"}, + {"github.com/randomcodespace/codeiq/go/internal/detector/python", "python"}, + {"github.com/randomcodespace/codeiq/go/internal/detector/generic", "generic"}, + {"github.com/example/other/package", "unknown"}, + } + for _, c := range cases { + if got := categoryFromPkgPath(c.pkgPath); got != c.want { + t.Errorf("categoryFromPkgPath(%q) = %q, want %q", c.pkgPath, got, c.want) + } + } +} diff --git a/go/internal/cli/query.go b/go/internal/cli/query.go new file mode 100644 index 00000000..8c10bb83 --- /dev/null +++ b/go/internal/cli/query.go @@ -0,0 +1,207 @@ +package cli + +import ( + "fmt" + "io" + "path/filepath" + + "github.com/randomcodespace/codeiq/go/internal/graph" + "github.com/randomcodespace/codeiq/go/internal/model" + "github.com/randomcodespace/codeiq/go/internal/query" + "github.com/spf13/cobra" +) + +func init() { + registerSubcommand(newQueryCommand) +} + +// newQueryCommand assembles the `query` parent and its five preset +// subcommands. Each child shares the same path-resolution / graph-open +// boilerplate via runQueryFinder so the per-subcommand bodies stay readable. +func newQueryCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "query ", + Short: "Run preset graph queries (consumers, producers, callers, dependencies, dependents).", + Long: `Preset query commands that issue targeted Cypher against the +enriched graph store. 
Each subcommand takes a node id and prints the +matching neighbour set; combine with ` + "`codeiq find`" + ` for higher-level +finders that return whole categories (endpoints, entities, ...). + +The output is tab-separated ` + "`id\\tkind\\tlabel`" + ` per row — easy to pipe +into ` + "`awk`" + ` / ` + "`cut`" + ` and stable across runs because the underlying Cypher +ORDER BYs the projected id column.`, + Example: ` codeiq query consumers svc:checkout + codeiq query callers method:com.foo.Bar#baz + codeiq query dependencies svc:fulfilment`, + RunE: func(c *cobra.Command, _ []string) error { return c.Help() }, + } + cmd.AddCommand(newQueryConsumers()) + cmd.AddCommand(newQueryProducers()) + cmd.AddCommand(newQueryCallers()) + cmd.AddCommand(newQueryDependencies()) + cmd.AddCommand(newQueryDependents()) + return cmd +} + +// finderFn matches the signature of every query.Service.FindXxx method — +// take a node id, return a node slice. +type finderFn func(svc *query.Service, id string) ([]*model.CodeNode, error) + +// runQueryFinder is the shared body for every preset query subcommand. It +// resolves the path, opens the graph, runs `fn` against the supplied node +// id, and prints tab-separated `id\tkind\tlabel` rows. 
+func runQueryFinder(w io.Writer, args []string, graphDir string, fn finderFn) error { + if len(args) < 1 { + return newUsageError("missing node-id argument") + } + id := args[0] + root, err := resolvePath(args[1:]) + if err != nil { + return err + } + gdir := graphDir + if gdir == "" { + gdir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + store, err := graph.Open(gdir) + if err != nil { + return fmt.Errorf("open graph %s: %w", gdir, err) + } + defer store.Close() + svc := query.NewService(store) + nodes, err := fn(svc, id) + if err != nil { + return err + } + for _, n := range nodes { + fmt.Fprintf(w, "%s\t%s\t%s\n", n.ID, n.Kind, n.Label) + } + return nil +} + +func newQueryConsumers() *cobra.Command { + var graphDir string + cmd := &cobra.Command{ + Use: "consumers [path]", + Short: "Show nodes that consume the given node.", + Long: `Return the set of nodes reachable to the given node via +consume-direction runtime edges (CONSUMES, LISTENS). Excludes structural +edges (CONTAINS, DEFINES, IMPORTS) and build-time DEPENDS_ON. + +The argument is a graph node id (e.g. 
` + "`svc:checkout`" + ` or +` + "`endpoint:/api/users:GET`" + `); see ` + "`codeiq find`" + ` for finders that +return whole categories.`, + Example: ` codeiq query consumers svc:checkout + codeiq query consumers svc:checkout /repo + codeiq query consumers svc:checkout --graph-dir /tmp/scratch.kuzu`, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runQueryFinder(cmd.OutOrStdout(), args, graphDir, + func(s *query.Service, id string) ([]*model.CodeNode, error) { + return s.FindConsumers(id) + }) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + return cmd +} + +func newQueryProducers() *cobra.Command { + var graphDir string + cmd := &cobra.Command{ + Use: "producers [path]", + Short: "Show nodes that produce / publish to the given node.", + Long: `Return the set of nodes that produce or publish to the given +target, via PRODUCES and PUBLISHES edges. Typical use: locate every code +path writing to a topic / queue node, or every controller method that +emits a domain event.`, + Example: ` codeiq query producers topic:users.created + codeiq query producers topic:users.created /repo + codeiq query producers topic:users.created --graph-dir /tmp/scratch.kuzu`, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runQueryFinder(cmd.OutOrStdout(), args, graphDir, + func(s *query.Service, id string) ([]*model.CodeNode, error) { + return s.FindProducers(id) + }) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + return cmd +} + +func newQueryCallers() *cobra.Command { + var graphDir string + cmd := &cobra.Command{ + Use: "callers [path]", + Short: "Show methods that call the given method (CALLS-direction).", + Long: `Return the set of nodes that CALL the given target via CALLS +edges. 
Use this to trace the upstream invocation chain to a method or +endpoint. Pair with ` + "`codeiq query consumers`" + ` for the runtime-edge +counterpart (consume vs. invoke).`, + Example: ` codeiq query callers method:com.foo.Bar#baz + codeiq query callers method:com.foo.Bar#baz /repo + codeiq query callers method:com.foo.Bar#baz --graph-dir /tmp/scratch.kuzu`, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runQueryFinder(cmd.OutOrStdout(), args, graphDir, + func(s *query.Service, id string) ([]*model.CodeNode, error) { + return s.FindCallers(id) + }) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + return cmd +} + +func newQueryDependencies() *cobra.Command { + var graphDir string + cmd := &cobra.Command{ + Use: "dependencies [path]", + Short: "Show DEPENDS_ON children of the given node (outgoing).", + Long: `Return the set of nodes that the given source DEPENDS_ON via +build-time / declarative edges. 
Symmetric to ` + "`codeiq query dependents`" + ` — +where dependencies looks downstream, dependents looks upstream.`, + Example: ` codeiq query dependencies svc:fulfilment + codeiq query dependencies svc:fulfilment /repo + codeiq query dependencies svc:fulfilment --graph-dir /tmp/scratch.kuzu`, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runQueryFinder(cmd.OutOrStdout(), args, graphDir, + func(s *query.Service, id string) ([]*model.CodeNode, error) { + return s.FindDependencies(id) + }) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + return cmd +} + +func newQueryDependents() *cobra.Command { + var graphDir string + cmd := &cobra.Command{ + Use: "dependents [path]", + Short: "Show nodes that DEPEND_ON the given node (incoming).", + Long: `Return the set of nodes that DEPENDS_ON the given target via +build-time / declarative edges. Symmetric to +` + "`codeiq query dependencies`" + ` — handy for blast-radius style "what +breaks if I remove X" questions.`, + Example: ` codeiq query dependents svc:fulfilment + codeiq query dependents svc:fulfilment /repo + codeiq query dependents svc:fulfilment --graph-dir /tmp/scratch.kuzu`, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + return runQueryFinder(cmd.OutOrStdout(), args, graphDir, + func(s *query.Service, id string) ([]*model.CodeNode, error) { + return s.FindDependents(id) + }) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + return cmd +} diff --git a/go/internal/cli/query_test.go b/go/internal/cli/query_test.go new file mode 100644 index 00000000..f846c1f1 --- /dev/null +++ b/go/internal/cli/query_test.go @@ -0,0 +1,80 @@ +package cli + +import ( + "bytes" + "path/filepath" + "strings" + "testing" +) + +// TestQuerySubcommandsRegistered asserts every 
query subcommand is wired +// into the root command, has the docs the §7.1 contract demands, and its +// RunE handler errors out gracefully when handed an unknown node id (instead +// of panicking or printing the entire graph). +func TestQuerySubcommandsRegistered(t *testing.T) { + dir := statsFixtureDir(t) + subs := []string{"consumers", "producers", "callers", "dependencies", "dependents"} + for _, sub := range subs { + t.Run(sub, func(t *testing.T) { + root := NewRootCommand() + root.SetArgs([]string{ + "query", sub, "id-that-does-not-exist", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("query %s: %v\n%s", sub, err, out.String()) + } + // An unknown id yields an empty result, not an error — the body + // is just an empty string (no rows printed). Sanity-check that + // the command exited cleanly. + if strings.Contains(out.String(), "panic") { + t.Fatalf("query %s produced panic in stdout:\n%s", sub, out.String()) + } + }) + } +} + +// TestQueryParentHelp asserts that running `codeiq query` with no +// subcommand prints help rather than erroring. +func TestQueryParentHelp(t *testing.T) { + root := NewRootCommand() + root.SetArgs([]string{"query"}) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("query parent: %v", err) + } + if !strings.Contains(out.String(), "Available Commands") { + t.Fatalf("query parent did not print help:\n%s", out.String()) + } +} + +// TestQueryConsumersAgainstFixture asserts FindConsumers returns the right +// set when called against a real fixture. fixture-minimal has CONTAINS +// edges only (no CONSUMES) so the result is empty for any node — confirms +// the consumers query distinguishes structural edges from runtime ones. 
+func TestQueryConsumersAgainstFixture(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "query", "consumers", "service:" + filepath.Base(dir), + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("consumers: %v\n%s", err, out.String()) + } + // fixture-minimal has no CONSUMES edges, so consumers of the root + // SERVICE must be empty. + if strings.TrimSpace(out.String()) != "" { + t.Fatalf("expected empty consumers result for fixture-minimal, got:\n%s", out.String()) + } +} diff --git a/go/internal/cli/review.go b/go/internal/cli/review.go new file mode 100644 index 00000000..b7a0a8bd --- /dev/null +++ b/go/internal/cli/review.go @@ -0,0 +1,121 @@ +package cli + +import ( + "context" + "encoding/json" + "fmt" + "os" + "path/filepath" + "strings" + "time" + + "github.com/randomcodespace/codeiq/go/internal/graph" + "github.com/randomcodespace/codeiq/go/internal/review" + + "github.com/spf13/cobra" +) + +func init() { + registerSubcommand(func() *cobra.Command { + var ( + base string + head string + model string + outFile string + format string + focus []string + ) + cmd := &cobra.Command{ + Use: "review [path]", + Short: "LLM-driven review of a PR diff against the indexed graph.", + Long: `Run an LLM review of git diff base..head, using the codeiq graph +as evidence context. Defaults: base=HEAD~1, head=HEAD, model=gpt-oss:20b +via local Ollama (set OLLAMA_API_KEY for Ollama Cloud). + +Output formats: + --format=markdown (default) human-readable review + --format=json structured Report for piping into other tools + +Plan §3 — Phase 3 of the optimization plan.`, + Example: ` codeiq review --base origin/main --head HEAD + OLLAMA_API_KEY=... 
codeiq review --model gpt-oss:120b + codeiq review --base v1.0 --head v1.1 --out review.md`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + path := "." + if len(args) == 1 { + path = args[0] + } + abs, err := filepath.Abs(path) + if err != nil { + return err + } + cfg := review.DefaultConfig() + if model != "" { + cfg.Model = model + } + client := review.NewClient(cfg) + + // Best-effort: open the enriched Kuzu store read-only so the + // review prompt carries graph evidence per changed file. If + // the store isn't present (no enrich yet) we fall back to + // diff-only review with a stderr warning. + var gctx review.GraphContext + gdir := filepath.Join(abs, ".codeiq", "graph", "codeiq.kuzu") + if store, err := graph.OpenReadOnly(gdir, 30*time.Second); err == nil { + defer store.Close() + gctx = review.NewKuzuGraphContext(store) + } else { + fmt.Fprintf(os.Stderr, "review: graph store not available (%v); falling back to diff-only review. Run 'codeiq enrich' first to include graph evidence.\n", err) + } + svc := review.NewService(client, gctx) + + ctx, cancel := context.WithTimeout(cmd.Context(), cfg.Timeout+30*time.Second) + defer cancel() + rep, err := svc.Review(ctx, abs, base, head, focus) + if err != nil { + return fmt.Errorf("review: %w", err) + } + + var rendered string + if format == "json" { + b, _ := json.MarshalIndent(rep, "", " ") + rendered = string(b) + "\n" + } else { + rendered = renderMarkdown(rep) + } + if outFile == "" { + fmt.Fprint(cmd.OutOrStdout(), rendered) + return nil + } + return os.WriteFile(outFile, []byte(rendered), 0644) + }, + } + cmd.Flags().StringVar(&base, "base", "", "Base git ref (default: HEAD~1)") + cmd.Flags().StringVar(&head, "head", "", "Head git ref (default: HEAD)") + cmd.Flags().StringVar(&model, "model", "", "Override LLM model (default: from config)") + cmd.Flags().StringVarP(&outFile, "out", "o", "", "Write output to file instead of stdout") + cmd.Flags().StringVar(&format, 
"format", "markdown", "Output format: markdown | json") + cmd.Flags().StringSliceVar(&focus, "focus", nil, "Limit review to these file paths") + return cmd + }) +} + +func renderMarkdown(rep *review.Report) string { + var b strings.Builder + fmt.Fprintf(&b, "# Code Review (model: %s)\n\n", rep.Model) + fmt.Fprintf(&b, "## Summary\n\n%s\n\n", rep.Summary) + if len(rep.Findings) == 0 { + b.WriteString("## Findings\n\nNo findings.\n") + return b.String() + } + b.WriteString("## Findings\n\n") + for _, f := range rep.Findings { + loc := f.File + if f.Line > 0 { + loc = fmt.Sprintf("%s:%d", f.File, f.Line) + } + fmt.Fprintf(&b, "- **[%s] %s** — %s\n", strings.ToUpper(f.Severity), loc, f.Comment) + } + return b.String() +} diff --git a/go/internal/cli/root.go b/go/internal/cli/root.go new file mode 100644 index 00000000..de39aa90 --- /dev/null +++ b/go/internal/cli/root.go @@ -0,0 +1,112 @@ +// Package cli wires Cobra commands. The exported NewRootCommand() builder is +// testable from package _test files; Execute() is the main-entry shim. +package cli + +import ( + "fmt" + "os" + + "github.com/spf13/cobra" +) + +// Global flag state, populated by Cobra at parse time. +var ( + flagConfig string + flagNoColor bool + flagJSON bool + flagVerbose int + flagShowVer bool // --version on root +) + +// NewRootCommand builds the codeiq root command and all subcommands. Each +// subcommand registers itself via init() in this package. +func NewRootCommand() *cobra.Command { + cmd := &cobra.Command{ + Use: "codeiq", + Short: "Deterministic code knowledge graph (CLI + stdio MCP).", + Long: `codeiq -- deterministic code knowledge graph (CLI + stdio MCP) + +codeiq scans a codebase, builds a deterministic knowledge graph from the +detected nodes and edges, and exposes it to humans via a CLI and to LLM +agents via a stdio MCP server. No AI, no external APIs -- pure static +analysis. + +Typical workflow: + codeiq index . # scan files, populate SQLite cache + codeiq enrich . 
# load cache into Kuzu graph store (phase 2) + codeiq mcp # run stdio MCP server (phase 3) +`, + Example: ` codeiq index . # Scan the current directory. + codeiq enrich . # Build the graph from the cache. + codeiq mcp # Run the MCP server (stdio). + codeiq stats --json # Stats as JSON.`, + RunE: func(cmd *cobra.Command, args []string) error { + if flagShowVer { + return printVersion(cmd.OutOrStdout(), flagJSON) + } + // No args + no --version => print help. + return cmd.Help() + }, + SilenceUsage: true, + SuggestionsMinimumDistance: 1, + } + pf := cmd.PersistentFlags() + pf.StringVar(&flagConfig, "config", "", "Path to codeiq.yml (default: ./codeiq.yml then ~/.codeiq/config.yml).") + pf.BoolVar(&flagNoColor, "no-color", false, "Disable ANSI color in output.") + pf.BoolVar(&flagJSON, "json", false, "Emit JSON output where applicable.") + pf.CountVarP(&flagVerbose, "verbose", "v", "Verbose logging (repeatable: -v / -vv / -vvv).") + + // --version on root, equivalent to `codeiq version`. + cmd.Flags().BoolVar(&flagShowVer, "version", false, "Show version and exit (alias of `codeiq version`).") + + // Register subcommands. + for _, sub := range subcommands() { + cmd.AddCommand(sub) + } + return cmd +} + +// Execute is the main entry point — runs the root command and returns the +// exit code (0 success, 1 usage error, 2 runtime error). +func Execute() int { + cmd := NewRootCommand() + if err := cmd.Execute(); err != nil { + // Cobra already printed the error; choose exit code based on type. + // usageError == 1, runtime/other == 2. + if _, ok := err.(*usageError); ok { + return 1 + } + fmt.Fprintln(os.Stderr, "Error:", err) + return 2 + } + return 0 +} + +// usageError marks errors that are user-input problems (missing arg, unknown +// flag). RunE returns this so exit code is 1, not 2. +type usageError struct{ msg string } + +func (u *usageError) Error() string { return u.msg } + +// newUsageError is the typed constructor. 
+func newUsageError(format string, args ...any) error { + return &usageError{msg: fmt.Sprintf(format, args...)} +} + +// subcommandRegistry is mutated by subcommand init() funcs. Order doesn't +// matter — Cobra sorts by Name() in help output. +var subcommandRegistry []func() *cobra.Command + +func subcommands() []*cobra.Command { + out := make([]*cobra.Command, 0, len(subcommandRegistry)) + for _, fn := range subcommandRegistry { + out = append(out, fn()) + } + return out +} + +// registerSubcommand appends a subcommand builder. Each subcommand file calls +// this from init(). +func registerSubcommand(fn func() *cobra.Command) { + subcommandRegistry = append(subcommandRegistry, fn) +} diff --git a/go/internal/cli/stats.go b/go/internal/cli/stats.go new file mode 100644 index 00000000..99da3bee --- /dev/null +++ b/go/internal/cli/stats.go @@ -0,0 +1,93 @@ +package cli + +import ( + "fmt" + "path/filepath" + + "github.com/randomcodespace/codeiq/go/internal/graph" + "github.com/randomcodespace/codeiq/go/internal/model" + "github.com/randomcodespace/codeiq/go/internal/query" + "github.com/spf13/cobra" +) + +func init() { + registerSubcommand(func() *cobra.Command { + var ( + graphDir string + asJSON bool + category string + ) + cmd := &cobra.Command{ + Use: "stats [path]", + Short: "Show categorized statistics from the analyzed graph.", + Long: `Show counts and breakdowns from a graph previously built by ` + "`enrich`" + `. + +Seven categories are surfaced: graph (node/edge/file totals), languages, +frameworks, infra (databases, messaging, cloud), connections (REST by +method, gRPC, websocket, producer/consumer edge counts), auth, and +architecture (classes / interfaces / methods / modules). Use ` + "`--category`" + + ` to focus on a single section and ` + "`--json`" + ` to pipe into other tools. 
+ +The default rendering is JSON because the output already carries +deterministic key order via OrderedMap; the ` + "`--json`" + ` flag is therefore +a no-op today but kept for forward compatibility with a future tabular +rendering.`, + Example: ` # Tabular summary + codeiq stats . + + # Just the infrastructure category as JSON + codeiq stats . --category infra --json + + # Pipe into jq + codeiq stats . --json | jq '.languages'`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + root, err := resolvePath(args) + if err != nil { + return err + } + gdir := graphDir + if gdir == "" { + gdir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + store, err := graph.Open(gdir) + if err != nil { + return fmt.Errorf("open graph %s: %w", gdir, err) + } + defer store.Close() + + svc := query.NewStatsServiceFromStore( + func() ([]*model.CodeNode, []*model.CodeEdge, error) { + ns, e := store.LoadAllNodes() + if e != nil { + return nil, nil, e + } + es, e := store.LoadAllEdges() + if e != nil { + return nil, nil, e + } + return ns, es, nil + }, + ) + var out any + if category != "" { + out = svc.ComputeCategory(category) + } else { + out = svc.ComputeStats() + } + if err := svc.LoadErr(); err != nil { + return fmt.Errorf("load graph: %w", err) + } + _ = asJSON // both modes use JSON for now + return printOrdered(cmd.OutOrStdout(), out) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + cmd.Flags().BoolVar(&asJSON, "json", false, + "Emit JSON output (currently always JSON; reserved for a future tabular renderer).") + cmd.Flags().StringVar(&category, "category", "", + "Show only one category (graph|languages|frameworks|infra|connections|auth|architecture).") + return cmd + }) +} diff --git a/go/internal/cli/stats_test.go b/go/internal/cli/stats_test.go new file mode 100644 index 00000000..a5e45e22 --- /dev/null +++ b/go/internal/cli/stats_test.go @@ 
-0,0 +1,130 @@ +package cli + +import ( + "bytes" + "encoding/json" + "os" + "path/filepath" + "strings" + "testing" + + "github.com/randomcodespace/codeiq/go/internal/analyzer" + "github.com/randomcodespace/codeiq/go/internal/cache" +) + +// statsFixtureDir copies the fixture-minimal corpus into a fresh temp dir, +// runs index + enrich, and returns the absolute path. The returned graph is +// the same shape exercised by every stats subtest — keeps test setup linear. +func statsFixtureDir(t *testing.T) string { + t.Helper() + dir := t.TempDir() + src := filepath.Join("..", "..", "testdata", "fixture-minimal") + entries, err := os.ReadDir(src) + if err != nil { + t.Fatalf("read fixture: %v", err) + } + for _, ent := range entries { + if ent.IsDir() { + continue + } + data, err := os.ReadFile(filepath.Join(src, ent.Name())) + if err != nil { + t.Fatalf("read %s: %v", ent.Name(), err) + } + if err := os.WriteFile(filepath.Join(dir, ent.Name()), data, 0o644); err != nil { + t.Fatalf("write %s: %v", ent.Name(), err) + } + } + c, err := cache.Open(filepath.Join(dir, "cache.sqlite")) + if err != nil { + t.Fatalf("cache open: %v", err) + } + defer c.Close() + a := analyzer.NewAnalyzer(analyzer.Options{Cache: c}) + if _, err := a.Run(dir); err != nil { + t.Fatalf("index: %v", err) + } + if _, err := analyzer.Enrich(dir, c, analyzer.EnrichOptions{ + GraphDir: filepath.Join(dir, "graph.kuzu"), + }); err != nil { + t.Fatalf("enrich: %v", err) + } + return dir +} + +// TestStatsCommandJSON asserts the stats command emits a JSON object with +// the seven canonical categories when --json is set. 
+func TestStatsCommandJSON(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "stats", "--json", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("stats: %v\n%s", err, out.String()) + } + var got map[string]any + if err := json.Unmarshal(out.Bytes(), &got); err != nil { + t.Fatalf("stats output is not valid JSON: %v\n%s", err, out.String()) + } + for _, k := range []string{ + "graph", "languages", "frameworks", "infra", + "connections", "auth", "architecture", + } { + if _, ok := got[k]; !ok { + t.Errorf("stats JSON missing category %q\nfull output:\n%s", k, out.String()) + } + } +} + +// TestStatsCommandCategory asserts --category restricts the output to a +// single category and that the JSON is non-empty for `graph`. +func TestStatsCommandCategory(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "stats", "--json", "--category", "graph", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("stats: %v\n%s", err, out.String()) + } + var got map[string]any + if err := json.Unmarshal(out.Bytes(), &got); err != nil { + t.Fatalf("stats output is not valid JSON: %v\n%s", err, out.String()) + } + if _, ok := got["nodes"]; !ok { + t.Errorf("category=graph response missing `nodes` key:\n%s", out.String()) + } +} + +// TestStatsCommandDefaultRendering asserts the default (non-JSON) rendering +// emits at least the "nodes" key — we use JSON for human view too because +// it's deterministic and trivial to grep. 
+func TestStatsCommandDefaultRendering(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "stats", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("stats: %v\n%s", err, out.String()) + } + if !strings.Contains(out.String(), "nodes") { + t.Fatalf("stats default render missing nodes counter:\n%s", out.String()) + } +} diff --git a/go/internal/cli/topology.go b/go/internal/cli/topology.go new file mode 100644 index 00000000..dedb3e03 --- /dev/null +++ b/go/internal/cli/topology.go @@ -0,0 +1,325 @@ +package cli + +import ( + "fmt" + "path/filepath" + + "github.com/randomcodespace/codeiq/go/internal/graph" + "github.com/randomcodespace/codeiq/go/internal/query" + "github.com/spf13/cobra" +) + +func init() { + registerSubcommand(newTopologyCommand) +} + +// newTopologyCommand assembles the `topology` parent and its sub-views. +// The bare parent renders the full service map; sub-views surface specific +// analyses (service-detail / blast-radius / bottlenecks / circular / dead). +func newTopologyCommand() *cobra.Command { + var graphDir string + cmd := &cobra.Command{ + Use: "topology [path]", + Short: "Show the service topology map (services + cross-service connections).", + Long: `Render the service topology: every SERVICE node ServiceDetector +synthesised plus every cross-service runtime edge (CALLS / PRODUCES / +CONSUMES / QUERIES / CONNECTS_TO / PUBLISHES / LISTENS / SENDS_TO / +RECEIVES_FROM / INVOKES_RMI / EXPORTS_RMI). The output carries +` + "`services`" + `, ` + "`connections`" + `, and ` + "`service_count`" + ` / ` + "`connection_count`" + + ` aggregates. + +Subcommands narrow the view: + service-detail endpoints / entities / guards / databases / + queues for one service. + blast-radius nodes reachable from the given node. + bottlenecks services ordered by total connection count. 
+ circular cross-service dependency cycles. + dead services with no incoming runtime edges.`, + Example: ` # Bare topology map + codeiq topology . + + # Detail for one service + codeiq topology service-detail checkout-svc + + # Blast radius for a node + codeiq topology blast-radius svc:checkout-svc --depth 3`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + root, err := resolvePath(args) + if err != nil { + return err + } + gdir := graphDir + if gdir == "" { + gdir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + store, err := graph.Open(gdir) + if err != nil { + return fmt.Errorf("open graph %s: %w", gdir, err) + } + defer store.Close() + t := query.NewTopology(store) + out, err := t.GetTopology() + if err != nil { + return err + } + return printOrdered(cmd.OutOrStdout(), out) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + cmd.AddCommand(newTopologyServiceDetail()) + cmd.AddCommand(newTopologyBlastRadius()) + cmd.AddCommand(newTopologyBottlenecks()) + cmd.AddCommand(newTopologyCircular()) + cmd.AddCommand(newTopologyDead()) + cmd.AddCommand(newTopologyPath()) + return cmd +} + +func newTopologyServiceDetail() *cobra.Command { + var graphDir string + cmd := &cobra.Command{ + Use: "service-detail [path]", + Short: "Show endpoints / entities / guards / databases / queues for one service.", + Long: `Render the detail object for the named SERVICE — endpoints, +entities, guards, databases, and queues that ServiceDetector pivoted under +this service via CONTAINS edges. 
Use ` + "`codeiq find services`" + ` to list +candidate names.`, + Example: ` codeiq topology service-detail checkout-svc + codeiq topology service-detail web-ui /repo + codeiq topology service-detail notifier --graph-dir /tmp/scratch.kuzu`, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + name := args[0] + root, err := resolvePath(args[1:]) + if err != nil { + return err + } + gdir := graphDir + if gdir == "" { + gdir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + store, err := graph.Open(gdir) + if err != nil { + return fmt.Errorf("open graph %s: %w", gdir, err) + } + defer store.Close() + t := query.NewTopology(store) + out, err := t.ServiceDetail(name) + if err != nil { + return err + } + return printOrdered(cmd.OutOrStdout(), out) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + return cmd +} + +func newTopologyBlastRadius() *cobra.Command { + var ( + graphDir string + depth int + ) + cmd := &cobra.Command{ + Use: "blast-radius [path]", + Short: "Show nodes reachable from the given node, up to --depth hops.", + Long: `Render the blast-radius object for the given node — the set of +reachable nodes (via any runtime edge) and the services those nodes belong +to. 
Default depth is 5 hops; cap with ` + "`--depth`" + ` for tighter scopes.`, + Example: ` codeiq topology blast-radius svc:checkout-svc + codeiq topology blast-radius svc:checkout-svc --depth 3 + codeiq topology blast-radius method:com.foo.Bar#baz --depth 2`, + Args: cobra.MinimumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + id := args[0] + root, err := resolvePath(args[1:]) + if err != nil { + return err + } + gdir := graphDir + if gdir == "" { + gdir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + store, err := graph.Open(gdir) + if err != nil { + return fmt.Errorf("open graph %s: %w", gdir, err) + } + defer store.Close() + t := query.NewTopology(store) + out, err := t.BlastRadius(id, depth) + if err != nil { + return err + } + return printOrdered(cmd.OutOrStdout(), out) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + cmd.Flags().IntVar(&depth, "depth", 5, + "Maximum traversal depth in hops (default: 5).") + return cmd +} + +func newTopologyBottlenecks() *cobra.Command { + var graphDir string + cmd := &cobra.Command{ + Use: "bottlenecks [path]", + Short: "List services ordered by total connection count (in + out).", + Long: `Render services ranked by combined connection degree. +Services with zero connections are omitted. 
Sort order: total desc, then +service name asc — deterministic for diffing.`, + Example: ` codeiq topology bottlenecks + codeiq topology bottlenecks /repo + codeiq topology bottlenecks --graph-dir /tmp/scratch.kuzu`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + root, err := resolvePath(args) + if err != nil { + return err + } + gdir := graphDir + if gdir == "" { + gdir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + store, err := graph.Open(gdir) + if err != nil { + return fmt.Errorf("open graph %s: %w", gdir, err) + } + defer store.Close() + t := query.NewTopology(store) + out, err := t.FindBottlenecks() + if err != nil { + return err + } + return printOrdered(cmd.OutOrStdout(), out) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + return cmd +} + +func newTopologyCircular() *cobra.Command { + var graphDir string + cmd := &cobra.Command{ + Use: "circular [path]", + Short: "Show cross-service dependency cycles.", + Long: `Render the list of cross-service cycles — each entry is a +service-name slice with the same first and last element (closed loop). 
+Cycles are normalised so the smallest service name is at index 0 for +stable comparison across runs.`, + Example: ` codeiq topology circular + codeiq topology circular /repo + codeiq topology circular --graph-dir /tmp/scratch.kuzu`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + root, err := resolvePath(args) + if err != nil { + return err + } + gdir := graphDir + if gdir == "" { + gdir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + store, err := graph.Open(gdir) + if err != nil { + return fmt.Errorf("open graph %s: %w", gdir, err) + } + defer store.Close() + t := query.NewTopology(store) + out, err := t.FindCircular() + if err != nil { + return err + } + return printOrdered(cmd.OutOrStdout(), out) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + return cmd +} + +func newTopologyDead() *cobra.Command { + var graphDir string + cmd := &cobra.Command{ + Use: "dead [path]", + Short: "List services with no incoming runtime edges.", + Long: `Render services that have no incoming cross-service runtime +edge. Useful for spotting services nobody consumes (potential dead code, +or services with only outbound publishes). 
Excludes structural CONTAINS +edges by design.`, + Example: ` codeiq topology dead + codeiq topology dead /repo + codeiq topology dead --graph-dir /tmp/scratch.kuzu`, + Args: cobra.MaximumNArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + root, err := resolvePath(args) + if err != nil { + return err + } + gdir := graphDir + if gdir == "" { + gdir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + store, err := graph.Open(gdir) + if err != nil { + return fmt.Errorf("open graph %s: %w", gdir, err) + } + defer store.Close() + t := query.NewTopology(store) + out, err := t.FindDeadServices() + if err != nil { + return err + } + return printOrdered(cmd.OutOrStdout(), out) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + return cmd +} + +func newTopologyPath() *cobra.Command { + var graphDir string + cmd := &cobra.Command{ + Use: "path [path]", + Short: "Find the shortest cross-service path between two services.", + Long: `Render the list of hops between two services via BFS over the +cross-service runtime adjacency. 
Each hop is ` + "`{from, to, type}`" + `; the +` + "`type`" + ` is the lowercased edge kind that linked the two hops in the +underlying graph.`, + Example: ` codeiq topology path checkout-svc payments-svc + codeiq topology path web-ui notifier /repo + codeiq topology path checkout-svc fulfilment --graph-dir /tmp/scratch.kuzu`, + Args: cobra.MinimumNArgs(2), + RunE: func(cmd *cobra.Command, args []string) error { + source := args[0] + target := args[1] + root, err := resolvePath(args[2:]) + if err != nil { + return err + } + gdir := graphDir + if gdir == "" { + gdir = filepath.Join(root, ".codeiq", "graph", "codeiq.kuzu") + } + store, err := graph.Open(gdir) + if err != nil { + return fmt.Errorf("open graph %s: %w", gdir, err) + } + defer store.Close() + t := query.NewTopology(store) + out, err := t.FindPath(source, target) + if err != nil { + return err + } + return printOrdered(cmd.OutOrStdout(), out) + }, + } + cmd.Flags().StringVar(&graphDir, "graph-dir", "", + "Path to the Kuzu graph store (default: /.codeiq/graph/codeiq.kuzu).") + return cmd +} diff --git a/go/internal/cli/topology_test.go b/go/internal/cli/topology_test.go new file mode 100644 index 00000000..c3169fd5 --- /dev/null +++ b/go/internal/cli/topology_test.go @@ -0,0 +1,157 @@ +package cli + +import ( + "bytes" + "encoding/json" + "path/filepath" + "strings" + "testing" +) + +// TestTopologyBareReturnsJSON asserts that running `codeiq topology` against +// fixture-minimal produces a JSON object with services / connections. 
+func TestTopologyBareReturnsJSON(t *testing.T) { + dir := statsFixtureDir(t) + root := NewRootCommand() + root.SetArgs([]string{ + "topology", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("topology: %v\n%s", err, out.String()) + } + var got map[string]any + if err := json.Unmarshal(out.Bytes(), &got); err != nil { + t.Fatalf("topology output is not valid JSON: %v\n%s", err, out.String()) + } + for _, k := range []string{"services", "connections", "service_count", "connection_count"} { + if _, ok := got[k]; !ok { + t.Errorf("topology JSON missing %q\n%s", k, out.String()) + } + } +} + +// TestTopologyServiceDetail asserts that `topology service-detail ` +// returns a detail object for the named service. fixture-minimal produces +// one SERVICE node named after the temp dir; we resolve the name from the +// bare topology call. +func TestTopologyServiceDetail(t *testing.T) { + dir := statsFixtureDir(t) + // Fetch the service name from the bare topology call. 
+ bare := NewRootCommand() + bare.SetArgs([]string{ + "topology", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var bareOut bytes.Buffer + bare.SetOut(&bareOut) + bare.SetErr(&bareOut) + if err := bare.Execute(); err != nil { + t.Fatalf("topology bare: %v\n%s", err, bareOut.String()) + } + var got struct { + Services []map[string]any `json:"services"` + } + if err := json.Unmarshal(bareOut.Bytes(), &got); err != nil { + t.Fatalf("decode bare: %v\n%s", err, bareOut.String()) + } + if len(got.Services) == 0 { + t.Fatalf("no services in topology:\n%s", bareOut.String()) + } + svcName, _ := got.Services[0]["name"].(string) + if svcName == "" { + t.Fatalf("service name missing from %v", got.Services[0]) + } + + root := NewRootCommand() + root.SetArgs([]string{ + "topology", "service-detail", svcName, + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("service-detail: %v\n%s", err, out.String()) + } + var detail map[string]any + if err := json.Unmarshal(out.Bytes(), &detail); err != nil { + t.Fatalf("decode service-detail: %v\n%s", err, out.String()) + } + if detail["name"] != svcName { + t.Fatalf("service-detail name=%v, want %s", detail["name"], svcName) + } +} + +// TestTopologyBlastRadius asserts that `topology blast-radius ` returns +// reachable nodes. Use a SERVICE id from the fixture; the SERVICE has +// CONTAINS edges to every node so depth=2 should reach plenty. +func TestTopologyBlastRadius(t *testing.T) { + dir := statsFixtureDir(t) + // Look up a service id via `find services`. 
+ finder := NewRootCommand() + finder.SetArgs([]string{ + "find", "services", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var finderOut bytes.Buffer + finder.SetOut(&finderOut) + finder.SetErr(&finderOut) + if err := finder.Execute(); err != nil { + t.Fatalf("find services: %v\n%s", err, finderOut.String()) + } + line := strings.SplitN(strings.TrimSpace(finderOut.String()), "\n", 2)[0] + id := strings.SplitN(line, "\t", 2)[0] + if id == "" { + t.Fatalf("no service id in find output: %q", finderOut.String()) + } + + root := NewRootCommand() + root.SetArgs([]string{ + "topology", "blast-radius", id, + "--depth", "2", + "--graph-dir", filepath.Join(dir, "graph.kuzu"), + dir, + }) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("blast-radius: %v\n%s", err, out.String()) + } + var br map[string]any + if err := json.Unmarshal(out.Bytes(), &br); err != nil { + t.Fatalf("decode blast-radius: %v\n%s", err, out.String()) + } + if br["source"] != id { + t.Fatalf("blast-radius source=%v, want %s", br["source"], id) + } + if br["affected_node_count"] == nil { + t.Fatalf("blast-radius missing affected_node_count:\n%s", out.String()) + } +} + +// TestTopologyParentHelp asserts the bare topology renders without help +// fallback when service map JSON is the expected output. With no +// subcommand and no --help flag, the parent prints the bare topology +// (the parent IS the bare command, not a help router). 
+func TestTopologyParentHelp(t *testing.T) { + root := NewRootCommand() + root.SetArgs([]string{"topology", "--help"}) + var out bytes.Buffer + root.SetOut(&out) + root.SetErr(&out) + if err := root.Execute(); err != nil { + t.Fatalf("topology --help: %v", err) + } + if !strings.Contains(out.String(), "service-detail") { + t.Fatalf("topology --help missing service-detail subcommand:\n%s", out.String()) + } +} diff --git a/go/internal/cli/util.go b/go/internal/cli/util.go new file mode 100644 index 00000000..3d7f7cb3 --- /dev/null +++ b/go/internal/cli/util.go @@ -0,0 +1,52 @@ +package cli + +import ( + "encoding/json" + "fmt" + "io" + "os" + "path/filepath" + + "github.com/randomcodespace/codeiq/go/internal/query" +) + +// resolvePath turns the optional [path] positional that most subcommands +// accept into an absolute, directory-validated path. An empty args slice is +// the current working directory. A non-empty args slice uses args[0]. +// +// Returns a usageError when the resolved path does not exist or is not a +// directory — that path-type problem is a user-input issue (exit code 1) per +// root.go's exit-code mapping. +func resolvePath(args []string) (string, error) { + path := "." + if len(args) >= 1 && args[0] != "" { + path = args[0] + } + abs, err := filepath.Abs(path) + if err != nil { + return "", fmt.Errorf("resolve %q: %w", path, err) + } + st, err := os.Stat(abs) + if err != nil { + return "", newUsageError("path %q does not exist", abs) + } + if !st.IsDir() { + return "", newUsageError("path %q is not a directory", abs) + } + return abs, nil +} + +// printOrdered writes a query.OrderedMap (or any other deterministic +// structure) as indented JSON. We use JSON for the default human view too — +// it's already deterministic, easily diffable in tests, and matches the +// JSON-by-default convention the Java CLI moved to in PR-5. Callers who want +// a more aggressive text rendering can opt-out by re-implementing this in +// the specific command. 
+func printOrdered(w io.Writer, v any) error { + enc := json.NewEncoder(w) + enc.SetIndent("", " ") + if om, ok := v.(*query.OrderedMap); ok && om != nil { + return enc.Encode(om) + } + return enc.Encode(v) +} diff --git a/go/internal/cli/version.go b/go/internal/cli/version.go new file mode 100644 index 00000000..6a4d800d --- /dev/null +++ b/go/internal/cli/version.go @@ -0,0 +1,86 @@ +package cli + +import ( + "encoding/json" + "fmt" + "io" + + "github.com/randomcodespace/codeiq/go/internal/buildinfo" + "github.com/spf13/cobra" +) + +// versionPayload is the JSON shape spec'd in §7.1. +type versionPayload struct { + Version string `json:"version"` + Commit string `json:"commit"` + CommitDirty bool `json:"commit_dirty"` + Built string `json:"built"` + GoVersion string `json:"go_version"` + Platform string `json:"platform"` + Features []string `json:"features"` +} + +func versionInfo() versionPayload { + return versionPayload{ + Version: buildinfo.Version, + Commit: buildinfo.Commit, + CommitDirty: buildinfo.DirtyBool(), + Built: buildinfo.Date, + GoVersion: buildinfo.GoVersion(), + Platform: buildinfo.Platform(), + Features: buildinfo.Features(), + } +} + +func printVersion(w io.Writer, asJSON bool) error { + info := versionInfo() + if asJSON { + b, err := json.MarshalIndent(info, "", " ") + if err != nil { + return err + } + _, err = fmt.Fprintln(w, string(b)) + return err + } + dirtyTag := "(clean)" + if info.CommitDirty { + dirtyTag = "(dirty)" + } + fmt.Fprintf(w, "codeiq %s\n", info.Version) + fmt.Fprintf(w, " commit: %s %s\n", info.Commit, dirtyTag) + fmt.Fprintf(w, " built: %s\n", info.Built) + fmt.Fprintf(w, " go: %s\n", info.GoVersion) + fmt.Fprintf(w, " platform: %s\n", info.Platform) + fmt.Fprintf(w, " features: %s\n", joinFeatures(info.Features)) + return nil +} + +func joinFeatures(f []string) string { + out := "" + for i, s := range f { + if i > 0 { + out += ", " + } + out += s + } + return out +} + +func init() { + registerSubcommand(func() 
*cobra.Command { + cmd := &cobra.Command{ + Use: "version", + Short: "Show version, commit, build date, and platform.", + Long: `Print the codeiq version, git commit hash, build date, Go +toolchain version, platform, and compiled-in feature flags. Use --json to +emit the same data as a single JSON object suitable for scripting.`, + Example: ` codeiq version + codeiq version --json + codeiq --version # alias of "codeiq version"`, + RunE: func(cmd *cobra.Command, args []string) error { + return printVersion(cmd.OutOrStdout(), flagJSON) + }, + } + return cmd + }) +} diff --git a/go/internal/cli/version_test.go b/go/internal/cli/version_test.go new file mode 100644 index 00000000..79cbeec8 --- /dev/null +++ b/go/internal/cli/version_test.go @@ -0,0 +1,61 @@ +package cli + +import ( + "bytes" + "encoding/json" + "strings" + "testing" + + "github.com/randomcodespace/codeiq/go/internal/buildinfo" +) + +func TestVersionTextFormat(t *testing.T) { + var buf bytes.Buffer + if err := printVersion(&buf, false); err != nil { + t.Fatal(err) + } + out := buf.String() + if !strings.HasPrefix(out, "codeiq "+buildinfo.Version) { + t.Errorf("expected prefix \"codeiq %s\", got %q", buildinfo.Version, out) + } + for _, want := range []string{"commit:", "built:", "go:", "platform:", "features:"} { + if !strings.Contains(out, want) { + t.Errorf("missing line %q in output:\n%s", want, out) + } + } +} + +func TestVersionJSONFormat(t *testing.T) { + var buf bytes.Buffer + if err := printVersion(&buf, true); err != nil { + t.Fatal(err) + } + var obj map[string]any + if err := json.Unmarshal(buf.Bytes(), &obj); err != nil { + t.Fatalf("invalid JSON: %v\n%s", err, buf.String()) + } + wantKeys := []string{"version", "commit", "commit_dirty", "built", "go_version", "platform", "features"} + for _, k := range wantKeys { + if _, ok := obj[k]; !ok { + t.Errorf("missing JSON key %q in %v", k, obj) + } + } +} + +func TestVersionCommitDirtyMarker(t *testing.T) { + orig := buildinfo.Dirty + 
t.Cleanup(func() { buildinfo.Dirty = orig }) + + buildinfo.Dirty = "true" + var buf bytes.Buffer + _ = printVersion(&buf, false) + if !strings.Contains(buf.String(), "(dirty)") { + t.Errorf("dirty marker missing when Dirty=true:\n%s", buf.String()) + } + buildinfo.Dirty = "false" + buf.Reset() + _ = printVersion(&buf, false) + if !strings.Contains(buf.String(), "(clean)") { + t.Errorf("clean marker missing when Dirty=false:\n%s", buf.String()) + } +} diff --git a/go/internal/detector/auth/certificate.go b/go/internal/detector/auth/certificate.go new file mode 100644 index 00000000..cda538b3 --- /dev/null +++ b/go/internal/detector/auth/certificate.go @@ -0,0 +1,165 @@ +package auth + +import ( + "regexp" + "strings" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/detector/base" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// CertificateAuthDetector detects certificate-based authentication (mTLS, +// X.509, TLS config, Azure AD client-cert flows). Mirrors Java +// CertificateAuthDetector — same multi-pattern + auth_type tag table. 
+type CertificateAuthDetector struct{} + +func NewCertificateAuthDetector() *CertificateAuthDetector { return &CertificateAuthDetector{} } + +func (CertificateAuthDetector) Name() string { return "certificate_auth" } +func (CertificateAuthDetector) SupportedLanguages() []string { + return []string{"java", "python", "typescript", "csharp", "json", "yaml"} +} +func (CertificateAuthDetector) DefaultConfidence() model.Confidence { + return base.RegexDetectorDefaultConfidence +} + +func init() { detector.RegisterDefault(NewCertificateAuthDetector()) } + +type certPatternDef struct { + regex *regexp.Regexp + authType string +} + +var ( + certMtlsPatterns = []certPatternDef{ + {regexp.MustCompile(`\bssl_verify_client\b`), "mtls"}, + {regexp.MustCompile(`\brequestCert\s*:\s*true\b`), "mtls"}, + {regexp.MustCompile(`\bclientAuth\s*=\s*"true"`), "mtls"}, + {regexp.MustCompile(`\bX509AuthenticationFilter\b`), "mtls"}, + {regexp.MustCompile(`\bAddCertificateForwarding\b`), "mtls"}, + } + certX509Patterns = []certPatternDef{ + {regexp.MustCompile(`\bX509AuthenticationFilter\b`), "x509"}, + {regexp.MustCompile(`\bCertificateAuthenticationDefaults\b`), "x509"}, + {regexp.MustCompile(`\.x509\s*\(`), "x509"}, + } + certTlsConfigPatterns = []certPatternDef{ + {regexp.MustCompile(`\bjavax\.net\.ssl\.keyStore\b`), "tls_config"}, + {regexp.MustCompile(`\bssl\.SSLContext\b`), "tls_config"}, + {regexp.MustCompile(`\btls\.createServer\b`), "tls_config"}, + {regexp.MustCompile(`(?:cert|key|ca)\s*[=:]\s*(?:fs\.readFileSync\s*\(|['"][\w/.\\-]+\.(?:pem|crt|key|cert)['"])`), "tls_config"}, + {regexp.MustCompile(`\btrustStore\b`), "tls_config"}, + } + certAzureAdPatterns = []certPatternDef{ + {regexp.MustCompile(`\bAzureAd\b`), "azure_ad"}, + {regexp.MustCompile(`\bAZURE_TENANT_ID\b`), "azure_ad"}, + {regexp.MustCompile(`\bAZURE_CLIENT_ID\b`), "azure_ad"}, + {regexp.MustCompile(`\bmsal\b`), "azure_ad"}, + {regexp.MustCompile(`['"]@azure/msal-browser['"]`), "azure_ad"}, + 
{regexp.MustCompile(`\bAddMicrosoftIdentityWebApi\b`), "azure_ad"}, + {regexp.MustCompile(`\bClientCertificateCredential\b`), "azure_ad"}, + } + certCertPathRE = regexp.MustCompile(`['"]([^'"]*\.(?:pem|crt|key|cert|pfx|p12))['"]`) + certTenantIDRE = regexp.MustCompile(`AZURE_TENANT_ID\s*[=:]\s*['"]?([a-f0-9-]+)['"]?`) + // certStrictKeywords gate detector entry. STRICT subset: file must + // contain at least one of these high-signal markers before we even + // consider running the 20 per-pattern regexes. Loose keywords like + // ".pem"/".crt"/".cert" are NOT in this set because they show up as + // path/extension references in millions of unrelated lines (e.g. C# + // `using System.Security.Cryptography.X509Certificates`) and would + // turn the per-line gate into a no-op. + // + // Profiling on PSScriptAnalyzer (593 files, 203 C#) showed + // CertificateAuthDetector consuming 99% of indexing CPU before this + // pre-screen. Tighter gate keeps the detector fast on cert-free repos. + certStrictKeywords = []string{ + "ssl_verify_client", "requestCert", "clientAuth=", + "AddCertificateForwarding", "CertificateAuthenticationDefaults", + ".x509(", "X509AuthenticationFilter", + "javax.net.ssl", "SSLContext", "tls.createServer", + "trustStore", "AzureAd", "AZURE_TENANT_ID", "AZURE_CLIENT_ID", + "ClientCertificateCredential", "AddMicrosoftIdentityWebApi", + "@azure/msal", + } +) + +var certAllPatterns []certPatternDef + +func init() { + certAllPatterns = append(certAllPatterns, certMtlsPatterns...) + certAllPatterns = append(certAllPatterns, certX509Patterns...) + certAllPatterns = append(certAllPatterns, certTlsConfigPatterns...) + certAllPatterns = append(certAllPatterns, certAzureAdPatterns...) +} + +// certLineQuickScan returns true if s contains any of the auth-cert +// keywords. Cheap O(n*k) byte scan beats running 20 regex alternation +// engines per line. Used both as a file-level and a per-line gate. 
+func certLineQuickScan(s string) bool { + for _, kw := range certStrictKeywords { + if strings.Contains(s, kw) { + return true + } + } + return false +} + +func (d CertificateAuthDetector) Detect(ctx *detector.Context) *detector.Result { + text := ctx.Content + if text == "" { + return detector.EmptyResult() + } + if !certLineQuickScan(text) { + return detector.EmptyResult() + } + + var nodes []*model.CodeNode + lines := strings.Split(text, "\n") + seenLines := map[int]bool{} + + for lineIdx, line := range lines { + // Per-line pre-screen: skip the 20 regex passes on lines without + // any cert-auth keyword. ~99% reduction on real codebases. + if !certLineQuickScan(line) { + continue + } + for _, pdef := range certAllPatterns { + if seenLines[lineIdx] { + break + } + if pdef.regex.MatchString(line) { + seenLines[lineIdx] = true + lineNum := lineIdx + 1 + matched := strings.TrimSpace(line) + n := model.NewCodeNode( + "auth:"+ctx.FilePath+":cert:"+itoa(lineNum), + model.NodeGuard, "Certificate auth ("+pdef.authType+"): "+truncate(matched, 60), + ) + n.FilePath = ctx.FilePath + n.LineStart = lineNum + n.LineEnd = lineNum + n.Source = "CertificateAuthDetector" + n.Properties["auth_type"] = pdef.authType + n.Properties["language"] = ctx.Language + n.Properties["pattern"] = truncate(matched, 120) + + if cm := certCertPathRE.FindStringSubmatch(line); len(cm) >= 2 { + n.Properties["cert_path"] = cm[1] + } + if tm := certTenantIDRE.FindStringSubmatch(line); len(tm) >= 2 { + n.Properties["tenant_id"] = tm[1] + } + if pdef.authType == "azure_ad" { + if strings.Contains(line, "ClientCertificateCredential") { + n.Properties["auth_flow"] = "client_certificate" + } else if strings.Contains(strings.ToLower(line), "msal") { + n.Properties["auth_flow"] = "msal" + } + } + nodes = append(nodes, n) + } + } + } + return detector.ResultOf(nodes, nil) +} diff --git a/go/internal/detector/auth/certificate_test.go b/go/internal/detector/auth/certificate_test.go new file mode 100644 index 
00000000..fbcb6e4c --- /dev/null +++ b/go/internal/detector/auth/certificate_test.go @@ -0,0 +1,111 @@ +package auth + +import ( + "testing" + + "github.com/randomcodespace/codeiq/go/internal/detector" +) + +func TestCertificateMTLS(t *testing.T) { + d := NewCertificateAuthDetector() + src := `ssl_verify_client on; +clientAuth="true" +` + r := d.Detect(&detector.Context{FilePath: "nginx.conf", Language: "yaml", Content: src}) + if len(r.Nodes) < 2 { + t.Errorf("expected >=2 mtls guards, got %d", len(r.Nodes)) + } + for _, n := range r.Nodes { + if n.Properties["auth_type"] != "mtls" { + t.Errorf("auth_type = %v, want mtls", n.Properties["auth_type"]) + } + } +} + +func TestCertificateX509(t *testing.T) { + d := NewCertificateAuthDetector() + src := `import org.springframework.security.web.authentication.preauth.x509.X509AuthenticationFilter; +http.x509(); +` + r := d.Detect(&detector.Context{FilePath: "Sec.java", Language: "java", Content: src}) + found := false + for _, n := range r.Nodes { + if n.Properties["auth_type"] == "x509" { + found = true + } + } + if !found { + t.Error("expected x509 guard") + } +} + +func TestCertificateAzureAd(t *testing.T) { + d := NewCertificateAuthDetector() + src := `var tenantId = AZURE_TENANT_ID="abc123-def456"; +var cred = new ClientCertificateCredential(); +` + r := d.Detect(&detector.Context{FilePath: "Auth.cs", Language: "csharp", Content: src}) + azureFound := false + clientCertFlowFound := false + tenantFound := false + for _, n := range r.Nodes { + if n.Properties["auth_type"] == "azure_ad" { + azureFound = true + if n.Properties["auth_flow"] == "client_certificate" { + clientCertFlowFound = true + } + if n.Properties["tenant_id"] == "abc123-def456" { + tenantFound = true + } + } + } + if !azureFound { + t.Error("expected azure_ad guard") + } + if !clientCertFlowFound { + t.Error("expected client_certificate auth_flow") + } + if !tenantFound { + t.Error("expected extracted tenant_id") + } +} + +func 
TestCertificateTlsConfig(t *testing.T) { + d := NewCertificateAuthDetector() + src := `const tls = require('tls'); +const server = tls.createServer({ cert: 'server.pem', key: 'server.key' }); +` + r := d.Detect(&detector.Context{FilePath: "server.ts", Language: "typescript", Content: src}) + if len(r.Nodes) < 1 { + t.Error("expected >=1 tls_config guard") + } +} + +func TestCertificatePreScreenSkip(t *testing.T) { + d := NewCertificateAuthDetector() + r := d.Detect(&detector.Context{ + FilePath: "x.java", Language: "java", + Content: "public class Foo {}", + }) + if len(r.Nodes) != 0 { + t.Error("expected pre-screen to short-circuit on text with no auth keywords") + } +} + +func TestCertificateNegative(t *testing.T) { + d := NewCertificateAuthDetector() + r := d.Detect(&detector.Context{FilePath: "x.java", Language: "java", Content: ""}) + if len(r.Nodes) != 0 { + t.Fatal("expected 0 nodes") + } +} + +func TestCertificateDeterminism(t *testing.T) { + d := NewCertificateAuthDetector() + ctx := &detector.Context{FilePath: "nginx.conf", Language: "yaml", Content: "ssl_verify_client on;\n"} + r1 := d.Detect(ctx) + r2 := d.Detect(ctx) + if len(r1.Nodes) != len(r2.Nodes) { + t.Fatal("non-deterministic") + } +} diff --git a/go/internal/detector/auth/ldap.go b/go/internal/detector/auth/ldap.go new file mode 100644 index 00000000..7dc713e3 --- /dev/null +++ b/go/internal/detector/auth/ldap.go @@ -0,0 +1,103 @@ +// Package auth holds cross-cutting authentication-related detectors. +package auth + +import ( + "regexp" + "strings" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/detector/base" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// LdapAuthDetector detects LDAP / Active Directory authentication +// configuration across Java, Python, TypeScript, and C#. Mirrors Java +// LdapAuthDetector. 
+type LdapAuthDetector struct{} + +func NewLdapAuthDetector() *LdapAuthDetector { return &LdapAuthDetector{} } + +func (LdapAuthDetector) Name() string { return "ldap_auth" } +func (LdapAuthDetector) SupportedLanguages() []string { + return []string{"java", "python", "typescript", "csharp"} +} +func (LdapAuthDetector) DefaultConfidence() model.Confidence { return base.RegexDetectorDefaultConfidence } + +func init() { detector.RegisterDefault(NewLdapAuthDetector()) } + +var ( + ldapJavaPatterns = []*regexp.Regexp{ + regexp.MustCompile(`\bLdapContextSource\b`), + regexp.MustCompile(`\bLdapTemplate\b`), + regexp.MustCompile(`\bActiveDirectoryLdapAuthenticationProvider\b`), + regexp.MustCompile(`@EnableLdapRepositories\b`), + } + ldapPythonPatterns = []*regexp.Regexp{ + regexp.MustCompile(`\bldap3\.Connection\b`), + regexp.MustCompile(`\bldap3\.Server\b`), + regexp.MustCompile(`\bAUTH_LDAP_SERVER_URI\b`), + regexp.MustCompile(`\bAUTH_LDAP_BIND_DN\b`), + } + ldapTsPatterns = []*regexp.Regexp{ + regexp.MustCompile(`require\s*\(\s*['"]ldapjs['"]\s*\)`), + regexp.MustCompile(`(?:import\s+.*\s+from\s+['"]ldapjs['"]|import\s+ldapjs\b)`), + regexp.MustCompile(`['"]passport-ldapauth['"]`), + } + ldapCsharpPatterns = []*regexp.Regexp{ + regexp.MustCompile(`\bSystem\.DirectoryServices\b`), + regexp.MustCompile(`\bLdapConnection\b`), + regexp.MustCompile(`\bDirectoryEntry\b`), + } + ldapPreScreen = regexp.MustCompile(`(?i:ldap)|DirectoryServices|DirectoryEntry`) +) + +var ldapPatternsByLang = map[string][]*regexp.Regexp{ + "java": ldapJavaPatterns, + "python": ldapPythonPatterns, + "typescript": ldapTsPatterns, + "csharp": ldapCsharpPatterns, +} + +func (d LdapAuthDetector) Detect(ctx *detector.Context) *detector.Result { + patterns, ok := ldapPatternsByLang[ctx.Language] + if !ok { + return detector.EmptyResult() + } + text := ctx.Content + if text == "" { + return detector.EmptyResult() + } + if !ldapPreScreen.MatchString(text) { + return detector.EmptyResult() + } + + var 
nodes []*model.CodeNode + lines := strings.Split(text, "\n") + seenLines := map[int]bool{} + + for lineIdx, line := range lines { + for _, pat := range patterns { + if seenLines[lineIdx] { + break + } + if pat.MatchString(line) { + seenLines[lineIdx] = true + lineNum := lineIdx + 1 + matched := strings.TrimSpace(line) + n := model.NewCodeNode( + "auth:"+ctx.FilePath+":ldap:"+itoa(lineNum), + model.NodeGuard, "LDAP auth: "+truncate(matched, 80), + ) + n.FilePath = ctx.FilePath + n.LineStart = lineNum + n.LineEnd = lineNum + n.Source = "LdapAuthDetector" + n.Properties["auth_type"] = "ldap" + n.Properties["language"] = ctx.Language + n.Properties["pattern"] = truncate(matched, 120) + nodes = append(nodes, n) + } + } + } + return detector.ResultOf(nodes, nil) +} diff --git a/go/internal/detector/auth/ldap_test.go b/go/internal/detector/auth/ldap_test.go new file mode 100644 index 00000000..ba7d001d --- /dev/null +++ b/go/internal/detector/auth/ldap_test.go @@ -0,0 +1,101 @@ +package auth + +import ( + "testing" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +func TestLdapJavaPositive(t *testing.T) { + d := NewLdapAuthDetector() + src := `import org.springframework.security.ldap.authentication.LdapAuthenticationProvider; + +@Bean +public LdapContextSource contextSource() { + return new LdapContextSource(); +} + +@Bean +public LdapTemplate ldapTemplate() { + return new LdapTemplate(contextSource()); +} +` + r := d.Detect(&detector.Context{FilePath: "Auth.java", Language: "java", Content: src}) + guards := 0 + for _, n := range r.Nodes { + if n.Kind == model.NodeGuard { + guards++ + if n.Properties["auth_type"] != "ldap" { + t.Errorf("auth_type = %v", n.Properties["auth_type"]) + } + if n.Properties["language"] != "java" { + t.Errorf("language = %v", n.Properties["language"]) + } + } + } + if guards < 2 { + t.Errorf("expected >=2 GUARD, got %d", guards) + } +} + +func TestLdapPython(t *testing.T) 
{ + d := NewLdapAuthDetector() + src := `import ldap3 +server = ldap3.Server('ldap://example.com') +conn = ldap3.Connection(server, user='cn=admin', password='secret') +` + r := d.Detect(&detector.Context{FilePath: "auth.py", Language: "python", Content: src}) + if len(r.Nodes) < 2 { + t.Errorf("expected >=2 GUARD, got %d", len(r.Nodes)) + } +} + +func TestLdapTypescript(t *testing.T) { + d := NewLdapAuthDetector() + src := `const ldap = require('ldapjs'); +const passportLdap = require('passport-ldapauth'); +` + r := d.Detect(&detector.Context{FilePath: "auth.ts", Language: "typescript", Content: src}) + if len(r.Nodes) < 1 { + t.Error("expected >=1 GUARD") + } +} + +func TestLdapCsharp(t *testing.T) { + d := NewLdapAuthDetector() + src := `using System.DirectoryServices; +var entry = new DirectoryEntry("LDAP://example.com"); +` + r := d.Detect(&detector.Context{FilePath: "Auth.cs", Language: "csharp", Content: src}) + if len(r.Nodes) < 1 { + t.Error("expected >=1 GUARD") + } +} + +func TestLdapUnsupportedLanguage(t *testing.T) { + d := NewLdapAuthDetector() + r := d.Detect(&detector.Context{FilePath: "x.rs", Language: "rust", Content: "LdapTemplate"}) + if len(r.Nodes) != 0 { + t.Error("rust not supported — expect 0") + } +} + +func TestLdapNegative(t *testing.T) { + d := NewLdapAuthDetector() + r := d.Detect(&detector.Context{FilePath: "x.java", Language: "java", Content: "// no auth here"}) + if len(r.Nodes) != 0 { + t.Error("expected 0 nodes when no auth keyword") + } +} + +func TestLdapDeterminism(t *testing.T) { + d := NewLdapAuthDetector() + ctx := &detector.Context{FilePath: "Auth.java", Language: "java", + Content: "LdapContextSource ctx;\nLdapTemplate tpl;\n"} + r1 := d.Detect(ctx) + r2 := d.Detect(ctx) + if len(r1.Nodes) != len(r2.Nodes) { + t.Fatal("non-deterministic") + } +} diff --git a/go/internal/detector/auth/session_header.go b/go/internal/detector/auth/session_header.go new file mode 100644 index 00000000..20086321 --- /dev/null +++ 
b/go/internal/detector/auth/session_header.go @@ -0,0 +1,128 @@ +package auth + +import ( + "regexp" + "strings" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/detector/base" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// SessionHeaderAuthDetector detects session-, header-, and API-key-based +// authentication. Mirrors Java SessionHeaderAuthDetector. +type SessionHeaderAuthDetector struct{} + +func NewSessionHeaderAuthDetector() *SessionHeaderAuthDetector { + return &SessionHeaderAuthDetector{} +} + +func (SessionHeaderAuthDetector) Name() string { return "session_header_auth" } +func (SessionHeaderAuthDetector) SupportedLanguages() []string { + return []string{"java", "python", "typescript"} +} +func (SessionHeaderAuthDetector) DefaultConfidence() model.Confidence { + return base.RegexDetectorDefaultConfidence +} + +func init() { detector.RegisterDefault(NewSessionHeaderAuthDetector()) } + +type sessionPatternDef struct { + regex *regexp.Regexp + authType string + nodeKind model.NodeKind +} + +var ( + sessionSessionPatterns = []sessionPatternDef{ + {regexp.MustCompile(`['"]express-session['"]`), "session", model.NodeMiddleware}, + {regexp.MustCompile(`['"]cookie-session['"]`), "session", model.NodeMiddleware}, + {regexp.MustCompile(`@SessionAttributes\b`), "session", model.NodeGuard}, + {regexp.MustCompile(`\bSessionMiddleware\b`), "session", model.NodeMiddleware}, + {regexp.MustCompile(`\bHttpSession\b`), "session", model.NodeGuard}, + {regexp.MustCompile(`\bSESSION_ENGINE\b`), "session", model.NodeGuard}, + } + sessionHeaderPatterns = []sessionPatternDef{ + {regexp.MustCompile(`(?i)['"]X-API-Key['"]`), "header", model.NodeGuard}, + {regexp.MustCompile(`(?i)(?:req|request|ctx)\.headers?\s*\[\s*['"]authorization['"]\s*]`), "header", model.NodeGuard}, + {regexp.MustCompile(`(?i)getHeader\s*\(\s*['"]Authorization['"]`), "header", model.NodeGuard}, + } + sessionApiKeyPatterns = 
[]sessionPatternDef{ + {regexp.MustCompile(`(?i)(?:req|request)\.headers?\s*\[\s*['"]x-api-key['"]\s*]`), "api_key", model.NodeGuard}, + {regexp.MustCompile(`(?i)\bapi[_-]?key\s*[=:]\s*`), "api_key", model.NodeGuard}, + {regexp.MustCompile(`(?i)\bvalidate_?api_?key\b`), "api_key", model.NodeGuard}, + } + sessionCsrfPatterns = []sessionPatternDef{ + {regexp.MustCompile(`@csrf_protect\b`), "csrf", model.NodeGuard}, + {regexp.MustCompile(`\bcsrf_exempt\b`), "csrf", model.NodeGuard}, + {regexp.MustCompile(`\bCsrfViewMiddleware\b`), "csrf", model.NodeMiddleware}, + {regexp.MustCompile(`['"]csurf['"]`), "csrf", model.NodeMiddleware}, + } + sessionPreScreen = regexp.MustCompile( + `express-session|cookie-session|@SessionAttributes|SessionMiddleware|` + + `HttpSession|SESSION_ENGINE|` + + `(?i:X-API|Authorization|api[_-]?key|csurf|csrf|getHeader)`, + ) +) + +var sessionAllPatterns []sessionPatternDef +var sessionIDTag = map[string]string{ + "session": "session", + "header": "header", + "api_key": "apikey", + "csrf": "csrf", +} + +func init() { + sessionAllPatterns = append(sessionAllPatterns, sessionSessionPatterns...) + sessionAllPatterns = append(sessionAllPatterns, sessionHeaderPatterns...) + sessionAllPatterns = append(sessionAllPatterns, sessionApiKeyPatterns...) + sessionAllPatterns = append(sessionAllPatterns, sessionCsrfPatterns...) 
+} + +func (d SessionHeaderAuthDetector) Detect(ctx *detector.Context) *detector.Result { + switch ctx.Language { + case "java", "python", "typescript": + // ok + default: + return detector.EmptyResult() + } + text := ctx.Content + if text == "" { + return detector.EmptyResult() + } + if !sessionPreScreen.MatchString(text) { + return detector.EmptyResult() + } + + var nodes []*model.CodeNode + lines := strings.Split(text, "\n") + seenLines := map[int]bool{} + + for lineIdx, line := range lines { + for _, pdef := range sessionAllPatterns { + if seenLines[lineIdx] { + break + } + if pdef.regex.MatchString(line) { + seenLines[lineIdx] = true + lineNum := lineIdx + 1 + matched := strings.TrimSpace(line) + tag := sessionIDTag[pdef.authType] + n := model.NewCodeNode( + "auth:"+ctx.FilePath+":"+tag+":"+itoa(lineNum), + pdef.nodeKind, pdef.authType+" auth: "+truncate(matched, 70), + ) + n.FilePath = ctx.FilePath + n.LineStart = lineNum + n.LineEnd = lineNum + n.Source = "SessionHeaderAuthDetector" + n.Properties["auth_type"] = pdef.authType + n.Properties["language"] = ctx.Language + n.Properties["pattern"] = truncate(matched, 120) + nodes = append(nodes, n) + } + } + } + return detector.ResultOf(nodes, nil) +} diff --git a/go/internal/detector/auth/session_header_test.go b/go/internal/detector/auth/session_header_test.go new file mode 100644 index 00000000..e3b0513b --- /dev/null +++ b/go/internal/detector/auth/session_header_test.go @@ -0,0 +1,114 @@ +package auth + +import ( + "testing" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +func TestSessionHeaderSession(t *testing.T) { + d := NewSessionHeaderAuthDetector() + src := `const session = require('express-session'); +app.use(session({ secret: 's' })); +` + r := d.Detect(&detector.Context{FilePath: "app.ts", Language: "typescript", Content: src}) + hasSession := false + for _, n := range r.Nodes { + if n.Properties["auth_type"] == "session" { + 
hasSession = true + if n.Kind != model.NodeMiddleware { + t.Errorf("expected MIDDLEWARE for express-session, got %v", n.Kind) + } + } + } + if !hasSession { + t.Error("expected session guard") + } +} + +func TestSessionHeaderApiKey(t *testing.T) { + d := NewSessionHeaderAuthDetector() + src := `const key = req.headers['x-api-key']; +def validate_api_key(k): pass +` + r := d.Detect(&detector.Context{FilePath: "h.ts", Language: "typescript", Content: src}) + hasApiKey := false + for _, n := range r.Nodes { + if n.Properties["auth_type"] == "api_key" { + hasApiKey = true + } + } + if !hasApiKey { + t.Error("expected api_key guard") + } +} + +func TestSessionHeaderCsrf(t *testing.T) { + d := NewSessionHeaderAuthDetector() + src := `from django.views.decorators.csrf import csrf_exempt + +@csrf_exempt +def view(request): pass +` + r := d.Detect(&detector.Context{FilePath: "v.py", Language: "python", Content: src}) + hasCsrf := false + for _, n := range r.Nodes { + if n.Properties["auth_type"] == "csrf" { + hasCsrf = true + } + } + if !hasCsrf { + t.Error("expected csrf guard") + } +} + +func TestSessionHeaderHeader(t *testing.T) { + d := NewSessionHeaderAuthDetector() + src := `const auth = req.headers['authorization'];` + r := d.Detect(&detector.Context{FilePath: "h.ts", Language: "typescript", Content: src}) + hasHeader := false + for _, n := range r.Nodes { + if n.Properties["auth_type"] == "header" { + hasHeader = true + } + } + if !hasHeader { + t.Error("expected header guard") + } +} + +func TestSessionHeaderUnsupportedLanguage(t *testing.T) { + d := NewSessionHeaderAuthDetector() + r := d.Detect(&detector.Context{ + FilePath: "x.rs", Language: "rust", + Content: "HttpSession s;", + }) + if len(r.Nodes) != 0 { + t.Error("rust not supported") + } +} + +func TestSessionHeaderPreScreenSkip(t *testing.T) { + d := NewSessionHeaderAuthDetector() + r := d.Detect(&detector.Context{ + FilePath: "x.java", Language: "java", + Content: "public class Foo {}", + }) + if 
// truncate returns s clipped to at most max bytes. No ellipsis is added.
//
// The cut never lands inside a multi-byte UTF-8 sequence: when byte max falls
// in the middle of a rune, the whole rune is dropped, so the result may be a
// few bytes shorter than max but stays valid UTF-8 whenever s is. For
// ASCII-only input the result is exactly s[:max]. A max of zero or less
// yields the empty string instead of panicking.
func truncate(s string, max int) string {
	if max <= 0 {
		return ""
	}
	if len(s) <= max {
		return s
	}
	// Ranging over a string yields the byte offset at which each rune starts,
	// so the largest offset not exceeding max is the last safe place to cut.
	cut := 0
	for i := range s {
		if i > max {
			break
		}
		cut = i
	}
	return s[:cut]
}
// LineAt returns the 1-based line number for a byte offset in text. Mirrors
// the Java lineAt helper (counts \n characters up to offset and adds 1).
//
// Out-of-range offsets are clamped rather than causing a slice panic: a
// negative offset is treated as 0 (line 1) and an offset past the end is
// treated as len(text) (the last line). Empty text returns 1.
func LineAt(text string, offset int) int {
	if offset < 0 {
		offset = 0
	}
	if offset > len(text) {
		offset = len(text)
	}
	return strings.Count(text[:offset], "\n") + 1
}
+package base + +import ( + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// EnsureFileAnchor returns the canonical ID of the file-as-module anchor +// node for ctx.FilePath and appends the node to nodes once. Caller must +// pass the same `seen` map across invocations within a single detector +// run (or nil for one-shot calls). +// +// langPrefix scopes the anchor namespace ("py" for Python, "ts" for +// TypeScript, etc.) so cross-language detectors don't collide on the +// same path. +// +// Detector source/confidence are stamped onto the anchor — pick a +// confidence that's at-or-below the actual emission detector so the +// merge rule (higher wins) doesn't accidentally demote a high-confidence +// emission later. +func EnsureFileAnchor(ctx *detector.Context, langPrefix, detectorName string, conf model.Confidence, nodes *[]*model.CodeNode, seen map[string]bool) string { + id := langPrefix + ":file:" + ctx.FilePath + if seen != nil && seen[id] { + return id + } + if seen != nil { + seen[id] = true + } + n := model.NewCodeNode(id, model.NodeModule, ctx.FilePath) + n.FilePath = ctx.FilePath + n.Source = detectorName + n.Confidence = conf + n.Properties["module_type"] = langPrefix + "_file" + *nodes = append(*nodes, n) + return id +} + +// EnsureExternalAnchor returns the canonical ID of an external module / +// package / image target and appends it to nodes once per unique name. +// idPrefix scopes the namespace ("py:external", "rust:external", +// "docker:image", etc.). 
// FindLineNumber reports the 1-based line that contains the given byte
// offset of text. It counts the '\n' bytes located strictly before offset
// and adds one, so an offset pointing at a newline still belongs to the line
// that newline terminates.
//
// Out-of-range offsets are clamped into [0, len(text)]; empty input yields 1.
// Mirrors Java's findLineNumber helper used throughout the regex detectors.
func FindLineNumber(text string, offset int) int {
	end := offset
	switch {
	case end < 0:
		end = 0
	case end > len(text):
		end = len(text)
	}

	lineNo := 1
	for _, b := range []byte(text[:end]) {
		if b == '\n' {
			lineNo++
		}
	}
	return lineNo
}
// FileName returns the last component of path: everything after the final
// '/' or '\\'. A path without any separator is returned unchanged, and a
// path ending in a separator yields "". Mirrors Java
// AbstractRegexDetector.fileName().
func FileName(path string) string {
	// Scan from the end so the first separator found is the last one in the
	// path. An empty path never enters the loop and is returned as-is.
	for i := len(path) - 1; i >= 0; i-- {
		switch path[i] {
		case '/', '\\':
			return path[i+1:]
		}
	}
	return path
}
// AsMap returns obj as a map[string]any, or nil when obj is nil or holds any
// other type. Structured detectors use it to walk parsed YAML/JSON-style
// data without repeating type assertions.
func AsMap(obj any) map[string]any {
	m, _ := obj.(map[string]any)
	return m
}

// GetMap returns the map stored under key in container. The result is nil
// when container is not a map, the key is absent, or the value is not itself
// a map[string]any.
func GetMap(container any, key string) map[string]any {
	// Indexing a nil map is safe and yields nil, which AsMap maps to nil.
	return AsMap(AsMap(container)[key])
}

// GetList returns the []any stored under key in container. The result is nil
// when container is not a map, the key is absent, or the value is not a
// []any.
func GetList(container any, key string) []any {
	list, _ := AsMap(container)[key].([]any)
	return list
}

// GetString returns the string stored under key in container. The result is
// "" when container is not a map, the key is absent, or the value is not a
// string.
func GetString(container any, key string) string {
	str, _ := AsMap(container)[key].(string)
	return str
}

// GetStringOrDefault returns the string stored under key in container, or
// fallback when GetString yields "". That covers a missing key, a
// non-string value, and a value that is the empty string.
func GetStringOrDefault(container any, key, fallback string) string {
	if s := GetString(container, key); s != "" {
		return s
	}
	return fallback
}
+func BuildFileNode(ctx *detector.Context, format string) *model.CodeNode { + fp := ctx.FilePath + fileID := format + ":" + fp + n := model.NewCodeNode(fileID, model.NodeConfigFile, fp) + n.FQN = fp + n.Module = ctx.ModuleName + n.FilePath = fp + n.LineStart = 1 + n.Confidence = StructuredDetectorDefaultConfidence + n.Properties["format"] = format + return n +} + +// AddKeyNode appends a CONFIG_KEY node and a CONTAINS edge from fileID to it. +// Mirrors Java addKeyNode. +func AddKeyNode(fileID, fp, key, format string, ctx *detector.Context, + nodes *[]*model.CodeNode, edges *[]*model.CodeEdge) { + keyID := format + ":" + fp + ":" + key + n := model.NewCodeNode(keyID, model.NodeConfigKey, key) + n.FQN = fp + ":" + key + n.Module = ctx.ModuleName + n.FilePath = fp + n.Confidence = StructuredDetectorDefaultConfidence + *nodes = append(*nodes, n) + e := model.NewCodeEdge(fileID+"->"+keyID, model.EdgeContains, fileID, keyID) + e.Confidence = StructuredDetectorDefaultConfidence + *edges = append(*edges, e) +} diff --git a/go/internal/detector/base/treesitter.go b/go/internal/detector/base/treesitter.go new file mode 100644 index 00000000..b7e96858 --- /dev/null +++ b/go/internal/detector/base/treesitter.go @@ -0,0 +1,62 @@ +package base + +import ( + sitter "github.com/smacker/go-tree-sitter" + + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// TreeSitterDetectorDefaultConfidence is the floor for AST-backed detectors. +// Java equivalent: AbstractJavaParserDetector.defaultConfidence() = SYNTACTIC. +const TreeSitterDetectorDefaultConfidence = model.ConfidenceSyntactic + +// Walk performs a pre-order DFS over the tree-sitter subtree rooted at root. +// The visitor returns false to abort the walk (siblings + descendants of the +// current node are still skipped if false is returned at that node). 
+func Walk(root *sitter.Node, visit func(*sitter.Node) bool) { + if root == nil { + return + } + if !visit(root) { + return + } + for i := 0; i < int(root.NamedChildCount()); i++ { + walkAborted := false + Walk(root.NamedChild(i), func(n *sitter.Node) bool { + if walkAborted { + return false + } + ok := visit(n) + if !ok { + walkAborted = true + } + return ok + }) + } +} + +// FindFirstByType returns the first descendant whose type matches t (pre-order +// DFS). Returns nil when not found. +func FindFirstByType(root *sitter.Node, t string) *sitter.Node { + var result *sitter.Node + Walk(root, func(n *sitter.Node) bool { + if n.Type() == t { + result = n + return false + } + return true + }) + return result +} + +// FindAllByType returns every descendant whose type matches t (pre-order DFS). +func FindAllByType(root *sitter.Node, t string) []*sitter.Node { + var out []*sitter.Node + Walk(root, func(n *sitter.Node) bool { + if n.Type() == t { + out = append(out, n) + } + return true + }) + return out +} diff --git a/go/internal/detector/base/treesitter_test.go b/go/internal/detector/base/treesitter_test.go new file mode 100644 index 00000000..15f5f4d2 --- /dev/null +++ b/go/internal/detector/base/treesitter_test.go @@ -0,0 +1,58 @@ +package base + +import ( + "context" + "testing" + + sitter "github.com/smacker/go-tree-sitter" + "github.com/smacker/go-tree-sitter/python" +) + +func TestWalkVisitsAllNodes(t *testing.T) { + src := []byte("def f():\n return 1\n") + p := sitter.NewParser() + p.SetLanguage(python.GetLanguage()) + tree, err := p.ParseCtx(context.Background(), nil, src) + if err != nil { + t.Fatal(err) + } + defer tree.Close() + + var types []string + Walk(tree.RootNode(), func(n *sitter.Node) bool { + types = append(types, n.Type()) + return true + }) + // Sanity: root should be "module" and we should have visited at least + // the function_definition node. 
+ if len(types) == 0 || types[0] != "module" { + t.Fatalf("unexpected walk order: %v", types) + } + found := false + for _, ty := range types { + if ty == "function_definition" { + found = true + break + } + } + if !found { + t.Fatalf("walk did not visit function_definition; saw %v", types) + } +} + +func TestWalkAbortsOnFalse(t *testing.T) { + src := []byte("def f():\n return 1\n") + p := sitter.NewParser() + p.SetLanguage(python.GetLanguage()) + tree, _ := p.ParseCtx(context.Background(), nil, src) + defer tree.Close() + + count := 0 + Walk(tree.RootNode(), func(n *sitter.Node) bool { + count++ + return count < 2 // stop after the second visit + }) + if count != 2 { + t.Fatalf("Walk did not abort at count=2: count = %d", count) + } +} diff --git a/go/internal/detector/csharp/efcore.go b/go/internal/detector/csharp/efcore.go new file mode 100644 index 00000000..d1c6c0c3 --- /dev/null +++ b/go/internal/detector/csharp/efcore.go @@ -0,0 +1,117 @@ +// Package csharp holds C#/.NET detectors. +package csharp + +import ( + "regexp" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/detector/base" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// EfcoreDetector detects Entity Framework Core DbContexts, DbSet entities, +// and migration classes / CreateTable calls. Mirrors Java CSharpEfcoreDetector. 
+type EfcoreDetector struct{} + +func NewEfcoreDetector() *EfcoreDetector { return &EfcoreDetector{} } + +func (EfcoreDetector) Name() string { return "csharp_efcore" } +func (EfcoreDetector) SupportedLanguages() []string { return []string{"csharp"} } +func (EfcoreDetector) DefaultConfidence() model.Confidence { return base.RegexDetectorDefaultConfidence } + +func init() { detector.RegisterDefault(NewEfcoreDetector()) } + +var ( + efcoreDbContextRE = regexp.MustCompile(`(?m)class\s+(\w+)\s*:\s*(?:[\w.]+\.)?DbContext`) + efcoreDbSetRE = regexp.MustCompile(`(?m)DbSet<(\w+)>`) + efcoreMigrationRE = regexp.MustCompile(`(?m)class\s+(\w+)\s*:\s*Migration`) + efcoreCreateTableRE = regexp.MustCompile(`(?m)CreateTable\s*\(\s*(?:name:\s*)?"(\w+)"`) +) + +const propEfcore = "efcore" + +func (d EfcoreDetector) Detect(ctx *detector.Context) *detector.Result { + text := ctx.Content + if text == "" { + return detector.EmptyResult() + } + var nodes []*model.CodeNode + var edges []*model.CodeEdge + filePath := ctx.FilePath + var contextIDs []string + + // DbContexts → REPOSITORY + for _, m := range efcoreDbContextRE.FindAllStringSubmatchIndex(text, -1) { + name := text[m[2]:m[3]] + nodeID := "efcore:" + filePath + ":context:" + name + contextIDs = append(contextIDs, nodeID) + n := model.NewCodeNode(nodeID, model.NodeRepository, name) + n.FQN = name + n.FilePath = filePath + n.LineStart = base.FindLineNumber(text, m[0]) + n.Source = "CSharpEfcoreDetector" + n.Properties["framework"] = propEfcore + nodes = append(nodes, n) + } + + // DbSet entities — track seen IDs to avoid duplicates from CreateTable + seen := map[string]bool{} + for _, m := range efcoreDbSetRE.FindAllStringSubmatchIndex(text, -1) { + entity := text[m[2]:m[3]] + entityID := "efcore:" + filePath + ":entity:" + entity + if !seen[entityID] { + seen[entityID] = true + n := model.NewCodeNode(entityID, model.NodeEntity, entity) + n.FQN = entity + n.FilePath = filePath + n.LineStart = base.FindLineNumber(text, m[0]) + 
n.Source = "CSharpEfcoreDetector" + n.Properties["framework"] = propEfcore + nodes = append(nodes, n) + } + // QUERIES edge for each context + for _, ctxID := range contextIDs { + e := model.NewCodeEdge( + ctxID+":queries:"+entity, + model.EdgeQueries, ctxID, entityID, + ) + e.Source = "CSharpEfcoreDetector" + edges = append(edges, e) + } + } + + // Migration classes + for _, m := range efcoreMigrationRE.FindAllStringSubmatchIndex(text, -1) { + name := text[m[2]:m[3]] + n := model.NewCodeNode( + "efcore:"+filePath+":migration:"+name, + model.NodeMigration, name, + ) + n.FQN = name + n.FilePath = filePath + n.LineStart = base.FindLineNumber(text, m[0]) + n.Source = "CSharpEfcoreDetector" + n.Properties["framework"] = propEfcore + nodes = append(nodes, n) + } + + // CreateTable entries — emit entities for tables not already seen + for _, m := range efcoreCreateTableRE.FindAllStringSubmatchIndex(text, -1) { + table := text[m[2]:m[3]] + entityID := "efcore:" + filePath + ":entity:" + table + if seen[entityID] { + continue + } + seen[entityID] = true + n := model.NewCodeNode(entityID, model.NodeEntity, table) + n.FQN = table + n.FilePath = filePath + n.LineStart = base.FindLineNumber(text, m[0]) + n.Source = "CSharpEfcoreDetector" + n.Properties["framework"] = propEfcore + n.Properties["source"] = "migration" + nodes = append(nodes, n) + } + + return detector.ResultOf(nodes, edges) +} diff --git a/go/internal/detector/csharp/efcore_test.go b/go/internal/detector/csharp/efcore_test.go new file mode 100644 index 00000000..a2518cf4 --- /dev/null +++ b/go/internal/detector/csharp/efcore_test.go @@ -0,0 +1,75 @@ +package csharp + +import ( + "testing" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +const efcoreSource = `using Microsoft.EntityFrameworkCore; + +public class AppDbContext : DbContext { + public DbSet Users { get; set; } + public DbSet Orders { get; set; } +} + +public class AddUserTable : 
Migration { + protected override void Up(MigrationBuilder b) { + b.CreateTable(name: "users"); + b.CreateTable("audit"); + } +} +` + +func TestCSharpEfcorePositive(t *testing.T) { + d := NewEfcoreDetector() + r := d.Detect(&detector.Context{FilePath: "Db.cs", Language: "csharp", Content: efcoreSource}) + if r == nil { + t.Fatal("nil result") + } + + kinds := map[model.NodeKind]int{} + for _, n := range r.Nodes { + kinds[n.Kind]++ + } + if kinds[model.NodeRepository] != 1 { + t.Errorf("expected 1 REPOSITORY, got %d", kinds[model.NodeRepository]) + } + // Entities: User, Order from DbSet + audit from CreateTable (users already exists by name from CreateTable but no — DbSet creates "User"/"Order" entities; CreateTable creates "users", "audit") + if kinds[model.NodeEntity] < 3 { + t.Errorf("expected >=3 ENTITY, got %d", kinds[model.NodeEntity]) + } + if kinds[model.NodeMigration] != 1 { + t.Errorf("expected 1 MIGRATION, got %d", kinds[model.NodeMigration]) + } + + queryEdges := 0 + for _, e := range r.Edges { + if e.Kind == model.EdgeQueries { + queryEdges++ + } + } + // 2 DbSet * 1 context = 2 query edges + if queryEdges != 2 { + t.Errorf("expected 2 QUERIES edges, got %d", queryEdges) + } +} + +func TestCSharpEfcoreNegative(t *testing.T) { + d := NewEfcoreDetector() + r := d.Detect(&detector.Context{FilePath: "x.cs", Language: "csharp", Content: "public class Foo {}"}) + if len(r.Nodes) != 0 { + t.Fatal("expected 0 nodes") + } +} + +func TestCSharpEfcoreDeterminism(t *testing.T) { + d := NewEfcoreDetector() + ctx := &detector.Context{FilePath: "Db.cs", Language: "csharp", Content: efcoreSource} + r1 := d.Detect(ctx) + r2 := d.Detect(ctx) + if len(r1.Nodes) != len(r2.Nodes) || len(r1.Edges) != len(r2.Edges) { + t.Fatal("non-deterministic counts") + } +} diff --git a/go/internal/detector/csharp/minimal_apis.go b/go/internal/detector/csharp/minimal_apis.go new file mode 100644 index 00000000..7ef90f8d --- /dev/null +++ b/go/internal/detector/csharp/minimal_apis.go @@ 
-0,0 +1,119 @@ +package csharp + +import ( + "regexp" + "strconv" + "strings" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/detector/base" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// MinimalApisDetector detects ASP.NET Core Minimal API endpoints +// (.MapGet/.MapPost/...) plus Use/AddAuthentication/Authorization guards. +// Mirrors Java CSharpMinimalApisDetector. +type MinimalApisDetector struct{} + +func NewMinimalApisDetector() *MinimalApisDetector { return &MinimalApisDetector{} } + +func (MinimalApisDetector) Name() string { return "csharp_minimal_apis" } +func (MinimalApisDetector) SupportedLanguages() []string { return []string{"csharp"} } +func (MinimalApisDetector) DefaultConfidence() model.Confidence { return base.RegexDetectorDefaultConfidence } + +func init() { detector.RegisterDefault(NewMinimalApisDetector()) } + +var ( + minApisMapRE = regexp.MustCompile(`(?m)\.Map(Get|Post|Put|Delete|Patch)\s*\(\s*"([^"]*)"`) + minApisBuilderRE = regexp.MustCompile(`(?m)WebApplication\.CreateBuilder\s*\(`) + minApisUseAuthRE = regexp.MustCompile(`(?m)\.Use(Authentication|Authorization)\s*\(`) + minApisAddAuthRE = regexp.MustCompile(`(?m)\.Add(Authentication|Authorization)\s*\(`) +) + +const propDotnetMinimalApi = "dotnet_minimal_api" + +func (d MinimalApisDetector) Detect(ctx *detector.Context) *detector.Result { + text := ctx.Content + if text == "" { + return detector.EmptyResult() + } + var nodes []*model.CodeNode + var edges []*model.CodeEdge + filePath := ctx.FilePath + var appModuleID string + + // Find WebApplication.CreateBuilder => app MODULE + if loc := minApisBuilderRE.FindStringIndex(text); loc != nil { + appModuleID = "dotnet:" + filePath + ":app" + n := model.NewCodeNode(appModuleID, model.NodeModule, "WebApplication("+filePath+")") + n.FQN = filePath + n.FilePath = filePath + n.LineStart = base.FindLineNumber(text, loc[0]) + n.Source = "CSharpMinimalApisDetector" + 
n.Properties["framework"] = propDotnetMinimalApi + nodes = append(nodes, n) + } + + // MapGet/MapPost/etc endpoints + for _, m := range minApisMapRE.FindAllStringSubmatchIndex(text, -1) { + httpMethod := strings.ToUpper(text[m[2]:m[3]]) + path := text[m[4]:m[5]] + line := base.FindLineNumber(text, m[0]) + endpointID := "dotnet:" + filePath + ":endpoint:" + httpMethod + ":" + path + ":" + strconv.Itoa(line) + + n := model.NewCodeNode(endpointID, model.NodeEndpoint, httpMethod+" "+path) + n.FQN = httpMethod + " " + path + n.FilePath = filePath + n.LineStart = line + n.Source = "CSharpMinimalApisDetector" + n.Properties["http_method"] = httpMethod + n.Properties["path"] = path + n.Properties["framework"] = propDotnetMinimalApi + nodes = append(nodes, n) + + if appModuleID != "" { + e := model.NewCodeEdge( + appModuleID+":exposes:"+endpointID, + model.EdgeExposes, appModuleID, endpointID, + ) + e.Source = "CSharpMinimalApisDetector" + edges = append(edges, e) + } + } + + // Guards from .UseAuthentication/Authorization + for _, m := range minApisUseAuthRE.FindAllStringSubmatchIndex(text, -1) { + authType := text[m[2]:m[3]] + line := base.FindLineNumber(text, m[0]) + n := model.NewCodeNode( + "dotnet:"+filePath+":guard:Use"+authType+":"+strconv.Itoa(line), + model.NodeGuard, "Use"+authType, + ) + n.FQN = "Use" + authType + n.FilePath = filePath + n.LineStart = line + n.Source = "CSharpMinimalApisDetector" + n.Properties["guard_type"] = strings.ToLower(authType) + n.Properties["framework"] = propDotnetMinimalApi + nodes = append(nodes, n) + } + + // Guards from .AddAuthentication/Authorization + for _, m := range minApisAddAuthRE.FindAllStringSubmatchIndex(text, -1) { + authType := text[m[2]:m[3]] + line := base.FindLineNumber(text, m[0]) + n := model.NewCodeNode( + "dotnet:"+filePath+":guard:Add"+authType+":"+strconv.Itoa(line), + model.NodeGuard, "Add"+authType, + ) + n.FQN = "Add" + authType + n.FilePath = filePath + n.LineStart = line + n.Source = 
"CSharpMinimalApisDetector" + n.Properties["guard_type"] = strings.ToLower(authType) + n.Properties["framework"] = propDotnetMinimalApi + nodes = append(nodes, n) + } + + return detector.ResultOf(nodes, edges) +} diff --git a/go/internal/detector/csharp/minimal_apis_test.go b/go/internal/detector/csharp/minimal_apis_test.go new file mode 100644 index 00000000..059e81fe --- /dev/null +++ b/go/internal/detector/csharp/minimal_apis_test.go @@ -0,0 +1,82 @@ +package csharp + +import ( + "testing" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +const minimalApisSource = `var builder = WebApplication.CreateBuilder(args); +builder.Services.AddAuthentication(); +builder.Services.AddAuthorization(); +var app = builder.Build(); +app.UseAuthentication(); +app.UseAuthorization(); +app.MapGet("/health", () => "ok"); +app.MapPost("/users", CreateUser); +app.MapDelete("/users/{id}", DeleteUser); +app.Run(); +` + +func TestCSharpMinimalApisPositive(t *testing.T) { + d := NewMinimalApisDetector() + r := d.Detect(&detector.Context{FilePath: "Program.cs", Language: "csharp", Content: minimalApisSource}) + + kinds := map[model.NodeKind]int{} + for _, n := range r.Nodes { + kinds[n.Kind]++ + } + if kinds[model.NodeModule] != 1 { + t.Errorf("expected 1 MODULE (web application), got %d", kinds[model.NodeModule]) + } + if kinds[model.NodeEndpoint] != 3 { + t.Errorf("expected 3 ENDPOINTs, got %d", kinds[model.NodeEndpoint]) + } + // 2 UseAuth + 2 AddAuth = 4 guards + if kinds[model.NodeGuard] != 4 { + t.Errorf("expected 4 GUARDs, got %d", kinds[model.NodeGuard]) + } + + // EXPOSES edges: 3 endpoints from one app + exposeEdges := 0 + for _, e := range r.Edges { + if e.Kind == model.EdgeExposes { + exposeEdges++ + } + } + if exposeEdges != 3 { + t.Errorf("expected 3 EXPOSES edges, got %d", exposeEdges) + } +} + +func TestCSharpMinimalApisHttpMethodUppercase(t *testing.T) { + d := NewMinimalApisDetector() + r := 
d.Detect(&detector.Context{FilePath: "Program.cs", Language: "csharp", Content: minimalApisSource}) + for _, n := range r.Nodes { + if n.Kind == model.NodeEndpoint { + method := n.Properties["http_method"].(string) + if method != "GET" && method != "POST" && method != "DELETE" { + t.Errorf("unexpected http_method %q", method) + } + } + } +} + +func TestCSharpMinimalApisNegative(t *testing.T) { + d := NewMinimalApisDetector() + r := d.Detect(&detector.Context{FilePath: "x.cs", Language: "csharp", Content: "var x = 1;"}) + if len(r.Nodes) != 0 { + t.Fatal("expected 0 nodes") + } +} + +func TestCSharpMinimalApisDeterminism(t *testing.T) { + d := NewMinimalApisDetector() + ctx := &detector.Context{FilePath: "Program.cs", Language: "csharp", Content: minimalApisSource} + r1 := d.Detect(ctx) + r2 := d.Detect(ctx) + if len(r1.Nodes) != len(r2.Nodes) || len(r1.Edges) != len(r2.Edges) { + t.Fatal("non-deterministic counts") + } +} diff --git a/go/internal/detector/csharp/structures.go b/go/internal/detector/csharp/structures.go new file mode 100644 index 00000000..810f6515 --- /dev/null +++ b/go/internal/detector/csharp/structures.go @@ -0,0 +1,270 @@ +package csharp + +import ( + "regexp" + "strconv" + "strings" + "unicode" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/detector/base" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// StructuresDetector detects C# namespaces, classes, interfaces, enums, using +// imports, and MVC controller endpoints (Route + HttpGet/Post/...). Mirrors +// Java CSharpStructuresDetector. 
// Detect scans C# source text with regular expressions and returns the
// structural nodes and edges it finds:
//
//   - the first namespace declaration (MODULE node);
//   - one IMPORTS edge per using directive;
//   - classes (CLASS or ABSTRACT_CLASS nodes) with EXTENDS / IMPLEMENTS
//     edges derived from the base-type list;
//   - interfaces and enums;
//   - controller endpoints: methods that have an [HttpXxx] attribute within
//     the five lines above them (ENDPOINT nodes).
//
// Empty content yields an empty result.
func (d StructuresDetector) Detect(ctx *detector.Context) *detector.Result {
	text := ctx.Content
	if text == "" {
		return detector.EmptyResult()
	}
	var nodes []*model.CodeNode
	var edges []*model.CodeEdge
	filePath := ctx.FilePath
	// Line-split copy of the text, used by the attribute look-back scans.
	lines := strings.Split(text, "\n")

	// Namespace — only the first match is used; it prefixes every FQN below.
	var namespace string
	if m := csharpNamespaceRE.FindStringSubmatchIndex(text); len(m) >= 4 {
		namespace = text[m[2]:m[3]]
		n := model.NewCodeNode(filePath+":namespace:"+namespace, model.NodeModule, namespace)
		n.FQN = namespace
		n.FilePath = filePath
		n.LineStart = base.FindLineNumber(text, m[0])
		n.Source = "CSharpStructuresDetector"
		nodes = append(nodes, n)
	}

	// Using statements — the edge endpoints are the raw file path and the
	// imported name; no node with either ID is created in this function.
	for _, m := range csharpUsingRE.FindAllStringSubmatchIndex(text, -1) {
		imp := text[m[2]:m[3]]
		e := model.NewCodeEdge(filePath+":imports:"+imp, model.EdgeImports, filePath, imp)
		e.Source = "CSharpStructuresDetector"
		edges = append(edges, e)
	}

	// Classes — also track the class route for endpoint detection.
	// classRoute is a single value: each class with a [Route] attribute
	// overwrites it, and the endpoint pass below uses whatever is left
	// after the last class for every endpoint in the file.
	var classRoute string
	for _, m := range csharpClassRE.FindAllStringSubmatchIndex(text, -1) {
		className := text[m[2]:m[3]]
		var baseStr string
		// m[4] is -1 when the optional base-type group did not participate.
		if m[4] >= 0 {
			baseStr = text[m[4]:m[5]]
		}
		lineNum := base.FindLineNumber(text, m[0])
		// Examine a window from up to 60 bytes before the match start to
		// the match end to spot "abstract". Any occurrence of the word in
		// that window counts, including one belonging to preceding code.
		start := m[0] - 60
		if start < 0 {
			start = 0
		}
		matchText := text[start:m[1]]
		isAbstract := strings.Contains(matchText, "abstract")
		kind := model.NodeClass
		if isAbstract {
			kind = model.NodeAbstractClass
		}
		fqn := className
		if namespace != "" {
			fqn = namespace + "." + className
		}
		nodeID := filePath + ":" + className

		n := model.NewCodeNode(nodeID, kind, className)
		n.FQN = fqn
		n.FilePath = filePath
		n.LineStart = lineNum
		n.Source = "CSharpStructuresDetector"
		if isAbstract {
			n.Properties["is_abstract"] = true
		}

		// Base types: edge targets use the "*:" + name form rather than a
		// concrete node ID.
		baseClass, ifaceList := parseCSharpBaseTypes(baseStr)
		if baseClass != "" {
			n.Properties["base_class"] = baseClass
			e := model.NewCodeEdge(
				nodeID+":extends:"+baseClass, model.EdgeExtends, nodeID, "*:"+baseClass,
			)
			e.Source = "CSharpStructuresDetector"
			edges = append(edges, e)
		}
		if len(ifaceList) > 0 {
			n.Properties["interfaces"] = ifaceList
			for _, iface := range ifaceList {
				e := model.NewCodeEdge(
					nodeID+":implements:"+iface, model.EdgeImplements, nodeID, "*:"+iface,
				)
				e.Source = "CSharpStructuresDetector"
				edges = append(edges, e)
			}
		}
		nodes = append(nodes, n)

		// Check 5 lines above class for [Route(...)]. The first matching
		// line wins; "[controller]" in the route is replaced by the class
		// name without its "Controller" suffix.
		classLineIdx := lineNum - 1
		startLine := classLineIdx - 5
		if startLine < 0 {
			startLine = 0
		}
		for j := startLine; j < classLineIdx && j < len(lines); j++ {
			if rm := csharpRouteRE.FindStringSubmatch(lines[j]); len(rm) >= 2 {
				route := rm[1]
				ctrl := strings.TrimSuffix(className, "Controller")
				classRoute = strings.ReplaceAll(route, "[controller]", ctrl)
				break
			}
		}
	}

	// Interfaces — node IDs share the filePath + ":" + name scheme used for
	// classes and enums.
	for _, m := range csharpInterfaceRE.FindAllStringSubmatchIndex(text, -1) {
		name := text[m[2]:m[3]]
		fqn := name
		if namespace != "" {
			fqn = namespace + "." + name
		}
		n := model.NewCodeNode(filePath+":"+name, model.NodeInterface, name)
		n.FQN = fqn
		n.FilePath = filePath
		n.LineStart = base.FindLineNumber(text, m[0])
		n.Source = "CSharpStructuresDetector"
		nodes = append(nodes, n)
	}

	// Enums
	for _, m := range csharpEnumRE.FindAllStringSubmatchIndex(text, -1) {
		name := text[m[2]:m[3]]
		fqn := name
		if namespace != "" {
			fqn = namespace + "." + name
		}
		n := model.NewCodeNode(filePath+":"+name, model.NodeEnum, name)
		n.FQN = fqn
		n.FilePath = filePath
		n.LineStart = base.FindLineNumber(text, m[0])
		n.Source = "CSharpStructuresDetector"
		nodes = append(nodes, n)
	}

	// HTTP endpoints (scan line-by-line, looking 5 lines up for HttpXxx attrs)
	for i, line := range lines {
		mm := csharpMethodRE.FindStringSubmatch(line)
		if len(mm) < 2 {
			continue
		}
		methodName := mm[1]
		// Skip captures that are keywords rather than method names.
		if csharpSkipMethodNames[methodName] {
			continue
		}
		var httpMethodStr, httpPath string
		startLine := i - 5
		if startLine < 0 {
			startLine = 0
		}
		// The scan runs forward from i-5 and stops at the first match, so
		// within the window the attribute farthest above the method wins.
		for j := startLine; j < i; j++ {
			if hm := csharpHttpAttrRE.FindStringSubmatch(lines[j]); len(hm) >= 2 {
				// "HttpGet" -> "GET", "HttpPost" -> "POST", ...
				httpMethodStr = strings.ToUpper(strings.TrimPrefix(hm[1], "Http"))
				if len(hm) >= 3 {
					httpPath = hm[2]
				}
				break
			}
		}
		// No HTTP attribute nearby: not an endpoint.
		if httpMethodStr == "" {
			continue
		}

		fullPath := composePath(classRoute, httpPath)
		moduleName := ctx.ModuleName
		fqn := methodName
		if namespace != "" {
			fqn = namespace + "." + methodName
		}
		n := model.NewCodeNode(
			"endpoint:"+moduleName+":"+methodName+":"+httpMethodStr+":"+fullPath,
			model.NodeEndpoint, httpMethodStr+" "+fullPath,
		)
		n.FQN = fqn
		n.FilePath = filePath
		n.LineStart = i + 1 // lines is 0-indexed, LineStart is 1-based
		n.Source = "CSharpStructuresDetector"
		n.Properties["http_method"] = httpMethodStr
		n.Properties["path"] = fullPath
		nodes = append(nodes, n)
	}

	// References strconv so the package import stays used even though no
	// ID in this function is built with it.
	_ = strconv.Itoa // (in case future ID building needs it)
	return detector.ResultOf(nodes, edges)
}
// composePath joins a class-level route with a method-level path into one
// URL path that always starts with "/". Matches the Java side's
// trim/normalize behaviour.
//
// Leading and trailing slashes are removed from classRoute and leading
// slashes from path before joining. With an empty classRoute the result is
// "/" + path, or just "/" when path is empty too.
func composePath(classRoute, path string) string {
	suffix := strings.TrimLeft(path, "/")
	if classRoute != "" {
		full := "/" + strings.Trim(classRoute, "/")
		if path != "" {
			full += "/" + suffix
		}
		return full
	}
	if path != "" {
		return "/" + suffix
	}
	return "/"
}

// parseCSharpBaseTypes splits the comma-separated base-type list of a class
// declaration into a single base class and a list of interfaces.
//
// Generic arguments are dropped, and only commas outside angle brackets
// separate entries, so "Dictionary<string, int>, IDisposable" yields base
// class "Dictionary" and interface "IDisposable". Splitting on every comma
// first would cut a multi-argument generic into broken names.
//
// A name counts as an interface when it follows the "IXxx" convention (first
// char 'I', second char uppercase). The first other name is the base class;
// any further non-"IXxx" names are reported as interfaces as well. A blank
// list yields ("", nil).
func parseCSharpBaseTypes(baseStr string) (string, []string) {
	if strings.TrimSpace(baseStr) == "" {
		return "", nil
	}

	var baseClass string
	var interfaces []string
	// classify files one finished entry as base class or interface.
	classify := func(raw string) {
		clean := strings.TrimSpace(raw)
		if clean == "" {
			return
		}
		switch {
		case len(clean) >= 2 && clean[0] == 'I' && unicode.IsUpper(rune(clean[1])):
			interfaces = append(interfaces, clean)
		case baseClass == "":
			baseClass = clean
		default:
			interfaces = append(interfaces, clean)
		}
	}

	var name strings.Builder
	depth := 0 // current nesting level of '<' ... '>'
	for _, r := range baseStr {
		switch {
		case r == '<':
			depth++
		case r == '>':
			if depth > 0 {
				depth--
			}
		case depth > 0:
			// Inside generic arguments: not part of the type name.
		case r == ',':
			classify(name.String())
			name.Reset()
		default:
			name.WriteRune(r)
		}
	}
	classify(name.String())
	return baseClass, interfaces
}
public IActionResult List() => Ok(); + + [HttpPost("create")] + public IActionResult Create() => Ok(); +} +` + +func TestCSharpStructuresPositive(t *testing.T) { + d := NewStructuresDetector() + r := d.Detect(&detector.Context{ + FilePath: "Api.cs", + Language: "csharp", + Content: csharpStructSource, + ModuleName: "MyApp.Api", + }) + + kinds := map[model.NodeKind]int{} + for _, n := range r.Nodes { + kinds[n.Kind]++ + } + if kinds[model.NodeModule] != 1 { + t.Errorf("expected 1 MODULE (namespace), got %d", kinds[model.NodeModule]) + } + // Note: Java CSharpStructuresDetector uses a 60-char window before the + // class match to detect "abstract". A class declared shortly after an + // abstract class will pick up the previous class's modifier — known + // Java parity behaviour. Total abstract+regular class count == 3 here + // (BaseEntity + User + UsersController). + totalClass := kinds[model.NodeAbstractClass] + kinds[model.NodeClass] + if totalClass != 3 { + t.Errorf("expected 3 class-like nodes total, got %d", totalClass) + } + if kinds[model.NodeAbstractClass] < 1 { + t.Errorf("expected >=1 ABSTRACT_CLASS, got %d", kinds[model.NodeAbstractClass]) + } + if kinds[model.NodeInterface] != 1 { + t.Errorf("expected 1 INTERFACE, got %d", kinds[model.NodeInterface]) + } + if kinds[model.NodeEnum] != 1 { + t.Errorf("expected 1 ENUM, got %d", kinds[model.NodeEnum]) + } + if kinds[model.NodeEndpoint] != 2 { + t.Errorf("expected 2 ENDPOINTs, got %d", kinds[model.NodeEndpoint]) + } + + // Edges: 2 using imports + 1 EXTENDS (User->BaseEntity) + 2 IMPLEMENTS (IComparable, IEquatable) + importEdges := 0 + extendsEdges := 0 + implementsEdges := 0 + for _, e := range r.Edges { + switch e.Kind { + case model.EdgeImports: + importEdges++ + case model.EdgeExtends: + extendsEdges++ + case model.EdgeImplements: + implementsEdges++ + } + } + if importEdges != 2 { + t.Errorf("expected 2 import edges, got %d", importEdges) + } + // UsersController -> ControllerBase (extends) + User -> 
BaseEntity = 2 EXTENDS + if extendsEdges < 1 { + t.Errorf("expected EXTENDS edges, got %d", extendsEdges) + } + if implementsEdges < 2 { + t.Errorf("expected >=2 IMPLEMENTS edges, got %d", implementsEdges) + } +} + +func TestCSharpStructuresControllerRoute(t *testing.T) { + // Note: mirrors Java CSharpStructuresDetector's forward scan for the + // HttpXxx attribute (j = i-5 → i, first-match-wins). When two methods + // share a 5-line window, both pick up the earlier method's attribute. + // This is a known Java parity bug; keep test loose so we don't regress + // when the Java side is fixed and we follow. + d := NewStructuresDetector() + r := d.Detect(&detector.Context{ + FilePath: "Api.cs", + Language: "csharp", + Content: csharpStructSource, + ModuleName: "MyApp.Api", + }) + pathsFound := map[string]bool{} + for _, n := range r.Nodes { + if n.Kind == model.NodeEndpoint { + pathsFound[n.Properties["path"].(string)] = true + } + } + if !pathsFound["/api/Users"] { + t.Errorf("expected /api/Users as the controller-route prefix path; got %v", pathsFound) + } +} + +func TestCSharpStructuresNegative(t *testing.T) { + d := NewStructuresDetector() + r := d.Detect(&detector.Context{FilePath: "x.cs", Language: "csharp", Content: ""}) + if len(r.Nodes) != 0 { + t.Fatal("expected 0 nodes on empty input") + } +} + +func TestCSharpStructuresDeterminism(t *testing.T) { + d := NewStructuresDetector() + ctx := &detector.Context{FilePath: "Api.cs", Language: "csharp", Content: csharpStructSource} + r1 := d.Detect(ctx) + r2 := d.Detect(ctx) + if len(r1.Nodes) != len(r2.Nodes) || len(r1.Edges) != len(r2.Edges) { + t.Fatal("non-deterministic counts") + } +} diff --git a/go/internal/detector/detector.go b/go/internal/detector/detector.go new file mode 100644 index 00000000..3ac5792a --- /dev/null +++ b/go/internal/detector/detector.go @@ -0,0 +1,52 @@ +package detector + +import ( + "github.com/randomcodespace/codeiq/go/internal/model" + 
"github.com/randomcodespace/codeiq/go/internal/parser" +) + +// Detector is the contract every detector implements. Mirrors Java +// io.github.randomcodespace.iq.detector.Detector. +// +// Detectors must be stateless — phase 1 invokes each detector from goroutines +// concurrently. Use method-local state only. +type Detector interface { + Name() string + SupportedLanguages() []string + // DefaultConfidence is the floor stamped onto every emission that does not + // explicitly set Confidence — equivalent to Java's defaultConfidence(). + DefaultConfidence() model.Confidence + Detect(ctx *Context) *Result +} + +// Context is the per-file payload threaded through every Detect call. +// Mirrors Java DetectorContext. +type Context struct { + FilePath string + Language string + Content string + Tree *parser.Tree // nil for languages without a tree-sitter grammar + ModuleName string + // ParsedData is the pre-parsed structured payload for YAML/JSON/TOML/INI/ + // properties files. Wrapped in the same envelope shape used by the Java + // side: a map with keys "type" (e.g. "yaml", "yaml_multi", "json", "toml", + // "ini", "properties") and "data" / "documents". nil for files that don't + // participate in structured parsing. + ParsedData map[string]any +} + +// Result is what a single Detect call returns. Mirrors Java DetectorResult. +type Result struct { + Nodes []*model.CodeNode + Edges []*model.CodeEdge +} + +// EmptyResult returns an empty Result. Sentinel for "nothing matched". +func EmptyResult() *Result { + return &Result{Nodes: nil, Edges: nil} +} + +// ResultOf returns a Result with the given slices. 
+func ResultOf(nodes []*model.CodeNode, edges []*model.CodeEdge) *Result { + return &Result{Nodes: nodes, Edges: edges} +} diff --git a/go/internal/detector/detector_test.go b/go/internal/detector/detector_test.go new file mode 100644 index 00000000..e4207909 --- /dev/null +++ b/go/internal/detector/detector_test.go @@ -0,0 +1,36 @@ +package detector + +import ( + "testing" + + "github.com/randomcodespace/codeiq/go/internal/model" +) + +func TestEmptyResult(t *testing.T) { + r := EmptyResult() + if len(r.Nodes) != 0 || len(r.Edges) != 0 { + t.Fatalf("EmptyResult should be empty: %+v", r) + } +} + +func TestResultOf(t *testing.T) { + n := model.NewCodeNode("a", model.NodeClass, "A") + e := model.NewCodeEdge("a->b", model.EdgeCalls, "a", "b") + r := ResultOf([]*model.CodeNode{n}, []*model.CodeEdge{e}) + if len(r.Nodes) != 1 || len(r.Edges) != 1 { + t.Fatalf("ResultOf mismatch: %+v", r) + } +} + +// A trivial test implementation that satisfies the Detector interface, +// ensuring the interface signature compiles. +type stubDetector struct{} + +func (stubDetector) Name() string { return "stub" } +func (stubDetector) SupportedLanguages() []string { return []string{"java"} } +func (stubDetector) DefaultConfidence() model.Confidence { return model.ConfidenceLexical } +func (stubDetector) Detect(ctx *Context) *Result { return EmptyResult() } + +func TestDetectorInterfaceCompiles(t *testing.T) { + var _ Detector = stubDetector{} +} diff --git a/go/internal/detector/frontend/angular_component.go b/go/internal/detector/frontend/angular_component.go new file mode 100644 index 00000000..ae59328e --- /dev/null +++ b/go/internal/detector/frontend/angular_component.go @@ -0,0 +1,108 @@ +package frontend + +import ( + "regexp" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/detector/base" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// AngularComponentDetector mirrors Java AngularComponentDetector. 
Detects +// @Component / @Injectable / @Directive / @Pipe / @NgModule decorators in +// TypeScript and emits COMPONENT or MIDDLEWARE nodes accordingly. +type AngularComponentDetector struct{} + +func NewAngularComponentDetector() *AngularComponentDetector { return &AngularComponentDetector{} } + +func (AngularComponentDetector) Name() string { return "frontend.angular_components" } +func (AngularComponentDetector) SupportedLanguages() []string { return []string{"typescript"} } +func (AngularComponentDetector) DefaultConfidence() model.Confidence { return base.RegexDetectorDefaultConfidence } + +func init() { detector.RegisterDefault(NewAngularComponentDetector()) } + +const propAngular = "angular" + +var ( + // RE2 doesn't support DOTALL by default; use (?s) prefix. + angularComponentDecorator = regexp.MustCompile(`(?s)@Component\s*\(\s*\{.*?selector\s*:\s*['"]([^'"]+)['"].*?\}\s*\)\s*\n?\s*(?:export\s+)?class\s+(\w+)`) + angularInjectableDecorator = regexp.MustCompile(`(?s)@Injectable\s*\(\s*\{.*?providedIn\s*:\s*['"]([\w]+)['"].*?\}\s*\)\s*\n?\s*(?:export\s+)?class\s+(\w+)`) + angularDirectiveDecorator = regexp.MustCompile(`(?s)@Directive\s*\(\s*\{.*?selector\s*:\s*['"]([^'"]+)['"].*?\}\s*\)\s*\n?\s*(?:export\s+)?class\s+(\w+)`) + angularPipeDecorator = regexp.MustCompile(`(?s)@Pipe\s*\(\s*\{.*?name\s*:\s*['"]([\w]+)['"].*?\}\s*\)\s*\n?\s*(?:export\s+)?class\s+(\w+)`) + angularNgModuleDecorator = regexp.MustCompile(`(?s)@NgModule\s*\(\s*\{.*?\}\s*\)\s*\n?\s*(?:export\s+)?class\s+(\w+)`) +) + +func (d AngularComponentDetector) Detect(ctx *detector.Context) *detector.Result { + text := ctx.Content + if text == "" { + return detector.EmptyResult() + } + nodes := []*model.CodeNode{} + fp := ctx.FilePath + seen := map[string]bool{} + + for _, m := range angularComponentDecorator.FindAllStringSubmatchIndex(text, -1) { + selector := text[m[2]:m[3]] + className := text[m[4]:m[5]] + if seen[className] { + continue + } + seen[className] = true + n := 
base.CreateComponentNode(propAngular, fp, "component", className, model.NodeComponent, base.LineAt(text, m[0])) + n.Confidence = base.RegexDetectorDefaultConfidence + n.Properties["selector"] = selector + n.Properties["decorator"] = "Component" + nodes = append(nodes, n) + } + for _, m := range angularInjectableDecorator.FindAllStringSubmatchIndex(text, -1) { + providedIn := text[m[2]:m[3]] + className := text[m[4]:m[5]] + if seen[className] { + continue + } + seen[className] = true + n := base.CreateComponentNode(propAngular, fp, "service", className, model.NodeMiddleware, base.LineAt(text, m[0])) + n.Confidence = base.RegexDetectorDefaultConfidence + n.Properties["provided_in"] = providedIn + n.Properties["decorator"] = "Injectable" + nodes = append(nodes, n) + } + for _, m := range angularDirectiveDecorator.FindAllStringSubmatchIndex(text, -1) { + selector := text[m[2]:m[3]] + className := text[m[4]:m[5]] + if seen[className] { + continue + } + seen[className] = true + n := base.CreateComponentNode(propAngular, fp, "component", className, model.NodeComponent, base.LineAt(text, m[0])) + n.Confidence = base.RegexDetectorDefaultConfidence + n.Properties["selector"] = selector + n.Properties["decorator"] = "Directive" + nodes = append(nodes, n) + } + for _, m := range angularPipeDecorator.FindAllStringSubmatchIndex(text, -1) { + pipeName := text[m[2]:m[3]] + className := text[m[4]:m[5]] + if seen[className] { + continue + } + seen[className] = true + n := base.CreateComponentNode(propAngular, fp, "component", className, model.NodeComponent, base.LineAt(text, m[0])) + n.Confidence = base.RegexDetectorDefaultConfidence + n.Properties["pipe_name"] = pipeName + n.Properties["decorator"] = "Pipe" + nodes = append(nodes, n) + } + for _, m := range angularNgModuleDecorator.FindAllStringSubmatchIndex(text, -1) { + className := text[m[2]:m[3]] + if seen[className] { + continue + } + seen[className] = true + n := base.CreateComponentNode(propAngular, fp, "component", 
className, model.NodeComponent, base.LineAt(text, m[0])) + n.Confidence = base.RegexDetectorDefaultConfidence + n.Properties["decorator"] = "NgModule" + nodes = append(nodes, n) + } + return detector.ResultOf(nodes, nil) +} diff --git a/go/internal/detector/frontend/angular_component_test.go b/go/internal/detector/frontend/angular_component_test.go new file mode 100644 index 00000000..de2521ed --- /dev/null +++ b/go/internal/detector/frontend/angular_component_test.go @@ -0,0 +1,38 @@ +package frontend + +import ( + "testing" + + "github.com/randomcodespace/codeiq/go/internal/detector" +) + +func TestAngularComponent_Positive(t *testing.T) { + code := "@Component({\n selector: 'app-root'\n})\nexport class AppComponent {}" + d := NewAngularComponentDetector() + r := d.Detect(&detector.Context{FilePath: "app.component.ts", Language: "typescript", Content: code}) + if len(r.Nodes) != 1 { + t.Fatalf("expected 1 node, got %d", len(r.Nodes)) + } + if r.Nodes[0].Properties["framework"] != "angular" { + t.Errorf("framework = %v", r.Nodes[0].Properties["framework"]) + } +} + +func TestAngularComponent_NoMatch(t *testing.T) { + d := NewAngularComponentDetector() + r := d.Detect(&detector.Context{FilePath: "x.ts", Language: "typescript", Content: "class Foo {}"}) + if len(r.Nodes) != 0 { + t.Fatalf("expected 0 nodes, got %d", len(r.Nodes)) + } +} + +func TestAngularComponent_Deterministic(t *testing.T) { + code := "@Component({\n selector: 'app-root'\n})\nclass AppComponent {}" + d := NewAngularComponentDetector() + c := &detector.Context{FilePath: "x.ts", Language: "typescript", Content: code} + r1 := d.Detect(c) + r2 := d.Detect(c) + if len(r1.Nodes) != len(r2.Nodes) { + t.Fatal("non-deterministic") + } +} diff --git a/go/internal/detector/frontend/frontend_route.go b/go/internal/detector/frontend/frontend_route.go new file mode 100644 index 00000000..2ccdc793 --- /dev/null +++ b/go/internal/detector/frontend/frontend_route.go @@ -0,0 +1,158 @@ +package frontend + +import ( 
+ "regexp" + "strings" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/detector/base" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// FrontendRouteDetector mirrors Java FrontendRouteDetector. Detects React +// Router elements, Vue Router routes arrays, Angular Router config, +// and Next.js file-based routes (pages/ and app/). +type FrontendRouteDetector struct{} + +func NewFrontendRouteDetector() *FrontendRouteDetector { return &FrontendRouteDetector{} } + +func (FrontendRouteDetector) Name() string { return "frontend.frontend_routes" } +func (FrontendRouteDetector) SupportedLanguages() []string { + return []string{"typescript", "javascript", "vue", "svelte"} +} +func (FrontendRouteDetector) DefaultConfidence() model.Confidence { + return base.RegexDetectorDefaultConfidence +} + +func init() { detector.RegisterDefault(NewFrontendRouteDetector()) } + +var ( + frReactRouteComp = regexp.MustCompile(`]*?path\s*=\s*["']([^"']+)["'][^>]*?component\s*=\s*\{(\w+)\}`) + frReactRouteElement = regexp.MustCompile(`]*?path\s*=\s*["']([^"']+)["'][^>]*?element\s*=\s*\{<(\w+)`) + frReactRouteBare = regexp.MustCompile(`]*?path\s*=\s*["']([^"']+)["']`) + frVueRoute = regexp.MustCompile(`\{\s*path\s*:\s*['"]([^'"]+)['"](?:.*?component\s*:\s*(\w+))?`) + frVueCreateRouter = regexp.MustCompile(`createRouter\s*\(`) + frVueRoutesArray = regexp.MustCompile(`\broutes\s*:\s*\[`) + frAngularRouterModul = regexp.MustCompile(`RouterModule\.for(?:Root|Child)\s*\(`) + frNextjsPages = regexp.MustCompile(`^pages/(.+)\.(tsx|ts|jsx|js)$`) + frNextjsApp = regexp.MustCompile(`^app/(.+)/page\.(tsx|ts|jsx|js)$`) +) + +func (FrontendRouteDetector) Detect(ctx *detector.Context) *detector.Result { + text := ctx.Content + if text == "" && ctx.FilePath == "" { + return detector.EmptyResult() + } + var nodes []*model.CodeNode + var edges []*model.CodeEdge + + frDetectNextjs(ctx, &nodes) + frDetectReact(ctx, text, &nodes, &edges) + 
frDetectVue(ctx, text, &nodes, &edges) + frDetectAngular(ctx, text, &nodes, &edges) + + return detector.ResultOf(nodes, edges) +} + +func frDetectNextjs(ctx *detector.Context, nodes *[]*model.CodeNode) { + fp := ctx.FilePath + if m := frNextjsPages.FindStringSubmatch(fp); m != nil { + routePath := frNextjsPagesPath(m[1]) + *nodes = append(*nodes, frRouteNode("route:"+fp+":nextjs:"+routePath, routePath, "nextjs", ctx, 1)) + return + } + if m := frNextjsApp.FindStringSubmatch(fp); m != nil { + raw := strings.ReplaceAll(m[1], "\\", "/") + routePath := "/" + raw + *nodes = append(*nodes, frRouteNode("route:"+fp+":nextjs:"+routePath, routePath, "nextjs", ctx, 1)) + } +} + +func frNextjsPagesPath(raw string) string { + parts := strings.Split(strings.ReplaceAll(raw, "\\", "/"), "/") + if len(parts) > 0 && parts[len(parts)-1] == "index" { + parts = parts[:len(parts)-1] + } + if len(parts) == 0 { + return "/" + } + return "/" + strings.Join(parts, "/") +} + +func frDetectReact(ctx *detector.Context, text string, nodes *[]*model.CodeNode, edges *[]*model.CodeEdge) { + seen := map[string]bool{} + for _, re := range []*regexp.Regexp{frReactRouteComp, frReactRouteElement} { + for _, m := range re.FindAllStringSubmatchIndex(text, -1) { + path := text[m[2]:m[3]] + comp := text[m[4]:m[5]] + if seen[path] { + continue + } + seen[path] = true + line := base.LineAt(text, m[0]) + id := "route:" + ctx.FilePath + ":react:" + path + *nodes = append(*nodes, frRouteNode(id, path, "react", ctx, line)) + *edges = append(*edges, model.NewCodeEdge(id+":renders:"+comp, model.EdgeRenders, id, comp)) + } + } + for _, m := range frReactRouteBare.FindAllStringSubmatchIndex(text, -1) { + path := text[m[2]:m[3]] + if seen[path] { + continue + } + seen[path] = true + line := base.LineAt(text, m[0]) + *nodes = append(*nodes, frRouteNode("route:"+ctx.FilePath+":react:"+path, path, "react", ctx, line)) + } +} + +func frDetectVue(ctx *detector.Context, text string, nodes *[]*model.CodeNode, edges 
*[]*model.CodeEdge) { + if frVueCreateRouter.FindStringIndex(text) == nil && frVueRoutesArray.FindStringIndex(text) == nil { + return + } + for _, m := range frVueRoute.FindAllStringSubmatchIndex(text, -1) { + path := text[m[2]:m[3]] + var comp string + if m[4] >= 0 { + comp = text[m[4]:m[5]] + } + line := base.LineAt(text, m[0]) + id := "route:" + ctx.FilePath + ":vue:" + path + *nodes = append(*nodes, frRouteNode(id, path, "vue", ctx, line)) + if comp != "" { + *edges = append(*edges, model.NewCodeEdge(id+":renders:"+comp, model.EdgeRenders, id, comp)) + } + } +} + +func frDetectAngular(ctx *detector.Context, text string, nodes *[]*model.CodeNode, edges *[]*model.CodeEdge) { + if frAngularRouterModul.FindStringIndex(text) == nil { + return + } + for _, m := range frVueRoute.FindAllStringSubmatchIndex(text, -1) { + path := text[m[2]:m[3]] + var comp string + if m[4] >= 0 { + comp = text[m[4]:m[5]] + } + line := base.LineAt(text, m[0]) + id := "route:" + ctx.FilePath + ":angular:" + path + *nodes = append(*nodes, frRouteNode(id, path, "angular", ctx, line)) + if comp != "" { + *edges = append(*edges, model.NewCodeEdge(id+":renders:"+comp, model.EdgeRenders, id, comp)) + } + } +} + +func frRouteNode(id, path, framework string, ctx *detector.Context, line int) *model.CodeNode { + n := model.NewCodeNode(id, model.NodeEndpoint, "route "+path) + n.FQN = ctx.FilePath + "::route:" + path + n.FilePath = ctx.FilePath + n.LineStart = line + n.Source = "FrontendRouteDetector" + n.Confidence = base.RegexDetectorDefaultConfidence + n.Properties["protocol"] = "frontend_route" + n.Properties["framework"] = framework + n.Properties["route_path"] = path + return n +} diff --git a/go/internal/detector/frontend/frontend_route_test.go b/go/internal/detector/frontend/frontend_route_test.go new file mode 100644 index 00000000..c14846c6 --- /dev/null +++ b/go/internal/detector/frontend/frontend_route_test.go @@ -0,0 +1,82 @@ +package frontend + +import ( + "testing" + + 
"github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +func TestFrontendRoute_React(t *testing.T) { + d := NewFrontendRouteDetector() + src := ` + + }/> + +` + r := d.Detect(&detector.Context{FilePath: "src/App.tsx", Language: "typescript", Content: src}) + if len(r.Nodes) != 3 { + t.Fatalf("expected 3 routes, got %d", len(r.Nodes)) + } + for _, n := range r.Nodes { + if n.Properties["framework"] != "react" { + t.Errorf("framework = %v want react", n.Properties["framework"]) + } + if n.Kind != model.NodeEndpoint { + t.Errorf("kind = %v", n.Kind) + } + } + if len(r.Edges) != 2 { + t.Fatalf("expected 2 renders edges, got %d", len(r.Edges)) + } +} + +func TestFrontendRoute_NextjsPages(t *testing.T) { + d := NewFrontendRouteDetector() + r := d.Detect(&detector.Context{FilePath: "pages/about.tsx", Language: "typescript", Content: "export default function About(){return null}"}) + if len(r.Nodes) != 1 { + t.Fatalf("expected 1 node, got %d", len(r.Nodes)) + } + if r.Nodes[0].Properties["framework"] != "nextjs" { + t.Errorf("framework = %v", r.Nodes[0].Properties["framework"]) + } + if r.Nodes[0].Properties["route_path"] != "/about" { + t.Errorf("route_path = %v", r.Nodes[0].Properties["route_path"]) + } +} + +func TestFrontendRoute_NextjsApp(t *testing.T) { + d := NewFrontendRouteDetector() + r := d.Detect(&detector.Context{FilePath: "app/blog/[slug]/page.tsx", Language: "typescript", Content: "export default function P(){return null}"}) + if len(r.Nodes) != 1 { + t.Fatalf("expected 1 node, got %d", len(r.Nodes)) + } + if r.Nodes[0].Properties["route_path"] != "/blog/[slug]" { + t.Errorf("route_path = %v", r.Nodes[0].Properties["route_path"]) + } +} + +func TestFrontendRoute_Vue(t *testing.T) { + d := NewFrontendRouteDetector() + src := `const router = createRouter({ + routes: [ + { path: '/home', component: Home }, + { path: '/about', component: About }, + ] +})` + r := d.Detect(&detector.Context{FilePath: 
"src/router.ts", Language: "typescript", Content: src}) + if len(r.Nodes) < 2 { + t.Fatalf("expected >=2 routes, got %d", len(r.Nodes)) + } +} + +func TestFrontendRoute_Determinism(t *testing.T) { + d := NewFrontendRouteDetector() + src := `` + c := &detector.Context{FilePath: "src/App.tsx", Language: "typescript", Content: src} + r1 := d.Detect(c) + r2 := d.Detect(c) + if len(r1.Nodes) != len(r2.Nodes) { + t.Fatal("non-deterministic") + } +} diff --git a/go/internal/detector/frontend/react_component.go b/go/internal/detector/frontend/react_component.go new file mode 100644 index 00000000..d10dfa42 --- /dev/null +++ b/go/internal/detector/frontend/react_component.go @@ -0,0 +1,140 @@ +package frontend + +import ( + "regexp" + "sort" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/detector/base" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +// ReactComponentDetector mirrors Java ReactComponentDetector. Emits a +// COMPONENT node per React function / class component and a HOOK node per +// custom hook (`use*` exports). For each component, emits a RENDERS edge to +// each capitalized JSX tag found within that component's body scope. 
+type ReactComponentDetector struct{} + +func NewReactComponentDetector() *ReactComponentDetector { return &ReactComponentDetector{} } + +func (ReactComponentDetector) Name() string { return "frontend.react_components" } +func (ReactComponentDetector) SupportedLanguages() []string { return []string{"typescript", "javascript"} } +func (ReactComponentDetector) DefaultConfidence() model.Confidence { return base.RegexDetectorDefaultConfidence } + +func init() { detector.RegisterDefault(NewReactComponentDetector()) } + +const propReact = "react" + +var ( + reactExportDefaultFunc = regexp.MustCompile(`export\s+default\s+function\s+([A-Z]\w*)\s*\(`) + reactExportConstArrow = regexp.MustCompile(`export\s+const\s+([A-Z]\w*)\s*=\s*\(`) + reactExportConstFC = regexp.MustCompile(`export\s+const\s+([A-Z]\w*)\s*:\s*React\.FC`) + reactClassExtendsReact = regexp.MustCompile(`class\s+([A-Z]\w*)\s+extends\s+React\.Component`) + reactClassExtendsComp = regexp.MustCompile(`class\s+([A-Z]\w*)\s+extends\s+Component\b`) + reactExportFuncHook = regexp.MustCompile(`export\s+function\s+(use[A-Z]\w*)\s*\(`) + reactExportConstHook = regexp.MustCompile(`export\s+const\s+(use[A-Z]\w*)\s*=\s*`) + reactJSXTag = regexp.MustCompile(`<([A-Z]\w*)\b`) + reactComponentRegexFunc = []*regexp.Regexp{reactExportDefaultFunc, reactExportConstArrow, reactExportConstFC} + reactComponentRegexClass = []*regexp.Regexp{reactClassExtendsReact, reactClassExtendsComp} + reactHookRegexes = []*regexp.Regexp{reactExportFuncHook, reactExportConstHook} +) + +func (d ReactComponentDetector) Detect(ctx *detector.Context) *detector.Result { + text := ctx.Content + if text == "" { + return detector.EmptyResult() + } + nodes := []*model.CodeNode{} + edges := []*model.CodeEdge{} + fp := ctx.FilePath + + type compEntry struct { + name string + sourceID string + matchStart int + } + var compEntries []compEntry + seen := map[string]bool{} + + addFunc := func(name string, start int) { + if seen[name] { + return + } + seen[name] = 
true + sourceID := "react:" + fp + ":component:" + name + n := base.CreateComponentNode(propReact, fp, "component", name, model.NodeComponent, base.LineAt(text, start)) + n.Confidence = base.RegexDetectorDefaultConfidence + n.Properties["component_type"] = "function" + nodes = append(nodes, n) + compEntries = append(compEntries, compEntry{name, sourceID, start}) + } + addClass := func(name string, start int) { + if seen[name] { + return + } + seen[name] = true + sourceID := "react:" + fp + ":component:" + name + n := base.CreateComponentNode(propReact, fp, "component", name, model.NodeComponent, base.LineAt(text, start)) + n.Confidence = base.RegexDetectorDefaultConfidence + n.Properties["component_type"] = "class" + nodes = append(nodes, n) + compEntries = append(compEntries, compEntry{name, sourceID, start}) + } + + for _, re := range reactComponentRegexFunc { + for _, m := range re.FindAllStringSubmatchIndex(text, -1) { + addFunc(text[m[2]:m[3]], m[0]) + } + } + for _, re := range reactComponentRegexClass { + for _, m := range re.FindAllStringSubmatchIndex(text, -1) { + addClass(text[m[2]:m[3]], m[0]) + } + } + + // Hooks + seenHooks := map[string]bool{} + for _, re := range reactHookRegexes { + for _, m := range re.FindAllStringSubmatchIndex(text, -1) { + name := text[m[2]:m[3]] + if seenHooks[name] { + continue + } + seenHooks[name] = true + n := base.CreateComponentNode(propReact, fp, "hook", name, model.NodeHook, base.LineAt(text, m[0])) + n.Confidence = base.RegexDetectorDefaultConfidence + nodes = append(nodes, n) + } + } + + // RENDERS edges: scope JSX tag search to each component's body section. 
+ sort.Slice(compEntries, func(i, j int) bool { + return compEntries[i].matchStart < compEntries[j].matchStart + }) + for i, comp := range compEntries { + bodyStart := comp.matchStart + bodyEnd := len(text) + if i+1 < len(compEntries) { + bodyEnd = compEntries[i+1].matchStart + } + body := text[bodyStart:bodyEnd] + childSet := map[string]bool{} + for _, jm := range reactJSXTag.FindAllStringSubmatch(body, -1) { + tag := jm[1] + if tag != comp.name { + childSet[tag] = true + } + } + children := make([]string, 0, len(childSet)) + for c := range childSet { + children = append(children, c) + } + sort.Strings(children) + for _, child := range children { + e := model.NewCodeEdge(comp.sourceID+":renders:"+child, model.EdgeRenders, comp.sourceID, child) + e.Confidence = base.RegexDetectorDefaultConfidence + edges = append(edges, e) + } + } + return detector.ResultOf(nodes, edges) +} diff --git a/go/internal/detector/frontend/react_component_test.go b/go/internal/detector/frontend/react_component_test.go new file mode 100644 index 00000000..ba14cc84 --- /dev/null +++ b/go/internal/detector/frontend/react_component_test.go @@ -0,0 +1,160 @@ +package frontend + +import ( + "strings" + "testing" + + "github.com/randomcodespace/codeiq/go/internal/detector" + "github.com/randomcodespace/codeiq/go/internal/model" +) + +func TestReactComponent_FunctionComponent(t *testing.T) { + src := "export default function MyApp() {\n return
;\n}" + d := NewReactComponentDetector() + r := d.Detect(&detector.Context{FilePath: "App.tsx", Language: "typescript", Content: src}) + if len(r.Nodes) != 1 { + t.Fatalf("expected 1 node, got %d", len(r.Nodes)) + } + if r.Nodes[0].Kind != model.NodeComponent { + t.Errorf("kind = %v, want COMPONENT", r.Nodes[0].Kind) + } + if r.Nodes[0].Label != "MyApp" { + t.Errorf("label = %q", r.Nodes[0].Label) + } +} + +func TestReactComponent_NoMatchOnPlainCode(t *testing.T) { + d := NewReactComponentDetector() + r := d.Detect(&detector.Context{FilePath: "x.ts", Language: "typescript", Content: "function lowercase() {}"}) + if len(r.Nodes) != 0 { + t.Fatalf("expected 0 nodes, got %d", len(r.Nodes)) + } +} + +func TestReactComponent_RendersEdgesScoped(t *testing.T) { + src := `export const Header = () => { + return