-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathindex.go
More file actions
108 lines (101 loc) · 3.72 KB
/
index.go
File metadata and controls
108 lines (101 loc) · 3.72 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
package cli
import (
"fmt"
"os"
"path/filepath"
"github.com/randomcodespace/codeiq/internal/analyzer"
"github.com/randomcodespace/codeiq/internal/cache"
"github.com/randomcodespace/codeiq/internal/detector"
// Blank imports register all phase-1 detectors with detector.Default.
_ "github.com/randomcodespace/codeiq/internal/detector/generic"
_ "github.com/randomcodespace/codeiq/internal/detector/jvm/java"
_ "github.com/randomcodespace/codeiq/internal/detector/python"
"github.com/spf13/cobra"
)
func init() {
registerSubcommand(func() *cobra.Command {
var (
batchSize int
workers int
force bool
)
cmd := &cobra.Command{
Use: "index [path]",
Short: "Scan a codebase into the analysis cache (write path).",
Long: `Scan the source tree at [path] and write detector results into
the SQLite analysis cache at <path>/.codeiq/cache/codeiq.sqlite. The cache is
keyed by SHA-256 file content hash so subsequent runs reuse cached results
for unchanged files. After indexing, run "codeiq enrich" to load the cache
into the Kuzu graph store (phase 2).
Phase 1 ships 5 detectors -- Spring REST controllers, JPA entities, Django
models, Flask routes, and a generic-imports detector. Languages covered:
Java and Python.`,
Example: ` codeiq index .
codeiq index /path/to/repo --batch-size 1000 --workers 8
codeiq index .
# -> Files: 12 Nodes: 47 Edges: 23 Cache: ./.codeiq/cache/codeiq.sqlite`,
Args: cobra.MaximumNArgs(1),
RunE: func(cmd *cobra.Command, args []string) error {
path := "."
if len(args) == 1 {
path = args[0]
}
abs, err := filepath.Abs(path)
if err != nil {
return err
}
if st, err := os.Stat(abs); err != nil || !st.IsDir() {
return newUsageError("path %q is not a directory", abs)
}
cacheDir := filepath.Join(abs, ".codeiq", "cache")
if err := os.MkdirAll(cacheDir, 0755); err != nil {
return fmt.Errorf("mkdir cache: %w", err)
}
dbPath := filepath.Join(cacheDir, "codeiq.sqlite")
c, err := cache.Open(dbPath)
if err != nil {
return err
}
defer c.Close()
a := analyzer.NewAnalyzer(analyzer.Options{
Cache: c,
Registry: detector.Default,
BatchSize: batchSize,
Workers: workers,
Force: force,
})
stats, err := a.Run(abs)
if err != nil {
return err
}
fmt.Fprintf(cmd.OutOrStdout(),
"Files: %d Nodes: %d Edges: %d Cache: %s\n",
stats.Files, stats.Nodes, stats.Edges, dbPath)
if stats.DedupedNodes > 0 || stats.DedupedEdges > 0 || stats.DroppedEdges > 0 {
fmt.Fprintf(cmd.OutOrStdout(),
"Deduped: %d nodes, %d edges Dropped: %d phantom edges\n",
stats.DedupedNodes, stats.DedupedEdges, stats.DroppedEdges)
}
// Incremental counters are only meaningful when the cache was
// consulted (i.e. not --force). Print them when any of them is
// non-zero so unchanged re-runs see "Unchanged: N (100%)".
if !force && (stats.Added+stats.Modified+stats.Deleted+stats.Unchanged) > 0 {
line := fmt.Sprintf("Added: %d Modified: %d Deleted: %d Unchanged: %d Cache hits: %d",
stats.Added, stats.Modified, stats.Deleted, stats.Unchanged, stats.CacheHits)
if stats.Files > 0 {
line += fmt.Sprintf(" (%.1f%%)", 100.0*float64(stats.CacheHits)/float64(stats.Files))
}
fmt.Fprintln(cmd.OutOrStdout(), line)
}
return nil
},
}
cmd.Flags().IntVar(&batchSize, "batch-size", 500,
"Number of files processed per batch (default: 500).")
cmd.Flags().IntVarP(&workers, "workers", "w", 0,
"Worker goroutine count (default: 2 * GOMAXPROCS).")
cmd.Flags().BoolVar(&force, "force", false,
"Bypass the incremental cache; re-parse every file even when the content hash hasn't changed.")
return cmd
})
}