First Working Prototype

This application is a simple proof of concept demonstrating an agent capable of taking a prompt and generating a patch implementing code satisfying the prompt along with an accompanying unit test.
This commit is contained in:
2025-04-20 07:47:41 -04:00
commit 4b8b8132fd
15 changed files with 1797 additions and 0 deletions

299
cmd/autopatch/autopatch.go Normal file
View File

@@ -0,0 +1,299 @@
package autopatch
import (
"ai-code-assistant/pkg/config"
"ai-code-assistant/pkg/database"
"ai-code-assistant/pkg/llm"
"bytes"
"context"
"errors"
"fmt"
"github.com/go-git/go-billy/v5/osfs"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing/cache"
"github.com/go-git/go-git/v5/storage/filesystem"
"github.com/sergi/go-diff/diffmatchpatch"
"github.com/urfave/cli/v3"
"log/slog"
"os"
"path/filepath"
"strings"
)
// Command builds the "auto-patch" sub-command. It takes two required string
// flags, --repo and --task, and dispatches to a fresh agent's run method.
func Command() *cli.Command {
	repoFlag := &cli.StringFlag{
		Name:     "repo",
		Usage:    "path to git repository",
		Required: true,
	}
	taskFlag := &cli.StringFlag{
		Name:     "task",
		Usage:    "task to perform, e.g. \"add a test for a function\"",
		Required: true,
	}

	patcher := &agent{}
	return &cli.Command{
		Name:   "auto-patch",
		Usage:  "this command accepts a repository and a prompt and will generate a git commit attempting code modifications to satisfy the prompt.",
		Action: patcher.run,
		Flags:  []cli.Flag{repoFlag, taskFlag},
	}
}
// agent holds the state shared by the steps of the auto-patch command.
type agent struct {
	// llm is the language-model client; run fills it in from the command context.
	llm *llm.LLM
}
// run is the CLI action for auto-patch: it picks up the LLM client stored on
// ctx, then generates and commits a patch for the --repo / --task flag values.
func (a *agent) run(ctx context.Context, cmd *cli.Command) error {
	a.llm = llm.FromContext(ctx)

	repoPath, task := cmd.String("repo"), cmd.String("task")
	return a.generateGitCommit(ctx, repoPath, task)
}
// generateGitCommit drives the whole flow for one prompt: patch the most
// relevant source file, write a unit test for the new code, and commit both
// files to the repository at repoPath. The first failing step aborts the flow.
func (a *agent) generateGitCommit(ctx context.Context, repoPath, prompt string) error {
	patchedFile, patchedCode, err := a.generateCodePatch(ctx, repoPath, prompt)
	if err != nil {
		return err
	}

	unitTestFile, err := a.generateUnitTest(ctx, prompt, patchedFile, patchedCode)
	if err != nil {
		return err
	}

	err = a.commit(ctx, prompt, repoPath, patchedFile, unitTestFile)
	if err != nil {
		return err
	}

	slog.Info("committed changes to git repo", "repo", repoPath)
	return nil
}
// commit stages files in the repository at repoPath and creates a commit whose
// message is produced by the LLM from the "generate_commitmsg" prompt template.
//
// Each entry of files is converted to a path relative to repoPath before it is
// staged. An error is returned if a path cannot be made relative to repoPath,
// or if opening the repository, staging, prompt rendering, the LLM call, or
// the commit itself fails.
func (a *agent) commit(ctx context.Context, prompt, repoPath string, files ...string) error {
	gitPath := osfs.New(filepath.Join(repoPath, ".git"))
	gitRepo, err := git.Open(filesystem.NewStorage(gitPath, cache.NewObjectLRUDefault()), osfs.New(repoPath))
	if err != nil {
		return err
	}
	workTree, err := gitRepo.Worktree()
	if err != nil {
		return err
	}

	for _, file := range files {
		// filepath.Rel cleans both paths first, so differently spelled but
		// equivalent prefixes ("./repo", "repo/") still resolve; a plain string
		// prefix trim silently leaves the path untouched in those cases.
		relPath, err := filepath.Rel(repoPath, file)
		if err != nil {
			return fmt.Errorf("resolving %q relative to repository %q: %w", file, repoPath, err)
		}
		if _, err := workTree.Add(relPath); err != nil {
			return err
		}
	}

	genPrompt, err := llm.GetPrompt("generate_commitmsg", map[string]any{
		"Prompt": prompt,
		"Files":  files,
	})
	if err != nil {
		return err
	}
	rsp, err := a.llm.ChatPrompt(ctx, genPrompt)
	if err != nil {
		return err
	}

	// The raw LLM response is used verbatim as the commit message.
	if _, err := workTree.Commit(rsp, &git.CommitOptions{}); err != nil {
		return err
	}
	return nil
}
// generateCodePatch asks the LLM to rewrite the file chunk most relevant to
// prompt and writes the result back to that chunk's file.
//
// It returns the path of the patched file and the code block produced by the
// LLM. An error is returned when the repository is unknown to the database, no
// relevant chunk is found, or any prompt, LLM or file-system step fails.
func (a *agent) generateCodePatch(ctx context.Context, repoPath, prompt string) (string, string, error) {
	db := database.FromContext(ctx)
	cfg := config.FromContext(ctx)

	repoID, err := db.RepoIDFromPath(ctx, repoPath)
	if err != nil {
		return "", "", err
	}

	finder := llm.NewGetRelevantDocs(db, a.llm, repoID, cfg.RelevantDocs)
	candidates, err := finder.GetRelevantFileChunks(ctx, prompt)
	if err != nil {
		return "", "", err
	}
	if len(candidates) == 0 {
		return "", "", errors.New("no relevant chunks found")
	}

	// Only the first returned chunk is used.
	best := candidates[0]
	slog.Info("found most relevant file chunk", "file", best.Name, "start", best.Start, "end", best.End, "score", best.Score, "id", best.ChunkID)

	// NOTE(review): the literal 1 is presumably how many neighbouring chunks
	// to include as context — confirm against GetChunkContext.
	surrounding, err := db.GetChunkContext(ctx, best.ChunkID, 1, best.Name, repoID)
	if err != nil {
		return "", "", err
	}

	patchPrompt, err := llm.GetPrompt("generate_patch", map[string]string{
		"Prompt":  prompt,
		"Context": surrounding,
	})
	if err != nil {
		return "", "", err
	}

	generated, err := a.generateCode(ctx, patchPrompt)
	if err != nil {
		return "", "", err
	}
	fmt.Printf("Code block:\n%s\n", generated)

	targetFile := best.Name
	current, err := os.ReadFile(targetFile)
	if err != nil {
		return "", "", err
	}

	// Diff the whole file against the generated code, drop leading/trailing
	// deletions, then rewrite the file from what remains.
	differ := diffmatchpatch.New()
	changes := cleanDiffs(differ.DiffMain(string(current), generated, true))

	fmt.Printf("File to patch: %s\n", targetFile)
	fmt.Println(differ.DiffPrettyText(changes))

	if err := patchFile(targetFile, changes); err != nil {
		return "", "", err
	}
	return targetFile, generated, nil
}
// generateUnitTest asks the LLM for a unit test covering newCode and writes it
// next to fileName, in the file obtained by swapping the ".go" extension for
// "_test.go".
//
// When that test file already exists the generated code is appended after a
// newline; otherwise the file is created. The template is told which case
// applies through the "TestFileExists" value. It returns the path of the test
// file, or an error if prompt rendering, the LLM call, or the write fails.
func (a *agent) generateUnitTest(ctx context.Context, prompt, fileName, newCode string) (string, error) {
	// Only the trailing extension is swapped; replacing every ".go" would also
	// rewrite directory names that happen to contain it.
	testFile := strings.TrimSuffix(fileName, ".go") + "_test.go"

	// Check to see if a test file for this already exists.
	testFileExists := false
	if _, err := os.Stat(testFile); err == nil {
		testFileExists = true
	}

	genPrompt, err := llm.GetPrompt("generate_unittest", map[string]any{
		"Prompt":         prompt,
		"Context":        newCode,
		"TestFileExists": testFileExists,
	})
	if err != nil {
		return "", err
	}
	codeBlock, err := a.generateCode(ctx, genPrompt)
	if err != nil {
		return "", err
	}
	fmt.Printf("Unit Test Code block:\n%s\n", codeBlock)

	// os.Open is read-only and fails on a missing file, so the new-file case
	// has to go through OpenFile with O_CREATE.
	flags := os.O_WRONLY | os.O_CREATE | os.O_TRUNC
	content := codeBlock
	if testFileExists {
		flags = os.O_WRONLY | os.O_APPEND
		content = "\n" + codeBlock
	}

	fp, err := os.OpenFile(testFile, flags, 0644)
	if err != nil {
		return "", err
	}
	if _, err := fp.WriteString(content); err != nil {
		_ = fp.Close() // the write failure is the error worth reporting
		return "", err
	}
	// Close explicitly so a failed flush of written data is not lost.
	if err := fp.Close(); err != nil {
		return "", err
	}
	return testFile, nil
}
// generateCode sends prompt to the LLM's code endpoint and extracts the fenced
// code from the reply: everything between the first and the last ``` marker,
// with a leading "go" language tag removed. It fails when the reply does not
// contain two distinct fence markers.
func (a *agent) generateCode(ctx context.Context, prompt string) (string, error) {
	rsp, err := a.llm.CodePrompt(ctx, prompt)
	if err != nil {
		return "", err
	}

	const fence = "```"
	fenceOpen := strings.Index(rsp, fence)
	fenceClose := strings.LastIndex(rsp, fence)
	// A missing fence yields -1 for both; a single fence yields equal indexes.
	if fenceOpen < 0 || fenceClose <= fenceOpen {
		return "", fmt.Errorf("unable to find code block in response: %s", rsp)
	}

	body := rsp[fenceOpen+len(fence) : fenceClose]
	return strings.TrimPrefix(body, "go"), nil
}
// patchFile overwrites fileName with the text of every inserted or unchanged
// diff, in order. Deleted segments are left out, so the result is the "after"
// side of the diff. The file is written with mode 0644.
func patchFile(fileName string, diffs []diffmatchpatch.Diff) error {
	var patched bytes.Buffer
	for i := range diffs {
		switch diffs[i].Type {
		case diffmatchpatch.DiffInsert, diffmatchpatch.DiffEqual:
			patched.WriteString(diffs[i].Text)
		}
	}
	return os.WriteFile(fileName, patched.Bytes(), 0644)
}
// cleanDiffs strips the run of deletions at the beginning and at the end of
// diffs, since the LLM may trim these parts off the context it was given.
// Deletions between other diffs are kept.
//
// The result is a sub-slice of diffs; it is empty when diffs is empty or
// consists only of deletions.
//
// NOTE(review): patchFile never writes deleted text, so dropping these entries
// does not by itself keep the trimmed text in the patched file — confirm
// whether they should be turned into equalities instead.
func cleanDiffs(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff {
	startIdx := 0
	for startIdx < len(diffs) && diffs[startIdx].Type == diffmatchpatch.DiffDelete {
		startIdx++
	}

	// endIdx is an exclusive bound: it stops right after the last non-delete
	// diff, and never moves below startIdx so the slice expression stays valid.
	endIdx := len(diffs)
	for endIdx > startIdx && diffs[endIdx-1].Type == diffmatchpatch.DiffDelete {
		endIdx--
	}
	return diffs[startIdx:endIdx]
}

63
cmd/chunks/chunks.go Normal file
View File

@@ -0,0 +1,63 @@
package chunks
import (
"ai-code-assistant/pkg/database"
"ai-code-assistant/pkg/llm"
"context"
"github.com/urfave/cli/v3"
"log/slog"
)
// Command builds the "chunks" sub-command. It takes required --repo and
// --query string flags and an optional --docs integer flag (default 10), and
// dispatches to the run method of a chunks value.
func Command() *cli.Command {
	repoFlag := &cli.StringFlag{
		Name:     "repo",
		Usage:    "path to git repository",
		Required: true,
	}
	queryFlag := &cli.StringFlag{
		Name:     "query",
		Usage:    "query to search for",
		Required: true,
	}
	docsFlag := &cli.IntFlag{
		Name:  "docs",
		Usage: "number of docs to get",
		Value: 10,
	}

	handler := &chunks{}
	return &cli.Command{
		Name:   "chunks",
		Usage:  "this command tries to find relevant chunks of code in a git repository for a given query",
		Action: handler.run,
		Flags:  []cli.Flag{repoFlag, queryFlag, docsFlag},
	}
}
// chunks is the receiver for the "chunks" command action; it carries no state.
type chunks struct {
}
// run is the CLI action for the chunks command. It looks up the repository ID
// for --repo, fetches the file chunks relevant to --query (the --docs value is
// passed to the retriever), ranks them, and logs one line per chunk.
func (c *chunks) run(ctx context.Context, cmd *cli.Command) error {
	db := database.FromContext(ctx)
	model := llm.FromContext(ctx)

	repoID, err := db.RepoIDFromPath(ctx, cmd.String("repo"))
	if err != nil {
		return err
	}

	finder := llm.NewGetRelevantDocs(db, model, repoID, int(cmd.Int("docs")))
	matches, err := finder.GetRelevantFileChunks(ctx, cmd.String("query"))
	if err != nil {
		return err
	}

	err = finder.RankChunks(ctx, cmd.String("query"), matches)
	if err != nil {
		return err
	}

	for _, match := range matches {
		slog.Info("found relevant chunk", "name", match.Name, "start", match.Start, "end", match.End, "score", match.Score, "id", match.ChunkID)
	}
	return nil
}

203
cmd/indexer/indexer.go Normal file
View File

@@ -0,0 +1,203 @@
package indexer
import (
"ai-code-assistant/pkg/database"
"ai-code-assistant/pkg/llm"
"context"
"github.com/go-git/go-billy/v5/osfs"
"github.com/go-git/go-git/v5"
"github.com/go-git/go-git/v5/plumbing/cache"
"github.com/go-git/go-git/v5/storage/filesystem"
"github.com/google/uuid"
"github.com/tmc/langchaingo/schema"
"github.com/tmc/langchaingo/vectorstores/pgvector"
"github.com/urfave/cli/v3"
"log/slog"
"os"
"path/filepath"
"strconv"
)
// Command builds the "indexer" sub-command. It takes a required --repo string
// flag and an optional --chunk-size integer flag (default 512 * 4 bytes), and
// dispatches to the run method of a fresh indexer.
func Command() *cli.Command {
	repoFlag := &cli.StringFlag{
		Name:     "repo",
		Usage:    "path to git repository",
		Required: true,
	}
	chunkSizeFlag := &cli.IntFlag{
		Name:  "chunk-size",
		Usage: "number of bytes to chunk files into, should be roughly 4x the number of tokens",
		Value: 512 * 4,
	}

	worker := &indexer{}
	return &cli.Command{
		Name:   "indexer",
		Usage:  "this command will index a local git repository to build context for the llm",
		Action: worker.run,
		Flags:  []cli.Flag{repoFlag, chunkSizeFlag},
	}
}
// indexer holds the state of one indexing run. run fills in every field except
// repoID, which upsertRepo sets after inserting the repository row.
type indexer struct {
	// db is the database handle taken from the command context.
	db *database.Database
	// llm is the LLM client taken from the command context; its embedder is
	// handed to the vector store.
	llm *llm.LLM
	// repoPath is the value of the --repo flag.
	repoPath string
	// repoID is the UUID generated for this repository by upsertRepo.
	repoID string
	// chunkSize is the value of the --chunk-size flag, in bytes.
	chunkSize int
}
// run is the CLI action for the indexer command. It loads the database and LLM
// from ctx and the flag values from cmd, records the repository, and then
// chunks and embeds its files.
func (idx *indexer) run(ctx context.Context, cmd *cli.Command) error {
	idx.db = database.FromContext(ctx)
	idx.repoPath = cmd.String("repo")
	idx.chunkSize = int(cmd.Int("chunk-size"))
	idx.llm = llm.FromContext(ctx)

	err := idx.upsertRepo(ctx)
	if err != nil {
		return err
	}
	return idx.generateFileChunks(ctx)
}
// upsertRepo reads the HEAD reference of the git repository at idx.repoPath
// and executes the "insert_repo" statement with a freshly generated UUID, the
// HEAD hash, and the repository path. On success the UUID is stored in
// idx.repoID.
func (idx *indexer) upsertRepo(ctx context.Context) error {
	dotGit := osfs.New(filepath.Join(idx.repoPath, ".git"))
	storage := filesystem.NewStorage(dotGit, cache.NewObjectLRUDefault())

	repo, err := git.Open(storage, dotGit)
	if err != nil {
		return err
	}
	head, err := repo.Head()
	if err != nil {
		return err
	}

	conn, err := idx.db.DB(ctx)
	if err != nil {
		return err
	}
	defer conn.Release()

	newID := uuid.NewString()
	_, err = conn.Exec(ctx, "insert_repo", newID, head.Hash().String(), idx.repoPath)
	if err != nil {
		return err
	}

	idx.repoID = newID
	return nil
}
// crawlFiles walks the directory tree rooted at path depth-first and invokes
// cb with the full path of every entry that is not a directory. Entries are
// visited in the order os.ReadDir returns them. The walk stops at, and
// returns, the first error from reading a directory or from cb.
func crawlFiles(ctx context.Context, path string, cb func(ctx context.Context, filePath string) error) error {
	entries, err := os.ReadDir(path)
	if err != nil {
		return err
	}

	for _, entry := range entries {
		entryPath := filepath.Join(path, entry.Name())

		var visitErr error
		if entry.IsDir() {
			visitErr = crawlFiles(ctx, entryPath, cb)
		} else {
			visitErr = cb(ctx, entryPath)
		}
		if visitErr != nil {
			return visitErr
		}
	}
	return nil
}
// generateFileChunks walks idx.repoPath, splits every file with an allowed
// extension (currently only ".go") into chunks of at most idx.chunkSize bytes,
// and adds each chunk to the "file_chunks" pgvector collection, embedded with
// the LLM's embedder.
//
// Each stored document carries its file path, a per-file chunk counter, the
// start/end byte offsets, and idx.repoID as metadata. Files with other
// extensions are skipped without being read. The first error from the walk,
// from reading a file, or from the vector store aborts indexing.
func (idx *indexer) generateFileChunks(ctx context.Context) error {
	conn, err := idx.db.DB(ctx)
	if err != nil {
		return err
	}
	defer conn.Release()

	vectorStore, err := pgvector.New(ctx,
		pgvector.WithConn(conn),
		pgvector.WithEmbedder(idx.llm.Embedder()),
		pgvector.WithCollectionName("file_chunks"),
	)
	if err != nil {
		return err
	}

	allowedExtensions := []string{".go"}
	return crawlFiles(ctx, idx.repoPath, func(ctx context.Context, filePath string) error {
		// Filter before chunkFile so files that will never be indexed
		// (including everything under .git) are not read into memory.
		shouldIndex := false
		fileExt := filepath.Ext(filePath)
		for _, ext := range allowedExtensions {
			if fileExt == ext {
				shouldIndex = true
				break
			}
		}
		if !shouldIndex {
			return nil
		}

		// chunkID restarts at zero for every file.
		chunkID := 0
		return chunkFile(ctx, filePath, idx.chunkSize, func(chunk []byte, start, end uint64) error {
			slog.Info("indexing file", "chunk_id", chunkID, "chunk_size", len(chunk), "file_name", filePath)
			docs := []schema.Document{{
				PageContent: string(chunk),
				Metadata: map[string]any{
					"type":      "file_chunk",
					"file_path": filePath,
					"chunk_id":  strconv.FormatInt(int64(chunkID), 10),
					"start":     strconv.FormatUint(start, 10),
					"end":       strconv.FormatUint(end, 10),
					"repo_id":   idx.repoID,
				},
			}}
			if _, err := vectorStore.AddDocuments(ctx, docs); err != nil {
				return err
			}
			chunkID++
			return nil
		})
	})
}
// chunkFile reads filePath and hands its contents to chunkCb in consecutive
// pieces of at most maxBytes bytes, suitable in size for embedding. start and
// end are the byte offsets of the piece within the file (end is exclusive).
// An empty file produces no callbacks.
//
// It returns an *os.PathError wrapping os.ErrInvalid when maxBytes is not
// positive, the read error when the file cannot be read, or the first error
// returned by chunkCb, which also stops the iteration.
//
// This is a very simple algorithm right now, it would be better to use a lexer to identify good parts of the AST to
// split on. We could also implement a reference graph to find the most relevant files based on the relationships
// between files.
func chunkFile(_ context.Context, filePath string, maxBytes int, chunkCb func(chunk []byte, start, end uint64) error) error {
	// A non-positive size would never advance through the file.
	if maxBytes <= 0 {
		return &os.PathError{Op: "chunk", Path: filePath, Err: os.ErrInvalid}
	}

	fileBytes, err := os.ReadFile(filePath)
	if err != nil {
		return err
	}

	for start := 0; start < len(fileBytes); {
		// Compare against the remaining length rather than adding to start, so
		// a very large maxBytes cannot overflow.
		end := len(fileBytes)
		if maxBytes < end-start {
			end = start + maxBytes
		}
		if err := chunkCb(fileBytes[start:end], uint64(start), uint64(end)); err != nil {
			return err
		}
		start = end
	}
	return nil
}

110
cmd/main.go Normal file
View File

@@ -0,0 +1,110 @@
package main
import (
"ai-code-assistant/cmd/autopatch"
"ai-code-assistant/cmd/chunks"
"ai-code-assistant/cmd/indexer"
"ai-code-assistant/pkg/config"
"ai-code-assistant/pkg/database"
"ai-code-assistant/pkg/llm"
"context"
"fmt"
_ "github.com/lib/pq"
"github.com/urfave/cli/v3"
"gopkg.in/yaml.v3"
"log/slog"
"os"
)
// main wires up the root command with its sub-commands (indexer, chunks,
// auto-patch) and a Before hook that runs the initialisation phases in order:
// read the config file, configure logging, open the database, and create the
// LLM client. Each phase returns the context the next one receives.
//
// If the command fails the error is logged and the process exits with
// status 1.
func main() {
	app := &cli.Command{
		Name:  "ai-coding-assistant",
		Usage: "an AI-powered autonomous code assistant that can help software engineers by performing tasks on their behalf",
		Commands: []*cli.Command{
			indexer.Command(),
			chunks.Command(),
			autopatch.Command(),
		},
		Before: func(ctx context.Context, cmd *cli.Command) (context.Context, error) {
			initPhases := []cli.BeforeFunc{
				readConfig,
				initLogging,
				initDatabase,
				initLLM,
			}
			for _, phase := range initPhases {
				var err error
				ctx, err = phase(ctx, cmd)
				if err != nil {
					return nil, err
				}
			}
			return ctx, nil
		},
		Flags: []cli.Flag{
			&cli.StringFlag{
				Name:  "config",
				Usage: "path to config file",
				Value: "config.yaml",
			},
		},
	}

	ctx, cancel := context.WithCancel(context.Background())
	err := app.Run(ctx, os.Args)
	// Cancel explicitly instead of deferring: os.Exit skips deferred calls.
	cancel()
	if err != nil {
		slog.Error("problem running command", "error", err)
		// Report the failure to the calling shell or script.
		os.Exit(1)
	}
}
// readConfig opens the YAML file named by the --config flag, decodes it into a
// config.Configuration, and returns ctx wrapped with that configuration.
// It returns a nil context and a wrapped error when the file cannot be opened
// or parsed.
func readConfig(ctx context.Context, cmd *cli.Command) (context.Context, error) {
	cfgFile := cmd.String("config")
	cfgHandle, err := os.Open(cfgFile)
	if err != nil {
		return nil, fmt.Errorf("problem opening config: %s: %w", cfgFile, err)
	}
	// The handle is only read from, so a close error carries no information.
	defer cfgHandle.Close()

	cfg := &config.Configuration{}
	if err := yaml.NewDecoder(cfgHandle).Decode(cfg); err != nil {
		return nil, fmt.Errorf("problem parsing config: %w", err)
	}
	return config.WrapContext(ctx, cfg), nil
}
// initLogging parses the logging level found in the configuration on ctx and
// passes it to slog.SetLogLoggerLevel. It returns ctx unchanged, or a nil
// context and the parse error when the level text is not valid.
func initLogging(ctx context.Context, _ *cli.Command) (context.Context, error) {
	levelText := config.FromContext(ctx).Logging.Level

	var level slog.Level
	if err := level.UnmarshalText([]byte(levelText)); err != nil {
		return nil, err
	}

	slog.SetLogLoggerLevel(level)
	return ctx, nil
}
// initDatabase builds the database handle from the configuration on ctx and
// returns ctx wrapped with it, or a nil context and the construction error.
func initDatabase(ctx context.Context, _ *cli.Command) (context.Context, error) {
	handle, err := database.FromConfig(ctx, config.FromContext(ctx))
	if err != nil {
		return nil, err
	}

	withDB := database.WrapContext(ctx, handle)
	return withDB, nil
}
// initLLM builds the LLM client from the configuration on ctx and returns ctx
// wrapped with it, or a nil context and the construction error.
func initLLM(ctx context.Context, _ *cli.Command) (context.Context, error) {
	client, err := llm.FromConfig(config.FromContext(ctx))
	if err != nil {
		return nil, err
	}

	withLLM := llm.WrapContext(ctx, client)
	return withLLM, nil
}