Code Cleanup and Quality of Life
Check that the repo is indexed before generating code. Don't generate tests for changes to tests. Remove unused code. Fix the bootstrapping issue with the langchaingo tables.
This commit is contained in:
@@ -6,12 +6,18 @@ import (
|
||||
"embed"
|
||||
"errors"
|
||||
"fmt"
|
||||
"github.com/go-git/go-billy/v5/osfs"
|
||||
"github.com/go-git/go-git/v5"
|
||||
"github.com/go-git/go-git/v5/plumbing/cache"
|
||||
"github.com/go-git/go-git/v5/storage/filesystem"
|
||||
"github.com/golang-migrate/migrate/v4"
|
||||
_ "github.com/golang-migrate/migrate/v4/database/postgres"
|
||||
"github.com/golang-migrate/migrate/v4/source/iofs"
|
||||
"github.com/google/uuid"
|
||||
"github.com/jackc/pgx/v5"
|
||||
"github.com/jackc/pgx/v5/pgxpool"
|
||||
_ "github.com/lib/pq"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
)
|
||||
|
||||
@@ -26,9 +32,10 @@ const (
|
||||
|
||||
// preparedStatements returns the SQL text of each named statement, keyed by
// the statement name that query calls pass in place of raw SQL.
// NOTE(review): where these are registered on the connection is not visible
// here — confirm against the connection setup code.
func preparedStatements() map[string]string {
|
||||
return map[string]string{
|
||||
"insert_repo": `INSERT INTO repos (repo_id, repo_hash, repo_path) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING`,
|
||||
"repo_from_path": `SELECT repo_id FROM repos WHERE repo_path = $1`,
|
||||
"get_chunk": `SELECT document FROM langchain_pg_embedding WHERE JSON_EXTRACT_PATH_TEXT(cmetadata, 'chunk_id')=$1 AND JSON_EXTRACT_PATH_TEXT(cmetadata, 'file_path')=$2 AND JSON_EXTRACT_PATH_TEXT(cmetadata, 'repo_id')=$3`,
|
||||
// Repos are looked up by the (hash, path) pair, not by path alone.
"get_repo": `SELECT repo_id FROM repos WHERE repo_hash = $1 AND repo_path = $2`,
|
||||
"insert_repo": `INSERT INTO repos (repo_id, repo_hash, repo_path) VALUES ($1, $2, $3) ON CONFLICT DO NOTHING`,
|
||||
// Chunks are matched on fields stored inside the cmetadata JSON column.
"get_chunk": `SELECT document FROM langchain_pg_embedding WHERE JSON_EXTRACT_PATH_TEXT(cmetadata, 'chunk_id')=$1 AND JSON_EXTRACT_PATH_TEXT(cmetadata, 'file_path')=$2 AND JSON_EXTRACT_PATH_TEXT(cmetadata, 'repo_id')=$3`,
|
||||
// Deletes every embedding row whose metadata carries the given repo_id.
"clear_chunks_for_repo": `DELETE FROM langchain_pg_embedding WHERE JSON_EXTRACT_PATH_TEXT(cmetadata, 'repo_id')=$1`,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -51,20 +58,40 @@ func (db *Database) DB(ctx context.Context) (*pgxpool.Conn, error) {
|
||||
return conn, nil
|
||||
}
|
||||
|
||||
func (db *Database) RepoIDFromPath(ctx context.Context, path string) (string, error) {
|
||||
// UpsertRepo looks up, and if necessary records, the repository at repoPath.
//
// It opens the git repository under repoPath, reads its HEAD reference, and
// queries for a repo row matching that HEAD hash and path. If a row exists its
// ID is returned with true. Otherwise a new UUID is generated, inserted
// together with the hash and path, and returned with false.
//
// On any error the returned ID is empty and the bool is false.
func (db *Database) UpsertRepo(ctx context.Context, repoPath string) (string, bool, error) {
|
||||
// Open the repository from its .git directory.
gitPath := osfs.New(filepath.Join(repoPath, ".git"))
|
||||
|
||||
gitRepo, err := git.Open(filesystem.NewStorage(gitPath, cache.NewObjectLRUDefault()), gitPath)
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
|
||||
// The HEAD hash is the first half of the (hash, path) lookup key below.
headRef, err := gitRepo.Head()
|
||||
if err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
|
||||
conn, err := db.DB(ctx)
|
||||
if err != nil {
|
||||
return "", err
|
||||
return "", false, err
|
||||
}
|
||||
defer conn.Release()
|
||||
|
||||
var repoID string
|
||||
var id string
|
||||
|
||||
if err := conn.QueryRow(ctx, "repo_from_path", path).Scan(&repoID); err != nil {
|
||||
return "", err
|
||||
// A nil error means a row for this hash and path already exists.
if err := conn.QueryRow(ctx, "get_repo", headRef.Hash().String(), repoPath).Scan(&id); err == nil {
|
||||
return id, true, nil
|
||||
// pgx.ErrNoRows just means "not recorded yet"; anything else is a real failure.
} else if !errors.Is(err, pgx.ErrNoRows) {
|
||||
return "", false, err
|
||||
}
|
||||
|
||||
return repoID, nil
|
||||
// No existing row: mint a fresh ID and record this hash/path pair.
id = uuid.NewString()
|
||||
|
||||
if _, err := conn.Exec(ctx, "insert_repo", id, headRef.Hash().String(), repoPath); err != nil {
|
||||
return "", false, err
|
||||
}
|
||||
|
||||
return id, false, nil
|
||||
}
|
||||
|
||||
func (db *Database) GetChunk(ctx context.Context, chunkID int, path, repoID string) (string, error) {
|
||||
|
||||
@@ -4,3 +4,19 @@ CREATE TABLE IF NOT EXISTS repos (
|
||||
repo_path TEXT NOT NULL,
|
||||
UNIQUE(repo_hash, repo_path)
|
||||
);
|
||||
|
||||
-- Collection table referenced by langchain_pg_embedding.collection_id.
-- Each collection has a unique name, optional JSON metadata, and a UUID key.
-- NOTE(review): unlike the repos table, this CREATE has no IF NOT EXISTS, so it
-- fails if the table is already present — confirm that is intended.
CREATE TABLE langchain_pg_collection (
|
||||
name VARCHAR UNIQUE,
|
||||
cmetadata JSON,
|
||||
uuid UUID PRIMARY KEY
|
||||
);
|
||||
|
||||
-- Embedding table: one row per stored document with its vector and metadata.
-- Rows are removed automatically when their parent collection is deleted.
-- NOTE(review): VECTOR is not a built-in Postgres type; presumably it comes
-- from the pgvector extension — confirm it is created before this runs.
CREATE TABLE langchain_pg_embedding (
|
||||
collection_id UUID REFERENCES langchain_pg_collection ON DELETE CASCADE,
|
||||
embedding VECTOR,
|
||||
document VARCHAR,
|
||||
-- JSON metadata; the application filters on its chunk_id, file_path and repo_id keys.
cmetadata JSON,
|
||||
uuid UUID PRIMARY KEY
|
||||
);
|
||||
|
||||
-- Index on the foreign-key column of langchain_pg_embedding.
CREATE INDEX langchain_pg_embedding_collection_id ON langchain_pg_embedding (collection_id);
|
||||
Reference in New Issue
Block a user