From 207d14a8f186c40a9809afb90b7bbd8ce8545a0b Mon Sep 17 00:00:00 2001
From: Sam Mortenson
Date: Wed, 13 Nov 2024 09:51:38 -0800
Subject: [PATCH 1/3] Made solid progress

---
 .gitignore                         |  1 +
 cmd/ripoff-export/ripoff_export.go | 95 ++++++++++++++++++++++++++++++
 db.go                              | 93 +++++++++++++++++++++++++++++
 export.go                          | 81 +++++++++++++++++++++++++
 4 files changed, 270 insertions(+)
 create mode 100644 cmd/ripoff-export/ripoff_export.go
 create mode 100644 export.go

diff --git a/.gitignore b/.gitignore
index 7cd58de..b5ab113 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,3 +2,4 @@
 tmp
 /ripoff
 .DS_Store
+export
diff --git a/cmd/ripoff-export/ripoff_export.go b/cmd/ripoff-export/ripoff_export.go
new file mode 100644
index 0000000..efedc61
--- /dev/null
+++ b/cmd/ripoff-export/ripoff_export.go
@@ -0,0 +1,95 @@
+package main
+
+import (
+	"bytes"
+	"context"
+	"fmt"
+	"log"
+	"log/slog"
+	"os"
+	"path"
+	"path/filepath"
+
+	"github.com/jackc/pgx/v5"
+	"gopkg.in/yaml.v3"
+
+	"github.com/mortenson/ripoff"
+)
+
+func errAttr(err error) slog.Attr {
+	return slog.Any("error", err)
+}
+
+func main() {
+	dburl := os.Getenv("DATABASE_URL")
+	if dburl == "" {
+		slog.Error("DATABASE_URL env variable is required")
+		os.Exit(1)
+	}
+
+	if len(os.Args) != 2 {
+		slog.Error("Path to export directory is required")
+		os.Exit(1)
+	}
+
+	// Connect to database.
+	ctx := context.Background()
+	conn, err := pgx.Connect(ctx, dburl)
+	if err != nil {
+		slog.Error("Could not connect to database", errAttr(err))
+		os.Exit(1)
+	}
+	defer conn.Close(ctx)
+
+	exportDirectory := path.Clean(os.Args[1])
+	dirInfo, err := os.Stat(exportDirectory)
+	if err == nil && !dirInfo.IsDir() {
+		slog.Error("Export directory is not a directory")
+		os.Exit(1)
+	}
+
+	// Directory exists, delete it after verifying that it's safe to do so.
+	if err == nil && !os.IsNotExist(err) {
+		err = filepath.WalkDir(exportDirectory, func(path string, entry os.DirEntry, err error) error {
+			if err != nil {
+				return err
+			}
+			if !entry.IsDir() && filepath.Ext(path) != ".yaml" && filepath.Ext(path) != ".yml" {
+				return fmt.Errorf("ripoff-export can only safely delete directories that only contain YAML files, found: %s", path)
+			}
+			return nil
+		})
+		if err != nil {
+			slog.Error("Error verifying export directory", errAttr(err))
+			os.Exit(1)
+		}
+		err = os.RemoveAll(exportDirectory)
+		if err != nil {
+			slog.Error("Could not delete export directory", errAttr(err))
+			os.Exit(1)
+		}
+	}
+
+	err = os.MkdirAll(exportDirectory, 0755)
+	if err != nil {
+		slog.Error("Could not re-create export directory", errAttr(err))
+		os.Exit(1)
+	}
+
+	ripoffFile, err := ripoff.ExportToRipoff(ctx, conn, exportDirectory)
+	if err != nil {
+		slog.Error("Could not assemble ripoff file from database", errAttr(err))
+		os.Exit(1)
+	}
+
+	var ripoffFileBuf bytes.Buffer
+	yamlEncoder := yaml.NewEncoder(&ripoffFileBuf)
+	yamlEncoder.SetIndent(2)
+	err = yamlEncoder.Encode(ripoffFile)
+	if err != nil {
+		slog.Error("Could not marshal yaml from ripoff file", errAttr(err))
+		os.Exit(1)
+	}
+
+	log.Print(ripoffFileBuf.String())
+}
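[Editor's note: the guard above only wipes the export directory when every file in it is YAML. A minimal standalone sketch of that rule, under the same assumptions; the name `isSafeToDelete` is hypothetical and not part of ripoff's API.]

```go
package main

import (
	"fmt"
	"io/fs"
	"os"
	"path/filepath"
)

// isSafeToDelete walks dir and errors out on the first non-YAML file,
// mirroring the check ripoff-export performs before os.RemoveAll.
func isSafeToDelete(dir string) error {
	return filepath.WalkDir(dir, func(path string, entry fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if !entry.IsDir() && filepath.Ext(path) != ".yaml" && filepath.Ext(path) != ".yml" {
			return fmt.Errorf("refusing to delete: found non-YAML file %s", path)
		}
		return nil
	})
}

func main() {
	if len(os.Args) != 2 {
		fmt.Println("usage: check <dir>")
		os.Exit(1)
	}
	if err := isSafeToDelete(os.Args[1]); err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	fmt.Println("directory contains only YAML files")
}
```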
diff --git a/db.go b/db.go
index 1de1c32..41df5b0 100644
--- a/db.go
+++ b/db.go
@@ -261,3 +261,96 @@ func buildQueriesForRipoff(primaryKeys PrimaryKeysResult, totalRipoff RipoffFile
 	}
 	return sortedQueries, nil
 }
+
+const columnsWithForeignKeysQuery = `
+select col.table_name as table,
+       col.column_name,
+       COALESCE(rel.table_name, '') as primary_table,
+       COALESCE(rel.column_name, '') as primary_column,
+       COALESCE(kcu.constraint_name, '')
+from information_schema.columns col
+left join (select kcu.constraint_schema,
+                  kcu.constraint_name,
+                  kcu.table_schema,
+                  kcu.table_name,
+                  kcu.column_name,
+                  kcu.ordinal_position,
+                  kcu.position_in_unique_constraint
+           from information_schema.key_column_usage kcu
+           join information_schema.table_constraints tco
+             on kcu.constraint_schema = tco.constraint_schema
+            and kcu.constraint_name = tco.constraint_name
+            and tco.constraint_type = 'FOREIGN KEY'
+          ) as kcu
+  on col.table_schema = kcu.table_schema
+ and col.table_name = kcu.table_name
+ and col.column_name = kcu.column_name
+left join information_schema.referential_constraints rco
+  on rco.constraint_name = kcu.constraint_name
+ and rco.constraint_schema = kcu.table_schema
+left join information_schema.key_column_usage rel
+  on rco.unique_constraint_name = rel.constraint_name
+ and rco.unique_constraint_schema = rel.constraint_schema
+ and rel.ordinal_position = kcu.position_in_unique_constraint
+where col.table_schema = 'public';
+`
+
+type ForeignKey struct {
+	ToTable          string
+	ColumnConditions [][2]string
+}
+
+type ForeignKeyResultTable struct {
+	Columns []string
+	// Constraint -> Fkey
+	ForeignKeys map[string]*ForeignKey
+}
+
+type ForeignKeysResult map[string]*ForeignKeyResultTable
+
+func getForeignKeysResult(ctx context.Context, conn pgx.Tx) (ForeignKeysResult, error) {
+	rows, err := conn.Query(ctx, columnsWithForeignKeysQuery)
+	if err != nil {
+		return ForeignKeysResult{}, err
+	}
+	defer rows.Close()
+
+	result := ForeignKeysResult{}
+
+	for rows.Next() {
+		var fromTableName string
+		var fromColumnName string
+		var toTableName string
+		var toColumnName string // Unused
+		var constraintName string
+		err = rows.Scan(&fromTableName, &fromColumnName, &toTableName, &toColumnName, &constraintName)
+		if err != nil {
+			return ForeignKeysResult{}, err
+		}
+
+		_, tableExists := result[fromTableName]
+		if !tableExists {
+			result[fromTableName] = &ForeignKeyResultTable{
+				Columns:     []string{},
+				ForeignKeys: map[string]*ForeignKey{},
+			}
+		}
+		result[fromTableName].Columns = append(result[fromTableName].Columns, fromColumnName)
+		if constraintName != "" {
+			_, fkeyExists := result[fromTableName].ForeignKeys[constraintName]
+			if !fkeyExists {
+				result[fromTableName].ForeignKeys[constraintName] = &ForeignKey{
+					ToTable:          toTableName,
+					ColumnConditions: [][2]string{},
+				}
+			}
+			if fromColumnName != "" && toColumnName != "" {
+				result[fromTableName].ForeignKeys[constraintName].ColumnConditions = append(
+					result[fromTableName].ForeignKeys[constraintName].ColumnConditions,
+					[2]string{fromColumnName, toColumnName},
+				)
+			}
+		}
+	}
+
+	return result, nil
+}
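[Editor's note: for reference, this is the shape of data the introspection query above is meant to build, sketched as a Go literal written as if inside the ripoff package. The schema is hypothetical (a posts.author_id column referencing users(id)); all table, column, and constraint names are invented.]

```go
// Illustrative only: every column of every public table is listed, and each
// foreign key constraint records its (from column, to column) pairs.
var example = ForeignKeysResult{
	"users": &ForeignKeyResultTable{
		Columns:     []string{"id", "name"},
		ForeignKeys: map[string]*ForeignKey{},
	},
	"posts": &ForeignKeyResultTable{
		Columns: []string{"id", "author_id", "title"},
		ForeignKeys: map[string]*ForeignKey{
			"posts_author_id_fkey": {
				ToTable: "users",
				// One condition per column pair: posts.author_id -> users.id.
				ColumnConditions: [][2]string{{"author_id", "id"}},
			},
		},
	},
}
```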
diff --git a/export.go b/export.go
new file mode 100644
index 0000000..1009c08
--- /dev/null
+++ b/export.go
@@ -0,0 +1,81 @@
+package ripoff
+
+import (
+	"context"
+	"fmt"
+	"strings"
+
+	"github.com/jackc/pgx/v5"
+	"github.com/lib/pq"
+)
+
+func ExportToRipoff(ctx context.Context, conn *pgx.Conn, path string) (RipoffFile, error) {
+	ripoffFile := RipoffFile{
+		Rows: map[string]Row{},
+	}
+
+	tx, err := conn.Begin(ctx)
+	if err != nil {
+		return ripoffFile, err
+	}
+	defer tx.Rollback(ctx)
+
+	primaryKeyResult, err := getPrimaryKeys(ctx, tx)
+	if err != nil {
+		return ripoffFile, err
+	}
+	foreignKeyResult, err := getForeignKeysResult(ctx, tx)
+	if err != nil {
+		return ripoffFile, err
+	}
+	// Assemble an easier to parse [table,column] -> table map for single column foreign keys.
+	singleColumnFkeyMap := map[[2]string]string{}
+	for table, tableInfo := range foreignKeyResult {
+		for _, foreignKey := range tableInfo.ForeignKeys {
+			if len(foreignKey.ColumnConditions) == 1 {
+				singleColumnFkeyMap[[2]string{table, foreignKey.ColumnConditions[0][0]}] = foreignKey.ToTable
+			}
+		}
+	}
+	for table, primaryKeys := range primaryKeyResult {
+		if len(primaryKeys) != 1 {
+			return RipoffFile{}, fmt.Errorf("multiple primary keys are not supported in exports yet, abort on table: %s", table)
+		}
+		columns := make([]string, len(foreignKeyResult[table].Columns))
+		for i, column := range foreignKeyResult[table].Columns {
+			columns[i] = fmt.Sprintf("CAST(%s AS TEXT)", pq.QuoteIdentifier(column))
+		}
+		selectQuery := fmt.Sprintf("SELECT %s FROM %s;", strings.Join(columns, ", "), pq.QuoteIdentifier(table))
+		rows, err := tx.Query(ctx, selectQuery)
+		if err != nil {
+			return RipoffFile{}, err
+		}
+		defer rows.Close()
+		fields := rows.FieldDescriptions()
+		for rows.Next() {
+			columns, err := rows.Values()
+			if err != nil {
+				return RipoffFile{}, err
+			}
+			ripoffRow := Row{}
+			var id any
+			for i, field := range fields {
+				// No need to export primary keys due to inference from schema.
+				if primaryKeys[0] == field.Name {
+					id = columns[i]
+					continue
+				}
+				// If this is a foreign key, should ensure it uses the table:valueFunc() format.
+				toTable, isFkey := singleColumnFkeyMap[[2]string{table, field.Name}]
+				if isFkey {
+					ripoffRow[field.Name] = fmt.Sprintf("%s:literal(%s)", toTable, columns[i])
+					continue
+				}
+				// Normal column.
+				ripoffRow[field.Name] = columns[i]
+			}
+			ripoffFile.Rows[fmt.Sprintf("%s:literal(%s)", table, id)] = ripoffRow
+		}
+	}
+	return ripoffFile, nil
+}
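[Editor's note: the export loop above keys each row by `table:literal(primaryKey)`, drops the primary key from the row body, and rewrites single-column foreign keys to the same reference form. Sketched as a Go literal with invented data, the output for one users row would look roughly like this.]

```go
// Rough shape of an exported file; values are illustrative only.
var exported = RipoffFile{
	Rows: map[string]Row{
		// Primary key lives in the row key, not the row body.
		"users:literal(448e6222-a1ed-11ef-b864-0242ac120002)": {
			"email": "first@example.com",
			// Foreign key column rewritten to the table:literal(...) form.
			"avatar_id": "avatars:literal(09af5166-a1ed-11ef-b864-0242ac120002)",
		},
	},
}
```

From 979eefe889eba3a77f309e8b8ce50f7d84be91c0 Mon Sep 17 00:00:00 2001
From: Sam Mortenson
Date: Thu, 14 Nov 2024 11:08:34 -0800
Subject: [PATCH 2/3] Tests both manual and automated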
---
 .gitignore                                    |   2 +-
 cmd/ripoff-export/ripoff_export.go            |  22 +++-
 db.go                                         |  63 ++++++---
 db_test.go                                    |   2 +-
 export.go                                     | 124 ++++++++++++++----
 export_test.go                                |  78 +++++++++++
 testdata/export/basic/setup.sql               |  68 ++++++++++
 testdata/export/basic/truncate.sql            |   1 +
 testdata/{ => import}/basic/basic.yml         |   0
 testdata/{ => import}/basic/schema.sql        |   0
 testdata/{ => import}/bigdata/bigdata.yml     |   0
 testdata/{ => import}/bigdata/schema.sql      |   0
 .../bigdata/template_multi_user.yml           |   0
 .../dependencies/dependencies.yml             |   0
 testdata/{ => import}/dependencies/schema.sql |   0
 testdata/{ => import}/enums/enums.yml         |   0
 testdata/{ => import}/enums/schema.sql        |   0
 .../{ => import}/enums/template_workspace.yml |   0
 testdata/{ => import}/enums/validate.sql      |   0
 testdata/{ => import}/faker/faker.yml         |   0
 testdata/{ => import}/faker/schema.sql        |   0
 testdata/{ => import}/faker/validate.sql      |   0
 .../multiple_primary_keys.yml                 |   0
 .../multiple_primary_keys/schema.sql          |   0
 .../real_world_example/real_world_example.yml |   0
 .../real_world_example/schema.sql             |   0
 .../real_world_example/template_user.yml      |   0
 .../real_world_example/template_workspace.yml |   0
 testdata/{ => import}/templates/schema.sql    |   0
 .../{ => import}/templates/template_user.yml  |   0
 .../{ => import}/templates/template_users.yml |   0
 testdata/{ => import}/templates/templates.yml |   0
 32 files changed, 311 insertions(+), 49 deletions(-)
 create mode 100644 export_test.go
 create mode 100644 testdata/export/basic/setup.sql
 create mode 100644 testdata/export/basic/truncate.sql
 rename testdata/{ => import}/basic/basic.yml (100%)
 rename testdata/{ => import}/basic/schema.sql (100%)
 rename testdata/{ => import}/bigdata/bigdata.yml (100%)
 rename testdata/{ => import}/bigdata/schema.sql (100%)
 rename testdata/{ => import}/bigdata/template_multi_user.yml (100%)
 rename testdata/{ => import}/dependencies/dependencies.yml (100%)
 rename testdata/{ => import}/dependencies/schema.sql (100%)
 rename testdata/{ => import}/enums/enums.yml (100%)
 rename testdata/{ => import}/enums/schema.sql (100%)
 rename testdata/{ => import}/enums/template_workspace.yml (100%)
 rename testdata/{ => import}/enums/validate.sql (100%)
 rename testdata/{ => import}/faker/faker.yml (100%)
 rename testdata/{ => import}/faker/schema.sql (100%)
 rename testdata/{ => import}/faker/validate.sql (100%)
 rename testdata/{ => import}/multiple_primary_keys/multiple_primary_keys.yml (100%)
 rename testdata/{ => import}/multiple_primary_keys/schema.sql (100%)
 rename testdata/{ => import}/real_world_example/real_world_example.yml (100%)
 rename testdata/{ => import}/real_world_example/schema.sql (100%)
 rename testdata/{ => import}/real_world_example/template_user.yml (100%)
 rename testdata/{ => import}/real_world_example/template_workspace.yml (100%)
 rename testdata/{ => import}/templates/schema.sql (100%)
 rename testdata/{ => import}/templates/template_user.yml (100%)
 rename testdata/{ => import}/templates/template_users.yml (100%)
 rename testdata/{ => import}/templates/templates.yml (100%)

diff --git a/.gitignore b/.gitignore
index b5ab113..9885b66 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,4 @@
 tmp
 /ripoff
 .DS_Store
-export
+/export
diff --git a/cmd/ripoff-export/ripoff_export.go b/cmd/ripoff-export/ripoff_export.go
index efedc61..aa79642 100644
--- a/cmd/ripoff-export/ripoff_export.go
+++ b/cmd/ripoff-export/ripoff_export.go
@@ -4,7 +4,6 @@ import (
 	"bytes"
 	"context"
 	"fmt"
-	"log"
 	"log/slog"
 	"os"
 	"path"
@@ -76,7 +75,20 @@ func main() {
 		os.Exit(1)
 	}
 
-	ripoffFile, err := ripoff.ExportToRipoff(ctx, conn, exportDirectory)
+	tx, err := conn.Begin(ctx)
+	if err != nil {
+		slog.Error("Could not create transaction", errAttr(err))
+		os.Exit(1)
+	}
+	defer func() {
+		err = tx.Rollback(ctx)
+		if err != nil && err != pgx.ErrTxClosed {
+			slog.Error("Could not rollback transaction", errAttr(err))
+			os.Exit(1)
+		}
+	}()
+
+	ripoffFile, err := ripoff.ExportToRipoff(ctx, tx)
 	if err != nil {
 		slog.Error("Could not assemble ripoff file from database", errAttr(err))
 		os.Exit(1)
@@ -91,5 +103,9 @@ func main() {
 		os.Exit(1)
 	}
 
-	log.Print(ripoffFileBuf.String())
+	err = os.WriteFile(path.Join(exportDirectory, "ripoff.yml"), ripoffFileBuf.Bytes(), 0644)
+	if err != nil {
+		slog.Error("Could not write ripoff file", errAttr(err))
+		os.Exit(1)
+	}
 }
diff --git a/db.go b/db.go
index 41df5b0..1c0d596 100644
--- a/db.go
+++ b/db.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"crypto/sha256"
 	"encoding/binary"
+	"errors"
 	"fmt"
 	"log/slog"
 	"math/rand"
@@ -107,7 +108,7 @@ func GetEnumValues(ctx context.Context, tx pgx.Tx) (EnumValuesResult, error) {
 }
 
 var valueFuncRegex = regexp.MustCompile(`([a-zA-Z]+)\((.*)\)$`)
-var referenceRegex = regexp.MustCompile(`^[a-zA-Z0-9_]+:`)
+var referenceRegex = regexp.MustCompile(`^[a-zA-Z0-9_]+:[a-zA-Z]+\(`)
 
 func prepareValue(rawValue string) (string, error) {
 	valueFuncMatches := valueFuncRegex.FindStringSubmatch(rawValue)
@@ -181,32 +182,46 @@ func buildQueryForRow(primaryKeys PrimaryKeysResult, rowId string, row Row, depe
 		if column == "~conflict" {
 			continue
 		}
+		if column == "~dependencies" {
+			for _, dependency := range valueRaw.([]interface{}) {
+				err := dependencyGraph.AddEdge(rowId, dependency.(string))
+				if isRealGraphError(err) {
+					return "", err
+				}
+			}
+			continue
+		}
 
-		// Technically we allow more than strings in ripoff files for templating purposes,
+		// Technically we allow more than null strings in ripoff files for templating purposes,
 		// but full support (ex: escaping arrays, what to do with maps, etc.) is quite hard so tabling that for now.
-		value := fmt.Sprint(valueRaw)
+		if valueRaw == nil {
+			values = append(values, "NULL")
+			setStatements = append(setStatements, fmt.Sprintf("%s = %s", pq.QuoteIdentifier(column), "NULL"))
+		} else {
+			value := fmt.Sprint(valueRaw)
+
+			// Assume that if a valueFunc is prefixed with a table name, it's a primary/foreign key.
+			addEdge := referenceRegex.MatchString(value)
+			// Don't add edges to and from the same row.
+			if addEdge && rowId != value {
+				err := dependencyGraph.AddEdge(rowId, value)
+				if isRealGraphError(err) {
+					return "", err
+				}
+			}
 
-		// Assume that if a valueFunc is prefixed with a table name, it's a primary/foreign key.
-		addEdge := referenceRegex.MatchString(value)
-		// Don't add edges to and from the same row.
-		if addEdge && rowId != value {
-			err := dependencyGraph.AddEdge(rowId, value)
+			columns = append(columns, pq.QuoteIdentifier(column))
+			valuePrepared, err := prepareValue(value)
 			if err != nil {
 				return "", err
 			}
+			// Assume this column is the primary key.
+			if rowId == value && onConflictColumn == "" {
+				onConflictColumn = pq.QuoteIdentifier(column)
+			}
+			values = append(values, pq.QuoteLiteral(valuePrepared))
+			setStatements = append(setStatements, fmt.Sprintf("%s = %s", pq.QuoteIdentifier(column), pq.QuoteLiteral(valuePrepared)))
 		}
-
-		columns = append(columns, pq.QuoteIdentifier(column))
-		valuePrepared, err := prepareValue(value)
-		if err != nil {
-			return "", err
-		}
-		// Assume this column is the primary key.
-		if rowId == value && onConflictColumn == "" {
-			onConflictColumn = pq.QuoteIdentifier(column)
-		}
-		values = append(values, pq.QuoteLiteral(valuePrepared))
-		setStatements = append(setStatements, fmt.Sprintf("%s = %s", pq.QuoteIdentifier(column), pq.QuoteLiteral(valuePrepared)))
 	}
 
 	if onConflictColumn == "" {
@@ -306,6 +321,7 @@ type ForeignKeyResultTable struct {
 	ForeignKeys map[string]*ForeignKey
 }
 
+// Map of table name to foreign keys.
 type ForeignKeysResult map[string]*ForeignKeyResultTable
 
 func getForeignKeysResult(ctx context.Context, conn pgx.Tx) (ForeignKeysResult, error) {
@@ -354,3 +370,10 @@ func getForeignKeysResult(ctx context.Context, conn pgx.Tx) (ForeignKeysResult,
 
 	return result, nil
 }
+
+func isRealGraphError(err error) bool {
+	if err == nil || errors.Is(err, graph.ErrEdgeAlreadyExists) {
+		return false
+	}
+	return true
+}
diff --git a/db_test.go b/db_test.go
index 2e37c0f..781b905 100644
--- a/db_test.go
+++ b/db_test.go
@@ -70,7 +70,7 @@ func TestRipoff(t *testing.T) {
 	defer conn.Close(ctx)
 
 	_, filename, _, _ := runtime.Caller(0)
-	dir := path.Join(path.Dir(filename), "testdata")
+	dir := path.Join(path.Dir(filename), "testdata", "import")
 
 	dirEntry, err := os.ReadDir(dir)
 	require.NoError(t, err)
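[Editor's note: the tightened `referenceRegex` above now requires a `table:valueFunc(` shape before a value creates a dependency edge. A self-contained check of that behavior, with invented example values; the regex literal is copied verbatim from the diff.]

```go
package main

import (
	"fmt"
	"regexp"
)

var referenceRegex = regexp.MustCompile(`^[a-zA-Z0-9_]+:[a-zA-Z]+\(`)

func main() {
	for _, v := range []string{
		"users:uuid(fooBar)",    // table + valueFunc -> edge added
		"users:literal(1)",      // table + valueFunc -> edge added
		"note: not a reference", // colon, but no valueFunc -> no edge
		"10:30",                 // plain data that the old regex would have matched
	} {
		fmt.Printf("%-25q -> %v\n", v, referenceRegex.MatchString(v))
	}
}
```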
diff --git a/export.go b/export.go
index 1009c08..d564c19 100644
--- a/export.go
+++ b/export.go
@@ -3,23 +3,26 @@ package ripoff
 import (
 	"context"
 	"fmt"
+	"slices"
 	"strings"
 
 	"github.com/jackc/pgx/v5"
 	"github.com/lib/pq"
 )
 
-func ExportToRipoff(ctx context.Context, conn *pgx.Conn, path string) (RipoffFile, error) {
+type RowMissingDependency struct {
+	Row         Row
+	ToTable     string
+	ToColumn    string
+	UniqueValue string
+}
+
+// Exports all rows in the database to a ripoff file.
+func ExportToRipoff(ctx context.Context, tx pgx.Tx) (RipoffFile, error) {
 	ripoffFile := RipoffFile{
 		Rows: map[string]Row{},
 	}
 
-	tx, err := conn.Begin(ctx)
-	if err != nil {
-		return ripoffFile, err
-	}
-	defer tx.Rollback(ctx)
-
 	primaryKeyResult, err := getPrimaryKeys(ctx, tx)
 	if err != nil {
 		return ripoffFile, err
@@ -28,19 +31,33 @@ func ExportToRipoff(ctx context.Context, conn *pgx.Conn, path string) (RipoffFil
 	if err != nil {
 		return ripoffFile, err
 	}
-	// Assemble an easier to parse [table,column] -> table map for single column foreign keys.
-	singleColumnFkeyMap := map[[2]string]string{}
+	// A map from [table,column] -> ForeignKey for single column foreign keys.
+	singleColumnFkeyMap := map[[2]string]*ForeignKey{}
+	// A map from [table,column] -> a map of column values to row keys (ex: users:literal(1)) of the given table
+	uniqueConstraintMap := map[[2]string]map[string]string{}
+	// A map from table to a list of columns that need to be mapped in uniqueConstraintMap.
+	hasUniqueConstraintMap := map[string][]string{}
 	for table, tableInfo := range foreignKeyResult {
 		for _, foreignKey := range tableInfo.ForeignKeys {
-			if len(foreignKey.ColumnConditions) == 1 {
-				singleColumnFkeyMap[[2]string{table, foreignKey.ColumnConditions[0][0]}] = foreignKey.ToTable
+			if len(foreignKey.ColumnConditions) != 1 {
+				continue
+			}
+			singleColumnFkeyMap[[2]string{table, foreignKey.ColumnConditions[0][0]}] = foreignKey
+			// This is a foreign key to a unique index, not a primary key.
+			if len(primaryKeyResult[foreignKey.ToTable]) == 1 && primaryKeyResult[foreignKey.ToTable][0] != foreignKey.ColumnConditions[0][1] {
+				_, ok := hasUniqueConstraintMap[foreignKey.ToTable]
+				if !ok {
+					hasUniqueConstraintMap[foreignKey.ToTable] = []string{}
+				}
+				uniqueConstraintMap[[2]string{foreignKey.ToTable, foreignKey.ColumnConditions[0][1]}] = map[string]string{}
+				hasUniqueConstraintMap[foreignKey.ToTable] = append(hasUniqueConstraintMap[foreignKey.ToTable], foreignKey.ColumnConditions[0][1])
 			}
 		}
 	}
+
+	missingDependencies := []RowMissingDependency{}
+
 	for table, primaryKeys := range primaryKeyResult {
-		if len(primaryKeys) != 1 {
-			return RipoffFile{}, fmt.Errorf("multiple primary keys are not supported in exports yet, abort on table: %s", table)
-		}
 		columns := make([]string, len(foreignKeyResult[table].Columns))
 		for i, column := range foreignKeyResult[table].Columns {
 			columns[i] = fmt.Sprintf("CAST(%s AS TEXT)", pq.QuoteIdentifier(column))
@@ -53,29 +70,88 @@ func ExportToRipoff(ctx context.Context, conn *pgx.Conn, path string) (RipoffFil
 		defer rows.Close()
 		fields := rows.FieldDescriptions()
 		for rows.Next() {
-			columns, err := rows.Values()
+			columnsRaw, err := rows.Values()
 			if err != nil {
 				return RipoffFile{}, err
 			}
+			columns := make([]*string, len(columnsRaw))
+			for i, column := range columnsRaw {
+				if column == nil {
+					columns[i] = nil
+				} else {
+					str := column.(string)
+					columns[i] = &str
+				}
+			}
 			ripoffRow := Row{}
-			var id any
+			ids := []string{}
 			for i, field := range fields {
+				//
+				if columns[i] == nil {
+					ripoffRow[field.Name] = nil
+					continue
+				}
+				columnVal := *columns[i]
+				// Note: the order here
+				if slices.Contains(primaryKeys, field.Name) {
+					ids = append(ids, columnVal)
+				}
 				// No need to export primary keys due to inference from schema.
-				if primaryKeys[0] == field.Name {
-					id = columns[i]
+				if len(primaryKeys) == 1 && primaryKeys[0] == field.Name {
 					continue
 				}
 				// If this is a foreign key, should ensure it uses the table:valueFunc() format.
-				toTable, isFkey := singleColumnFkeyMap[[2]string{table, field.Name}]
-				if isFkey {
-					ripoffRow[field.Name] = fmt.Sprintf("%s:literal(%s)", toTable, columns[i])
-					continue
+				foreignKey, isFkey := singleColumnFkeyMap[[2]string{table, field.Name}]
+				if isFkey && columnVal != "" {
+					// Does the referenced table have more than one primary key, or does the constraint not point to a primary key?
+					// Then is a foreign key to a non-primary key, we need to fill this info in later.
+					if len(primaryKeyResult[foreignKey.ToTable]) != 1 || primaryKeyResult[foreignKey.ToTable][0] != foreignKey.ColumnConditions[0][1] {
+						missingDependencies = append(missingDependencies, RowMissingDependency{
+							Row:         ripoffRow,
+							UniqueValue: columnVal,
+							ToTable:     foreignKey.ToTable,
+							ToColumn:    foreignKey.ColumnConditions[0][1],
+						})
+					} else {
+						ripoffRow[field.Name] = fmt.Sprintf("%s:literal(%s)", foreignKey.ToTable, columnVal)
+						continue
+					}
 				}
 				// Normal column.
-				ripoffRow[field.Name] = columns[i]
+				ripoffRow[field.Name] = columnVal
 			}
-			ripoffFile.Rows[fmt.Sprintf("%s:literal(%s)", table, id)] = ripoffRow
+			rowKey := fmt.Sprintf("%s:literal(%s)", table, strings.Join(ids, "."))
+			// For foreign keys to non-unique fields, we need to maintain our own map of unique values to rowKeys.
+			columnsThatNeedMapped, needsMapped := hasUniqueConstraintMap[table]
+			if needsMapped {
+				for i, field := range fields {
+					if columns[i] == nil {
+						continue
+					}
+					columnVal := *columns[i]
+					if slices.Contains(columnsThatNeedMapped, field.Name) {
+						uniqueConstraintMap[[2]string{table, field.Name}][columnVal] = rowKey
+					}
+				}
+			}
+			ripoffFile.Rows[rowKey] = ripoffRow
 		}
 	}
+	// Resolve missing dependencies now that all rows are in memory.
+	for _, missingDependency := range missingDependencies {
+		valueMap, ok := uniqueConstraintMap[[2]string{missingDependency.ToTable, missingDependency.ToColumn}]
+		if !ok {
+			return ripoffFile, fmt.Errorf("row has dependency on column %s.%s which is not mapped", missingDependency.ToTable, missingDependency.ToColumn)
+		}
+		rowKey, ok := valueMap[missingDependency.UniqueValue]
+		if !ok {
+			return ripoffFile, fmt.Errorf("row has dependency on column %s.%s which does not contain unique value %s", missingDependency.ToTable, missingDependency.ToColumn, missingDependency.UniqueValue)
+		}
+		dependencies, ok := missingDependency.Row["~dependencies"].([]string)
+		if !ok {
+			missingDependency.Row["~dependencies"] = []string{}
+		}
+		missingDependency.Row["~dependencies"] = append(dependencies, rowKey)
+	}
 	return ripoffFile, nil
 }
diff --git a/export_test.go b/export_test.go
new file mode 100644
index 0000000..ce59c09
--- /dev/null
+++ b/export_test.go
@@ -0,0 +1,78 @@
+package ripoff
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"path"
+	"runtime"
+	"strings"
+	"testing"
+
+	"github.com/jackc/pgx/v5"
+	"github.com/lib/pq"
+	"github.com/stretchr/testify/require"
+)
+
+func runExportTestData(t *testing.T, ctx context.Context, tx pgx.Tx, testDir string) {
+	// Set up schema and initial rows.
+	setupFile, err := os.ReadFile(path.Join(testDir, "setup.sql"))
+	require.NoError(t, err)
+	_, err = tx.Exec(ctx, string(setupFile))
+	require.NoError(t, err)
+	// Generate new ripoff file.
+	ripoffFile, err := ExportToRipoff(ctx, tx)
+	require.NoError(t, err)
+	// Wipe database.
+	truncateFile, err := os.ReadFile(path.Join(testDir, "truncate.sql"))
+	require.NoError(t, err)
+	_, err = tx.Exec(ctx, string(truncateFile))
+	require.NoError(t, err)
+	// Run generated ripoff.
+	err = RunRipoff(ctx, tx, ripoffFile)
+	require.NoError(t, err)
+	// Try to verify that the number of generated rows matches the ripoff.
+	tableCount := map[string]int{}
+	for rowId := range ripoffFile.Rows {
+		tableName := strings.Split(rowId, ":")
+		if len(tableName) > 0 {
+			tableCount[tableName[0]]++
+		}
+	}
+	for tableName, expectedCount := range tableCount {
+		row := tx.QueryRow(ctx, fmt.Sprintf("SELECT COUNT(*) FROM %s;", pq.QuoteIdentifier(tableName)))
+		var realCount int
+		err := row.Scan(&realCount)
+		require.NoError(t, err)
+		require.Equal(t, expectedCount, realCount)
+	}
+}
+
+func TestRipoffExport(t *testing.T) {
+	envUrl := os.Getenv("RIPOFF_TEST_DATABASE_URL")
+	if envUrl == "" {
+		envUrl = "postgres:///ripoff-test-db"
+	}
+	ctx := context.Background()
+	conn, err := pgx.Connect(ctx, envUrl)
+	if err != nil {
+		require.NoError(t, err)
+	}
+	defer conn.Close(ctx)
+
+	_, filename, _, _ := runtime.Caller(0)
+	dir := path.Join(path.Dir(filename), "testdata", "export")
+	dirEntry, err := os.ReadDir(dir)
+	require.NoError(t, err)
+
+	for _, e := range dirEntry {
+		if !e.IsDir() {
+			continue
+		}
+		tx, err := conn.Begin(ctx)
+		require.NoError(t, err)
+		runExportTestData(t, ctx, tx, path.Join(dir, e.Name()))
+		err = tx.Rollback(ctx)
+		require.NoError(t, err)
+	}
+}
diff --git a/testdata/export/basic/setup.sql b/testdata/export/basic/setup.sql
new file mode 100644
index 0000000..2c2c157
--- /dev/null
+++ b/testdata/export/basic/setup.sql
@@ -0,0 +1,68 @@
+CREATE TABLE avatars (
+  id UUID NOT NULL PRIMARY KEY,
+  url TEXT NOT NULL
+);
+
+CREATE TABLE avatar_modifiers (
+  id UUID NOT NULL PRIMARY KEY REFERENCES avatars,
+  grayscale BOOLEAN NOT NULL
+);
+
+CREATE TABLE roles (
+  id UUID NOT NULL PRIMARY KEY,
+  name TEXT NOT NULL
+);
+
+ALTER TABLE roles ADD CONSTRAINT unique_roles_name UNIQUE (name);
+
+CREATE TABLE employees (
+  id BIGSERIAL NOT NULL PRIMARY KEY,
+  role TEXT NOT NULL
+);
+
+-- We are foreign keying to a non primary key. Tricky!
+ALTER TABLE employees
+  ADD CONSTRAINT fk_employees_roles
+  FOREIGN KEY (role) REFERENCES roles (name);
+
+CREATE TABLE users (
+  id UUID NOT NULL PRIMARY KEY,
+  avatar_id UUID NOT NULL REFERENCES avatars,
+  email TEXT NOT NULL,
+  employee_id BIGSERIAL NOT NULL REFERENCES employees
+);
+
+INSERT INTO avatars
+  (id, url)
+  VALUES
+  ('09af5166-a1ed-11ef-b864-0242ac120002', 'first.png'),
+  ('0cf7650c-a1ed-11ef-b864-0242ac120002', 'second.png'),
+  ('184e5e10-a1ed-11ef-b864-0242ac120002', 'third.png');
+
+INSERT INTO avatar_modifiers
+  (id, grayscale)
+  VALUES
+  ('09af5166-a1ed-11ef-b864-0242ac120002', FALSE),
+  ('0cf7650c-a1ed-11ef-b864-0242ac120002', TRUE),
+  ('184e5e10-a1ed-11ef-b864-0242ac120002', FALSE);
+
+INSERT INTO roles
+  (id, name)
+  VALUES
+  (gen_random_uuid(), 'Boss'),
+  (gen_random_uuid(), 'Mini Boss'),
+  (gen_random_uuid(), 'Minion');
+
+INSERT INTO employees
+  (id, role)
+  VALUES
+  (1, 'Boss'),
+  (2, 'Mini Boss'),
+  (3, 'Minion');
+
+INSERT INTO users
+  (id, avatar_id, email, employee_id)
+  VALUES
+  ('448e6222-a1ed-11ef-b864-0242ac120002', '09af5166-a1ed-11ef-b864-0242ac120002', 'first@example.com', 1),
+  ('459a966e-a1f1-11ef-b864-0242ac120002', '0cf7650c-a1ed-11ef-b864-0242ac120002', 'second@example.com', 2),
+  ('4848cf02-a1f1-11ef-b864-0242ac120002', '184e5e10-a1ed-11ef-b864-0242ac120002', 'third@example.com', 3);
diff --git a/testdata/export/basic/truncate.sql b/testdata/export/basic/truncate.sql
new file mode 100644
index 0000000..0cd74dd
--- /dev/null
+++ b/testdata/export/basic/truncate.sql
@@ -0,0 +1 @@
+TRUNCATE TABLE users, avatar_modifiers, avatars, employees CASCADE;
diff --git a/testdata/basic/basic.yml b/testdata/import/basic/basic.yml
similarity index 100%
rename from testdata/basic/basic.yml
rename to testdata/import/basic/basic.yml
diff --git a/testdata/basic/schema.sql b/testdata/import/basic/schema.sql
similarity index 100%
rename from testdata/basic/schema.sql
rename to testdata/import/basic/schema.sql
diff --git a/testdata/bigdata/bigdata.yml b/testdata/import/bigdata/bigdata.yml
similarity index 100%
rename from testdata/bigdata/bigdata.yml
rename to testdata/import/bigdata/bigdata.yml
diff --git a/testdata/bigdata/schema.sql b/testdata/import/bigdata/schema.sql
similarity index 100%
rename from testdata/bigdata/schema.sql
rename to testdata/import/bigdata/schema.sql
diff --git a/testdata/bigdata/template_multi_user.yml b/testdata/import/bigdata/template_multi_user.yml
similarity index 100%
rename from testdata/bigdata/template_multi_user.yml
rename to testdata/import/bigdata/template_multi_user.yml
diff --git a/testdata/dependencies/dependencies.yml b/testdata/import/dependencies/dependencies.yml
similarity index 100%
rename from testdata/dependencies/dependencies.yml
rename to testdata/import/dependencies/dependencies.yml
diff --git a/testdata/dependencies/schema.sql b/testdata/import/dependencies/schema.sql
similarity index 100%
rename from testdata/dependencies/schema.sql
rename to testdata/import/dependencies/schema.sql
diff --git a/testdata/enums/enums.yml b/testdata/import/enums/enums.yml
similarity index 100%
rename from testdata/enums/enums.yml
rename to testdata/import/enums/enums.yml
diff --git a/testdata/enums/schema.sql b/testdata/import/enums/schema.sql
similarity index 100%
rename from testdata/enums/schema.sql
rename to testdata/import/enums/schema.sql
diff --git a/testdata/enums/template_workspace.yml b/testdata/import/enums/template_workspace.yml
similarity index 100%
rename from testdata/enums/template_workspace.yml
rename to testdata/import/enums/template_workspace.yml
diff --git a/testdata/enums/validate.sql b/testdata/import/enums/validate.sql
similarity index 100%
rename from testdata/enums/validate.sql
rename to testdata/import/enums/validate.sql
diff --git a/testdata/faker/faker.yml b/testdata/import/faker/faker.yml
similarity index 100%
rename from testdata/faker/faker.yml
rename to testdata/import/faker/faker.yml
diff --git a/testdata/faker/schema.sql b/testdata/import/faker/schema.sql
similarity index 100%
rename from testdata/faker/schema.sql
rename to testdata/import/faker/schema.sql
diff --git a/testdata/faker/validate.sql b/testdata/import/faker/validate.sql
similarity index 100%
rename from testdata/faker/validate.sql
rename to testdata/import/faker/validate.sql
diff --git a/testdata/multiple_primary_keys/multiple_primary_keys.yml b/testdata/import/multiple_primary_keys/multiple_primary_keys.yml
similarity index 100%
rename from testdata/multiple_primary_keys/multiple_primary_keys.yml
rename to testdata/import/multiple_primary_keys/multiple_primary_keys.yml
diff --git a/testdata/multiple_primary_keys/schema.sql b/testdata/import/multiple_primary_keys/schema.sql
similarity index 100%
rename from testdata/multiple_primary_keys/schema.sql
rename to testdata/import/multiple_primary_keys/schema.sql
diff --git a/testdata/real_world_example/real_world_example.yml b/testdata/import/real_world_example/real_world_example.yml
similarity index 100%
rename from testdata/real_world_example/real_world_example.yml
rename to testdata/import/real_world_example/real_world_example.yml
diff --git a/testdata/real_world_example/schema.sql b/testdata/import/real_world_example/schema.sql
similarity index 100%
rename from testdata/real_world_example/schema.sql
rename to testdata/import/real_world_example/schema.sql
diff --git a/testdata/real_world_example/template_user.yml b/testdata/import/real_world_example/template_user.yml
similarity index 100%
rename from testdata/real_world_example/template_user.yml
rename to testdata/import/real_world_example/template_user.yml
diff --git a/testdata/real_world_example/template_workspace.yml b/testdata/import/real_world_example/template_workspace.yml
similarity index 100%
rename from testdata/real_world_example/template_workspace.yml
rename to testdata/import/real_world_example/template_workspace.yml
diff --git a/testdata/templates/schema.sql b/testdata/import/templates/schema.sql
similarity index 100%
rename from testdata/templates/schema.sql
rename to testdata/import/templates/schema.sql
diff --git a/testdata/templates/template_user.yml b/testdata/import/templates/template_user.yml
similarity index 100%
rename from testdata/templates/template_user.yml
rename to testdata/import/templates/template_user.yml
diff --git a/testdata/templates/template_users.yml b/testdata/import/templates/template_users.yml
similarity index 100%
rename from testdata/templates/template_users.yml
rename to testdata/import/templates/template_users.yml
diff --git a/testdata/templates/templates.yml b/testdata/import/templates/templates.yml
similarity index 100%
rename from testdata/templates/templates.yml
rename to testdata/import/templates/templates.yml
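[Editor's note: the second pass in ExportToRipoff exists for foreign keys that point at unique columns rather than primary keys, like employees.role -> roles(name) in setup.sql above. A sketch of the rows the export should produce in that case, written as a Go literal as if inside the ripoff package; the roles UUID is invented, since setup.sql generates it with gen_random_uuid().]

```go
// Sketch only: the employees row keeps the plain column value, and a
// ~dependencies entry records which roles row owns the unique value "Boss",
// so the dependency graph still orders roles before employees on import.
var sketch = map[string]Row{
	"roles:literal(0b51f50a-0000-0000-0000-000000000000)": {
		"name": "Boss",
	},
	"employees:literal(1)": {
		"role":          "Boss",
		"~dependencies": []string{"roles:literal(0b51f50a-0000-0000-0000-000000000000)"},
	},
}
```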
From 3801b123f300187cd9b77a66cc97810ab6b82270 Mon Sep 17 00:00:00 2001
From: Sam Mortenson
Date: Thu, 14 Nov 2024 11:53:11 -0800
Subject: [PATCH 3/3] Added some docs

---
 README.md | 14 +++++++++++++-
 db.go     | 18 ++++++++++++++++--
 export.go | 13 +++++++++----
 3 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index f052026..7f1fef6 100644
--- a/README.md
+++ b/README.md
@@ -41,7 +41,7 @@ rows:
 
 For more (sometimes wildly complex) examples, see `./testdata`.
 
-## More on valueFuncs and row keys
+## More on valueFuncs
 
 valueFuncs allow you to generate random data that's seeded with a static string. This ensures that repeat runs of ripoff are deterministic, which enables upserts (consistent primary keys).
 
@@ -90,6 +90,18 @@ rows:
 - `rowId` - The map key of the row using this template, ex `users:uuid(fooBar)`. Useful for allowing the "caller" to provide their own ID for the "main" row being created, if there is one. Optional to use if you find it awkward.
 - `enums` - A map of SQL enums names to an array of enum values. Useful for creating one row for each value of an enum (ex: each user role).
 
+# Export from your database to ripoff files
+
+An experimental command has been added to generate ripoff files from your database. This may be useful to users just starting to use ripoff who don't have so much fake data that templating is required yet.
+
+Currently, it attempts to export all data from all tables into a single ripoff file. In the future flags may be added to allow you to include/exclude tables, add arbitrary `WHERE` conditions, modify the row id/key, export multiple files, or use existing templates.
+
+## Installation
+
+1. Run `go install github.com/mortenson/ripoff/cmd/ripoff-export@latest`
+2. Set the `DATABASE_URL` env variable to your local PostgreSQL database
+3. Run `ripoff-export <path to export directory>`
+
 # Security
 
 This project explicitly allows SQL injection due to the way queries are constructed. Do not run `ripoff` on directories you do not trust.
diff --git a/db.go b/db.go
index 1c0d596..47b5ca3 100644
--- a/db.go
+++ b/db.go
@@ -182,9 +182,23 @@ func buildQueryForRow(primaryKeys PrimaryKeysResult, rowId string, row Row, depe
 		if column == "~conflict" {
 			continue
 		}
+		// Explicit dependencies, for foreign keys to non-primary keys.
 		if column == "~dependencies" {
-			for _, dependency := range valueRaw.([]interface{}) {
-				err := dependencyGraph.AddEdge(rowId, dependency.(string))
+			dependencies := []string{}
+			switch v := valueRaw.(type) {
+			// Coming from yaml
+			case []interface{}:
+				for _, curr := range v {
+					dependencies = append(dependencies, curr.(string))
+				}
+			// Coming from Go, probably a test
+			case []string:
+				dependencies = v
+			default:
+				return "", fmt.Errorf("cannot parse ~dependencies value in row %s", rowId)
+			}
+			for _, dependency := range dependencies {
+				err := dependencyGraph.AddEdge(rowId, dependency)
 				if isRealGraphError(err) {
 					return "", err
 				}
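[Editor's note: the type switch above normalizes `~dependencies` whether it arrives as `[]interface{}` from YAML decoding or as `[]string` from Go callers. A self-contained demo of that normalization, extracted as a hypothetical `normalize` helper and made slightly more defensive than the diff (it rejects non-string list items instead of panicking on the type assertion).]

```go
package main

import "fmt"

// normalize collapses both encodings of a ~dependencies value into []string.
func normalize(valueRaw any) ([]string, error) {
	switch v := valueRaw.(type) {
	case []interface{}: // coming from yaml
		out := make([]string, 0, len(v))
		for _, curr := range v {
			s, ok := curr.(string)
			if !ok {
				return nil, fmt.Errorf("non-string dependency: %v", curr)
			}
			out = append(out, s)
		}
		return out, nil
	case []string: // coming from Go, probably a test
		return v, nil
	default:
		return nil, fmt.Errorf("cannot parse ~dependencies value: %v", valueRaw)
	}
}

func main() {
	fromYaml, _ := normalize([]interface{}{"roles:literal(1)"})
	fromGo, _ := normalize([]string{"roles:literal(2)"})
	fmt.Println(fromYaml, fromGo)
}
```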
diff --git a/export.go b/export.go
index d564c19..72e92ae 100644
--- a/export.go
+++ b/export.go
@@ -23,22 +23,25 @@ func ExportToRipoff(ctx context.Context, tx pgx.Tx) (RipoffFile, error) {
 		Rows: map[string]Row{},
 	}
 
+	// We use primary keys to determine what columns to use as row keys.
 	primaryKeyResult, err := getPrimaryKeys(ctx, tx)
 	if err != nil {
 		return ripoffFile, err
 	}
+	// We use foreign keys to reference other rows using the table_name:literal(...) syntax.
 	foreignKeyResult, err := getForeignKeysResult(ctx, tx)
 	if err != nil {
 		return ripoffFile, err
 	}
 	// A map from [table,column] -> ForeignKey for single column foreign keys.
 	singleColumnFkeyMap := map[[2]string]*ForeignKey{}
-	// A map from [table,column] -> a map of column values to row keys (ex: users:literal(1)) of the given table
+	// A map from [table,column] -> a map of column values to row keys (ex: users:literal(1)) of the given table.
 	uniqueConstraintMap := map[[2]string]map[string]string{}
 	// A map from table to a list of columns that need to be mapped in uniqueConstraintMap.
 	hasUniqueConstraintMap := map[string][]string{}
 	for table, tableInfo := range foreignKeyResult {
 		for _, foreignKey := range tableInfo.ForeignKeys {
+			// We could possibly maintain a uniqueConstraintMap map for these as well, but tabling for now.
 			if len(foreignKey.ColumnConditions) != 1 {
 				continue
 			}
@@ -59,6 +62,7 @@ func ExportToRipoff(ctx context.Context, tx pgx.Tx) (RipoffFile, error) {
 
 	for table, primaryKeys := range primaryKeyResult {
 		columns := make([]string, len(foreignKeyResult[table].Columns))
+		// Due to yaml limitations, ripoff treats all data as nullable text on import and export.
 		for i, column := range foreignKeyResult[table].Columns {
 			columns[i] = fmt.Sprintf("CAST(%s AS TEXT)", pq.QuoteIdentifier(column))
 		}
@@ -74,6 +78,7 @@ func ExportToRipoff(ctx context.Context, tx pgx.Tx) (RipoffFile, error) {
 		if err != nil {
 			return RipoffFile{}, err
 		}
+		// Convert the columns to nullable strings.
 		columns := make([]*string, len(columnsRaw))
 		for i, column := range columnsRaw {
 			if column == nil {
@@ -86,17 +91,17 @@ func ExportToRipoff(ctx context.Context, tx pgx.Tx) (RipoffFile, error) {
 		ripoffRow := Row{}
 		ids := []string{}
 		for i, field := range fields {
-			//
+			// Null columns are still exported since we don't know if there is a default or not (at least not at time of writing).
 			if columns[i] == nil {
 				ripoffRow[field.Name] = nil
 				continue
 			}
 			columnVal := *columns[i]
-			// Note: the order here
+			// Note: for multi-column primary keys this is ugly.
 			if slices.Contains(primaryKeys, field.Name) {
 				ids = append(ids, columnVal)
 			}
-			// No need to export primary keys due to inference from schema.
+			// No need to export primary keys due to inference from schema on import.
 			if len(primaryKeys) == 1 && primaryKeys[0] == field.Name {
 				continue
 			}
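[Editor's note: the "nullable text" convention documented above means every exported SELECT casts each column to TEXT so values can be scanned into `*string` and NULLs survive. A self-contained sketch of just that query-building step; the `buildSelect` helper name and the sample table are illustrative, not ripoff API.]

```go
package main

import (
	"fmt"
	"strings"

	"github.com/lib/pq"
)

// buildSelect mirrors how the exporter assembles its per-table query:
// every column is cast to TEXT, and identifiers are quoted with lib/pq.
func buildSelect(table string, columns []string) string {
	casts := make([]string, len(columns))
	for i, c := range columns {
		casts[i] = fmt.Sprintf("CAST(%s AS TEXT)", pq.QuoteIdentifier(c))
	}
	return fmt.Sprintf("SELECT %s FROM %s;", strings.Join(casts, ", "), pq.QuoteIdentifier(table))
}

func main() {
	// Prints: SELECT CAST("id" AS TEXT), CAST("email" AS TEXT) FROM "users";
	fmt.Println(buildSelect("users", []string{"id", "email"}))
}
```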