Compare commits

...

44 commits

Author SHA1 Message Date
Simon Vieille 39e0e4344f
add/update readme
All checks were successful
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build Pipeline was successful
2024-04-01 16:31:32 +02:00
Simon Vieille ad953fc634
add/update readme
All checks were successful
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build Pipeline was successful
2024-04-01 15:51:18 +02:00
Simon Vieille e97b5726bc
add/update readme
Some checks failed
ci/woodpecker/push/build unknown status
ci/woodpecker/push/test Pipeline failed
2024-04-01 15:50:53 +02:00
Simon Vieille baf348227d
apply prettier
All checks were successful
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build Pipeline was successful
2024-04-01 15:46:24 +02:00
Simon Vieille 312c92d56f
add/update readme
Some checks failed
ci/woodpecker/push/build unknown status
ci/woodpecker/push/test Pipeline failed
2024-04-01 15:45:34 +02:00
Simon Vieille 00ba7f4af9
add/update readme
All checks were successful
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build Pipeline was successful
2024-04-01 15:44:01 +02:00
Simon Vieille b3ef84265d
add/update readme
All checks were successful
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build Pipeline was successful
2024-04-01 15:31:20 +02:00
Simon Vieille c9abeb8da8
add/update readme
All checks were successful
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build Pipeline was successful
2024-04-01 15:00:23 +02:00
Simon Vieille 571c65fd56
remove tests
All checks were successful
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build Pipeline was successful
2024-04-01 12:31:03 +02:00
Simon Vieille a7cd0634ef
add postgres column type check
add specific method to handle named parameters
2024-04-01 12:30:45 +02:00
Simon Vieille 2ba8561574
tests: add mysql/postgres datas 2024-04-01 12:29:51 +02:00
Simon Vieille 77a87114c8
remove boolean_boolint faker 2024-04-01 12:29:21 +02:00
Simon Vieille 12570ec571
update example file
All checks were successful
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build Pipeline was successful
2024-03-31 22:32:30 +02:00
Simon Vieille e6b5d2160a
tests: add pg in ci
All checks were successful
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build Pipeline was successful
2024-03-31 22:28:45 +02:00
Simon Vieille 9c4b7ea525
tests: add pg in ci
Some checks failed
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build unknown status
2024-03-31 22:26:28 +02:00
Simon Vieille 5e3145a236
tests: add pg in ci
Some checks failed
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build unknown status
2024-03-31 22:25:27 +02:00
Simon Vieille 55d7b80976
tests: add pg in ci
Some checks failed
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build unknown status
2024-03-31 22:24:00 +02:00
Simon Vieille 0a1e8ffda5
tests: update mysql password in ci
Some checks failed
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build unknown status
2024-03-31 22:22:38 +02:00
Simon Vieille 11b676446d
add postgres dump for tests
Some checks failed
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build unknown status
2024-03-31 22:20:30 +02:00
Simon Vieille 801b11e33f
add schema for tests 2024-03-31 22:20:19 +02:00
Simon Vieille 40d474b6eb
fix tests for app 2024-03-31 22:19:58 +02:00
Simon Vieille 83682511b3
add IsInterger property in Data struct
add tests
2024-03-31 22:19:32 +02:00
Simon Vieille 60cbaf1530
add IsInterger property in Data struct
add tests
2024-03-31 22:19:15 +02:00
Simon Vieille 5be7927799
fix dsn when when th database is postgresql 2024-03-31 22:18:28 +02:00
Simon Vieille 038d0a2d6b use integer when required in queries 2024-03-31 22:17:07 +02:00
Simon Vieille 16bdb470dc
tests: update mysql password 2024-03-31 22:16:28 +02:00
Simon Vieille ae77b65281
[wip] add run tests
Some checks failed
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build unknown status
2024-03-20 16:21:37 +01:00
Simon Vieille 9440835453
[wip] add run tests
Some checks failed
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build unknown status
2024-03-20 16:14:26 +01:00
Simon Vieille 3405d3b60d
[wip] add run tests
Some checks failed
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build unknown status
2024-03-20 16:12:11 +01:00
Simon Vieille 1c9cf2629c
[wip] add run tests
All checks were successful
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build Pipeline was successful
2024-03-20 16:04:04 +01:00
Simon Vieille 5c8c8a0037
[wip] add run tests
Some checks failed
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build unknown status
2024-03-20 14:47:53 +01:00
Simon Vieille 61eccc280f
[wip] add run tests
Some checks failed
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build unknown status
2024-03-20 14:23:46 +01:00
Simon Vieille d10b039d00
[wip] add run tests
Some checks failed
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build unknown status
2024-03-20 14:19:05 +01:00
Simon Vieille 0b598ef0b2
[wip] add run tests 2024-03-20 14:18:07 +01:00
Simon Vieille c83aab5f94
fix ci typo
Some checks failed
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build unknown status
2024-03-20 14:11:23 +01:00
Simon Vieille 831fc3b287
fix ci typo 2024-03-20 14:11:01 +01:00
Simon Vieille a5fc2071da
[wip] add run tests 2024-03-20 14:10:02 +01:00
Simon Vieille 1c6f792711
configuration of ci 2024-03-20 14:09:43 +01:00
Simon Vieille 0c2015ad7a
add ci depends
All checks were successful
ci/woodpecker/push/test Pipeline was successful
ci/woodpecker/push/build Pipeline was successful
2024-03-20 12:34:14 +01:00
Simon Vieille 7be1382051
add ci
Some checks failed
ci/woodpecker/push/test Pipeline failed
ci/woodpecker/push/build Pipeline failed
2024-03-20 12:32:57 +01:00
Simon Vieille 2081937a24
add tests 2024-03-20 12:26:46 +01:00
Simon Vieille 2db8cc1087
add tests 2024-03-20 11:48:12 +01:00
Simon Vieille f4946b3c08
add tests 2024-03-20 11:42:28 +01:00
Simon Vieille b547166c83
add table escape 2024-03-20 11:42:18 +01:00
19 changed files with 676 additions and 44 deletions

24
.woodpecker/build.yml Normal file
View file

@ -0,0 +1,24 @@
# Woodpecker "build" pipeline.
# Triggered on pull requests and tags, and on pushes to the default branch,
# release/* and renovate/* branches.
when:
- event: [pull_request, tag]
- event: push
branch:
- ${CI_REPO_DEFAULT_BRANCH}
- release/*
- renovate/*
# Only runs once the "test" pipeline has completed.
depends_on:
- test
# YAML anchor holding the Go image shared by every step below.
variables:
- &golang_image 'golang:1.22.0'
steps:
# Populate the vendor/ directory from go.mod.
"Add vendor":
image: *golang_image
commands:
- go mod vendor
# Compile the project.
"Run build":
image: *golang_image
commands:
- go build

54
.woodpecker/test.yml Normal file
View file

@ -0,0 +1,54 @@
# Woodpecker "test" pipeline.
# Triggered on pull requests and tags, and on pushes to the default branch,
# release/* and renovate/* branches.
when:
- event: [pull_request, tag]
- event: push
branch:
- ${CI_REPO_DEFAULT_BRANCH}
- release/*
- renovate/*
# YAML anchors for the images shared by the steps and services below.
variables:
- &golang_image 'golang:1.22.0'
- &mysql_image 'mariadb:10.3'
- &postgres_image 'postgres:16'
steps:
# Populate the vendor/ directory from go.mod.
"Add vendor":
image: *golang_image
commands:
- go mod vendor
# Poll each database port every 0.5s, giving up after 30s, so the fixture
# steps only start once both services accept connections.
"Wait databases":
image: gitnet.fr/deblan/timeout:latest
commands:
- /bin/timeout -t 30 -v -c 'while true; do nc -z -v service-mysql 3306 2>&1 | grep succeeded && exit 0; sleep 0.5; done'
- /bin/timeout -t 30 -v -c 'while true; do nc -z -v service-postgres 5432 2>&1 | grep succeeded && exit 0; sleep 0.5; done'
# Load the MySQL fixtures into the "test" database.
"Fill MySQL":
image: *mysql_image
commands:
- mysql -hservice-mysql -uroot -proot test < tests/mysql_data.sql
# Load the PostgreSQL fixtures into the "test" database.
"Fill PostgresSQL":
image: *postgres_image
commands:
- psql -U postgres -h service-postgres -d test < tests/postgres_data.sql
# Run the whole Go test suite against the filled databases.
"Run tests":
image: *golang_image
commands:
- go test -v ./...
# Database services; the hostnames used above are the service names.
services:
service-postgres:
image: *postgres_image
environment:
- POSTGRES_USER=postgres
- POSTGRES_PASSWORD=postgres
- POSTGRES_DB=test
# "trust" lets the psql step connect without supplying a password.
- POSTGRES_HOST_AUTH_METHOD=trust
service-mysql:
image: *mysql_image
environment:
- MYSQL_DATABASE=test
- MARIADB_ROOT_PASSWORD=root

53
README.fr.md Normal file
View file

@ -0,0 +1,53 @@
# Database Anonymizer
**Database Anonymizer** est un outil écrit en Go qui permet **d'anonymiser ou de supprimer des données** d'une base de données **MySQL** ou **PostgreSQL**.
Il répond à plusieurs cas d'usage, comme **permettre de transférer une copie de base de données anonymisée à des développeurs et des développeuses** ou répondre à la nécessité d'**anonymiser ou supprimer des données dans le cadre du RGPD** (Règlement général sur la protection des données) selon des durées de conservation définies dans un registre de traitement.
Le projet inclut une grande quantité de générateurs de données aléatoires. Il permet également de générer des données via des modèles écrits en Twig. Vous pouvez spécifier des règles précises pour chaque table ou bien des règles globales appliquées sur chacune des tables de votre configuration.
## Usage
### Configuration
La configuration est écrite en YAML. Voici un exemple complet :
```
rules:
columns:
phone: phone_e164number
generators:
person_name: [display_name]
actions:
- table: user
virtual_columns:
domain: internet_domain
columns:
firstname: person_firstname
lastname: person_lastname
email: "{{ (firstname ~ '.' ~ lastname ~ '@' ~ domain)|lower }}"
- table: company
columns:
name: company_name
- table: access_log
query: 'select * from access_log where date < (NOW() - INTERVAL 6 MONTH)'
delete: true
- table: user_ip
primary_key: [user_id, ip_id]
delete: true
```
### Exécution
Pour afficher l'aide, utiliser `-h` :
```
database-anonymizer -h
```
Voici des exemples pour MySQL et PostgreSQL :
```
database-anonymizer --dsn "mysql://username:password@tcp(db_host)/db_name" --schema ./schema.yaml
database-anonymizer --dsn "postgres://postgres:postgres@localhost:5432/test" --schema ./schema.yaml
```

53
README.md Normal file
View file

@ -0,0 +1,53 @@
# Database Anonymizer
**Database Anonymizer** is a tool written in Go that allows **anonymizing or deleting data from a MySQL or PostgreSQL database**.
It caters to various use cases such as **providing developers with an anonymized copy of a database** or **fulfilling the need to anonymize or delete data in compliance with GDPR (General Data Protection Regulation) requirements**, based on retention periods defined in the record of processing activities.
The project includes a vast array of fakers. It also enables data generation via Twig-written templates. You can specify precise rules for each table or global rules applied to all tables in your configuration.
## Usage
### Configuration
The configuration is written in YAML. Here's a complete example:
```
rules:
columns:
phone: phone_e164number
generators:
person_name: [display_name]
actions:
- table: user
virtual_columns:
domain: internet_domain
columns:
firstname: person_firstname
lastname: person_lastname
email: "{{ (firstname ~ '.' ~ lastname ~ '@' ~ domain)|lower }}"
- table: company
columns:
name: company_name
- table: access_log
query: 'select * from access_log where date < (NOW() - INTERVAL 6 MONTH)'
delete: true
- table: user_ip
primary_key: [user_id, ip_id]
delete: true
```
### Execution
To display help, use `-h`:
```
database-anonymizer -h
```
Here are examples for MySQL and PostgreSQL:
```
database-anonymizer --dsn "mysql://username:password@tcp(db_host)/db_name" --schema ./schema.yaml
database-anonymizer --dsn "postgres://postgres:postgres@localhost:5432/test" --schema ./schema.yaml
```

View file

@ -5,11 +5,8 @@ import (
"errors"
"fmt"
// "os"
"strconv"
"strings"
nq "github.com/Knetic/go-namedParameterQuery"
"gitnet.fr/deblan/database-anonymizer/config"
"gitnet.fr/deblan/database-anonymizer/data"
"gitnet.fr/deblan/database-anonymizer/database"
@ -19,12 +16,19 @@ import (
type App struct {
Db *sql.DB
DbConfig config.DatabaseConfig
FakeManager faker.FakeManager
}
func (a *App) Run(db *sql.DB, c config.SchemaConfig, fakeManager faker.FakeManager) error {
func (a *App) Run(
db *sql.DB,
c config.SchemaConfig,
fakeManager faker.FakeManager,
dbc config.DatabaseConfig,
) error {
a.Db = db
a.FakeManager = fakeManager
a.DbConfig = dbc
for _, data := range c.Rules.Actions {
err := a.DoAction(data, c.Rules.Columns, c.Rules.Generators)
@ -39,39 +43,43 @@ func (a *App) Run(db *sql.DB, c config.SchemaConfig, fakeManager faker.FakeManag
func (a *App) TruncateTable(c config.SchemaConfigAction) error {
if c.Query == "" {
_, err := a.Db.Exec(fmt.Sprintf("TRUNCATE %s", c.Table))
_, err := a.Db.Exec(fmt.Sprintf("TRUNCATE %s", database.EscapeTable(a.DbConfig.Type, c.Table)))
return err
}
query := a.CreateSelectQuery(c)
rows := database.GetRows(a.Db, query)
rows := database.GetRows(a.Db, query, c.Table, a.DbConfig.Type)
var scan any
for _, row := range rows {
pkeys := []string{}
pCounter := 1
values := make(map[int]string)
for _, col := range c.PrimaryKey {
pkeys = append(pkeys, fmt.Sprintf("%s=:p%s", col, strconv.Itoa(pCounter)))
pCounter = pCounter + 1
if !row[col].IsString {
value := row[col]
pkeys = append(pkeys, fmt.Sprintf("%s=%s", col, value.FinalValue()))
} else {
pkeys = append(pkeys, database.GetNamedParameter(a.DbConfig.Type, col, len(values)+1))
values[len(values)+1] = row[col].Value
}
}
sql := fmt.Sprintf(
"DELETE FROM %s WHERE %s",
c.Table,
database.EscapeTable(a.DbConfig.Type, c.Table),
strings.Join(pkeys, " AND "),
)
stmt := nq.NewNamedParameterQuery(sql)
pCounter = 1
for _, col := range c.PrimaryKey {
stmt.SetValue(fmt.Sprintf("p%s", strconv.Itoa(pCounter)), row[col].Value)
pCounter = pCounter + 1
var args []any
if len(values) > 0 {
for i := 1; i <= len(values); i++ {
args = append(args, values[i])
}
}
a.Db.QueryRow(stmt.GetParsedQuery(), (stmt.GetParsedParameters())...).Scan(&scan)
a.Db.QueryRow(sql, args...).Scan(&scan)
}
return nil
@ -79,7 +87,7 @@ func (a *App) TruncateTable(c config.SchemaConfigAction) error {
func (a *App) UpdateRows(c config.SchemaConfigAction, globalColumns map[string]string, generators map[string][]string) error {
query := a.CreateSelectQuery(c)
rows := database.GetRows(a.Db, query)
rows := database.GetRows(a.Db, query, c.Table, a.DbConfig.Type)
var scan any
for key, row := range rows {
@ -142,38 +150,49 @@ func (a *App) UpdateRows(c config.SchemaConfigAction, globalColumns map[string]s
updates := []string{}
pkeys := []string{}
values := make(map[int]string)
pCounter := 1
for col, value := range row {
if value.IsUpdated && !value.IsVirtual {
values[pCounter] = value.Value
updates = append(updates, fmt.Sprintf("%s=:p%s", col, strconv.Itoa(pCounter)))
pCounter = pCounter + 1
if value.IsString {
updates = append(updates, database.GetNamedParameter(a.DbConfig.Type, col, len(values)+1))
values[len(values)+1] = value.FinalValue()
} else {
updates = append(updates, fmt.Sprintf("%s=%s", col, value.FinalValue()))
}
}
}
for _, col := range c.PrimaryKey {
values[pCounter] = row[col].Value
pkeys = append(pkeys, fmt.Sprintf("%s=:p%s", col, strconv.Itoa(pCounter)))
pCounter = pCounter + 1
value := row[col]
if !value.IsString {
pkeys = append(pkeys, fmt.Sprintf("%s=%s", col, value.FinalValue()))
} else {
pkeys = append(pkeys, database.GetNamedParameter(a.DbConfig.Type, col, len(values)+1))
values[len(values)+1] = value.FinalValue()
}
}
if len(updates) > 0 {
sql := fmt.Sprintf(
"UPDATE %s SET %s WHERE %s",
c.Table,
database.EscapeTable(a.DbConfig.Type, c.Table),
strings.Join(updates, ", "),
strings.Join(pkeys, " AND "),
)
stmt := nq.NewNamedParameterQuery(sql)
pCounter = 1
for i, value := range values {
stmt.SetValue(fmt.Sprintf("p%s", strconv.Itoa(i)), value)
var args []any
if len(values) > 0 {
for i := 1; i <= len(values); i++ {
args = append(args, values[i])
}
}
a.Db.QueryRow(stmt.GetParsedQuery(), (stmt.GetParsedParameters())...).Scan(&scan)
err := a.Db.QueryRow(sql, args...).Scan(&scan)
if err.Error() != "" && err.Error() != "sql: no rows in result set" {
logger.LogFatalExitIf(err)
}
}
}
@ -185,7 +204,7 @@ func (a *App) CreateSelectQuery(c config.SchemaConfigAction) string {
return c.Query
}
return fmt.Sprintf("SELECT * FROM %s", c.Table)
return fmt.Sprintf("SELECT * FROM %s", database.EscapeTable(a.DbConfig.Type, c.Table))
}
func (a *App) DoAction(c config.SchemaConfigAction, globalColumns map[string]string, generators map[string][]string) error {

78
app/app_test.go Normal file
View file

@ -0,0 +1,78 @@
package app
import (
"database/sql"
"fmt"
_ "github.com/go-sql-driver/mysql"
_ "github.com/lib/pq"
"gitnet.fr/deblan/database-anonymizer/config"
"gitnet.fr/deblan/database-anonymizer/faker"
"testing"
)
func TestAppCreateSelectQuery(t *testing.T) {
c := config.SchemaConfigAction{Table: "foo"}
app := App{
FakeManager: faker.NewFakeManager(),
DbConfig: config.DatabaseConfig{Type: "mysql", Dsn: "mysql://foo:bar@tests"},
}
if app.CreateSelectQuery(c) != "SELECT * FROM `foo`" {
t.Fatalf("TestAppCreateSelectQuery: empty configured query check failed")
}
c = config.SchemaConfigAction{Table: "foo", Query: "query"}
if app.CreateSelectQuery(c) != "query" {
t.Fatalf("TestAppCreateSelectQuery: configured query check failed")
}
}
// TestAppDoAction currently repeats the CreateSelectQuery assertions for a
// MySQL configuration.
//
// NOTE(review): despite its name this test never calls App.DoAction, which
// needs a live database handle; it only guards the query-building step that
// DoAction's helpers rely on. Replace with a real DoAction test when a
// database fixture is available here.
func TestAppDoAction(t *testing.T) {
	c := config.SchemaConfigAction{Table: "foo"}
	app := App{
		FakeManager: faker.NewFakeManager(),
		DbConfig:    config.DatabaseConfig{Type: "mysql", Dsn: "mysql://foo:bar@tests"},
	}

	// Failure messages carry this test's own name so a failing run points at
	// the right function.
	if app.CreateSelectQuery(c) != "SELECT * FROM `foo`" {
		t.Fatalf("TestAppDoAction: empty configured query check failed")
	}

	c = config.SchemaConfigAction{Table: "foo", Query: "query"}
	if app.CreateSelectQuery(c) != "query" {
		t.Fatalf("TestAppDoAction: configured query check failed")
	}
}
// TestAppRun is an integration test: it runs the anonymizer with
// ../tests/schema.yml against the MySQL and PostgreSQL services named in the
// DSNs below, then checks the row counts left in the truncate fixtures.
//
// Every setup step is checked so that an unreachable database or an invalid
// schema fails the test instead of silently leaving `count` at a stale value.
func TestAppRun(t *testing.T) {
	schema, err := config.LoadSchemaConfigFromFile("../tests/schema.yml")
	if err != nil {
		t.Fatalf("TestAppRun: schema loading failed: %v", err)
	}

	dsns := map[string]string{
		"mysql":    "mysql://root:root@tcp(service-mysql)/test",
		"postgres": "postgres://postgres:postgres@service-postgres:5432/test?sslmode=disable",
	}

	// Expected number of rows remaining in each table once Run has finished.
	checks := []struct {
		table string
		want  int
	}{
		{"table_truncate1", 0}, // truncated without a query: emptied
		{"table_truncate2", 1}, // only rows matching the configured query are deleted
	}

	for dbtype, dsn := range dsns {
		// One subtest per engine so the handle is closed per database and
		// failures are reported under the engine name.
		t.Run(dbtype, func(t *testing.T) {
			databaseConfig, err := config.LoadDatabaseConfig(dsn)
			if err != nil {
				t.Fatalf("TestAppRun: dsn loading failed (%s): %v", dbtype, err)
			}

			db, err := sql.Open(databaseConfig.Type, databaseConfig.Dsn)
			if err != nil {
				t.Fatalf("TestAppRun: database opening failed (%s): %v", dbtype, err)
			}
			defer db.Close()

			app := App{}
			if err := app.Run(db, schema, faker.NewFakeManager(), databaseConfig); err != nil {
				t.Fatalf("TestAppRun: run failed (%s): %v", dbtype, err)
			}

			for _, check := range checks {
				// Fresh variable per check: a failed Scan can no longer
				// reuse the previous table's count.
				var count int
				query := fmt.Sprintf("SELECT COUNT(*) FROM %s", check.table)

				if err := db.QueryRow(query).Scan(&count); err != nil {
					t.Fatalf("TestAppRun: %s count failed (%s): %v", check.table, dbtype, err)
				}

				if count != check.want {
					t.Fatalf("TestAppRun: %s check failed (%s)", check.table, dbtype)
				}
			}
		})
	}
}

49
config/config_test.go Normal file
View file

@ -0,0 +1,49 @@
package config
import (
"testing"
)
// TestLoadDatabaseConfig checks that LoadDatabaseConfig accepts the "mysql"
// and "postgres" DSN schemes, reporting the matching Type, and rejects an
// unknown scheme with an error.
func TestLoadDatabaseConfig(t *testing.T) {
	// Supported schemes: loading must succeed and Type must echo the scheme.
	for _, dbType := range []string{"mysql", "postgres"} {
		loaded, err := LoadDatabaseConfig(dbType + "://")
		if err != nil {
			t.Fatalf("LoadDatabaseConfig: %s dsn check failed", dbType)
		}

		if loaded.Type != dbType {
			t.Fatalf("LoadDatabaseConfig: %s type check failed", dbType)
		}
	}

	// Unsupported scheme: an error is expected.
	if _, err := LoadDatabaseConfig("foo://"); err == nil {
		t.Fatalf("LoadDatabaseConfig: lambda dsn check failed")
	}
}
// TestSchemaConfigActionInitPrimaryKey checks that InitPrimaryKey falls back
// to the single key "id" when none is configured and leaves an explicitly
// configured composite key untouched.
func TestSchemaConfigActionInitPrimaryKey(t *testing.T) {
	// No primary key configured: expect the default ["id"].
	defaulted := SchemaConfigAction{}
	defaulted.InitPrimaryKey()

	if keys := defaulted.PrimaryKey; !(len(keys) == 1 && keys[0] == "id") {
		t.Fatalf("TestSchemaConfigActionInitPrimaryKey: primary key check failed")
	}

	// Composite key configured: expect it to be preserved in order.
	composite := SchemaConfigAction{PrimaryKey: []string{"foo", "bar"}}
	composite.InitPrimaryKey()

	if keys := composite.PrimaryKey; !(len(keys) == 2 && keys[0] == "foo" && keys[1] == "bar") {
		t.Fatalf("TestSchemaConfigActionInitPrimaryKey: primary key check failed")
	}
}

View file

@ -30,7 +30,12 @@ func LoadDatabaseConfig(dsn string) (DatabaseConfig, error) {
dbType := elements[0]
config.Dsn = strings.Replace(dsn, fmt.Sprintf("%s://", dbType), "", 1)
if dbType == "postgres" {
config.Dsn = dsn
} else {
config.Dsn = strings.Replace(dsn, fmt.Sprintf("%s://", dbType), "", 1)
}
config.Type = elements[0]
return config, nil

View file

@ -16,6 +16,11 @@ type Data struct {
IsVirtual bool
IsPrimaryKey bool
IsUpdated bool
IsInteger bool
IsBoolean bool
IsString bool
IsNull bool
}
func (d *Data) FromByte(v []byte) *Data {
@ -36,6 +41,22 @@ func (d *Data) FromString(v string) *Data {
return d
}
// FinalValue returns the textual form of the value to place in a query:
//   - "null" when the value is flagged IsNull;
//   - "true" or "false" when it is flagged IsBoolean ("1" maps to "true",
//     anything else to "false");
//   - the raw Value otherwise.
func (d *Data) FinalValue() string {
	switch {
	case d.IsNull:
		return "null"
	case d.IsBoolean && d.Value == "1":
		return "true"
	case d.IsBoolean:
		return "false"
	default:
		return d.Value
	}
}
func (d *Data) IsTwigExpression() bool {
return strings.Contains(d.Faker, "{{") || strings.Contains(d.Faker, "}}")
}

88
data/data_test.go Normal file
View file

@ -0,0 +1,88 @@
package data
import (
"gitnet.fr/deblan/database-anonymizer/faker"
"testing"
)
// TestDataFroms checks that the From* helpers store the textual form of their
// argument in Data.Value.
func TestDataFroms(t *testing.T) {
	d := Data{}

	// An int64 is stored as its decimal representation.
	d.FromInt64(int64(42))
	if d.Value != "42" {
		t.Fatalf("TestDataFroms: FromInt64 check failed")
	}

	// A byte slice is stored as the corresponding string.
	d.FromByte([]byte("ABC"))
	if d.Value != "ABC" {
		t.Fatalf("TestDataFroms: FromByte check failed")
	}
}
// TestDataIsTwigExpression checks that a faker is treated as a Twig expression
// as soon as it contains an opening "{{" or a closing "}}" delimiter.
func TestDataIsTwigExpression(t *testing.T) {
	cases := []struct {
		faker string
		want  bool
	}{
		{"foo", false},   // plain faker name
		{"foo {{", true}, // opening delimiter only
		{"}}", true},     // closing delimiter only
	}

	for _, tc := range cases {
		d := Data{Faker: tc.faker}
		if d.IsTwigExpression() != tc.want {
			t.Fatalf("IsTwigExpression: IsTwigExpression check failed")
		}
	}
}
// TestDataUpdate checks Data.Update for the four kinds of faker exercised
// here: empty and "_" (value left untouched, IsUpdated stays false), a named
// faker (value replaced by generated data) and a Twig expression (rendered
// with the other columns of the row).
func TestDataUpdate(t *testing.T) {
	row := make(map[string]Data)
	row["bar"] = Data{Value: "bar_value"}
	manager := faker.NewFakeManager()

	// Empty faker: nothing must change.
	d := Data{Faker: "", Value: "foo"}

	if d.IsUpdated {
		t.Fatalf("TestDataUpdate: IsUpdated check failed")
	}

	d.Update(row, manager)
	if d.IsUpdated {
		t.Fatalf("TestDataUpdate: IsUpdated check failed")
	}
	if d.Value != "foo" {
		t.Fatalf("TestDataUpdate: Value check failed")
	}

	// "_" faker: nothing must change either.
	d = Data{Faker: "_", Value: "foo"}
	d.Update(row, manager)
	if d.IsUpdated {
		t.Fatalf("TestDataUpdate: IsUpdated check failed")
	}
	if d.Value != "foo" {
		t.Fatalf("TestDataUpdate: Value check failed")
	}

	// Named faker: the value must be replaced by non-empty generated data.
	d = Data{Faker: "address", Value: "foo"}
	d.Update(row, manager)
	if !d.IsUpdated {
		t.Fatalf("TestDataUpdate: IsUpdated check failed")
	}
	// Fail when the value is unchanged OR empty; the previous "&&" form could
	// never report an empty generated value.
	if d.Value == "foo" || len(d.Value) == 0 {
		t.Fatalf("TestDataUpdate: Value check failed")
	}

	// Twig expression: "bar" is resolved from the row.
	d = Data{Faker: "Twig {{ bar }}", Value: "foo"}
	d.Update(row, manager)
	if !d.IsUpdated {
		t.Fatalf("TestDataUpdate: IsUpdated check failed")
	}
	if d.Value != "Twig bar_value" {
		t.Fatalf("TestDataUpdate: Value check failed")
	}
}

View file

@ -2,11 +2,28 @@ package database
import (
	"database/sql"
	"fmt"
	"strings"

	"gitnet.fr/deblan/database-anonymizer/data"
	"gitnet.fr/deblan/database-anonymizer/logger"
)
func GetRows(db *sql.DB, query string) map[int]map[string]data.Data {
// EscapeTable returns table quoted as an identifier for the given database
// type: backticks for "mysql", double quotes for any other type (PostgreSQL).
//
// Any quote character already present in the name is doubled, which is how
// both engines represent a literal quote inside a quoted identifier. Without
// this, a table name containing the quote character would terminate the
// identifier early and corrupt (or inject into) the generated statement.
func EscapeTable(dbType, table string) string {
	if dbType == "mysql" {
		return "`" + strings.ReplaceAll(table, "`", "``") + "`"
	}

	return `"` + strings.ReplaceAll(table, `"`, `""`) + `"`
}
// GetNamedParameter builds the "column=placeholder" fragment for a prepared
// statement. MySQL uses the anonymous "?" placeholder (number is ignored);
// every other database type gets the numbered "$N" form.
func GetNamedParameter(dbType, col string, number int) string {
	placeholder := fmt.Sprintf("$%d", number)

	if dbType == "mysql" {
		placeholder = "?"
	}

	return col + "=" + placeholder
}
func GetRows(db *sql.DB, query, table, dbType string) map[int]map[string]data.Data {
rows, err := db.Query(query)
defer rows.Close()
logger.LogFatalExitIf(err)
@ -20,6 +37,8 @@ func GetRows(db *sql.DB, query string) map[int]map[string]data.Data {
key := 0
columnsTypes := make(map[string]string)
for rows.Next() {
row := make(map[string]data.Data)
@ -31,11 +50,32 @@ func GetRows(db *sql.DB, query string) map[int]map[string]data.Data {
logger.LogFatalExitIf(err)
}
var typeValue string
for i, col := range columns {
value := values[i]
d := data.Data{IsVirtual: false}
d := data.Data{
IsVirtual: false,
IsNull: value == nil,
}
if value != nil {
if dbType == "postgres" {
if len(columnsTypes[col]) == 0 {
typeQuery := fmt.Sprintf("SELECT pg_typeof(%s) as value FROM %s", col, EscapeTable(dbType, table))
db.QueryRow(typeQuery).Scan(&typeValue)
columnsTypes[col] = typeValue
}
dataType := columnsTypes[col]
d.IsInteger = dataType == "integer"
d.IsBoolean = dataType == "boolean"
d.IsString = !d.IsBoolean && !d.IsInteger
} else {
d.IsString = true
}
switch v := value.(type) {
case []byte:
d.FromByte(v)

25
database/database_test.go Normal file
View file

@ -0,0 +1,25 @@
package database
import (
"testing"
)
// TestEscapeTable checks the identifier quoting applied per database type:
// backticks for MySQL, double quotes for PostgreSQL.
func TestEscapeTable(t *testing.T) {
	cases := []struct {
		dbType string
		want   string
	}{
		{"mysql", "`foo`"},
		{"postgres", "\"foo\""},
	}

	for _, tc := range cases {
		if got := EscapeTable(tc.dbType, "foo"); got != tc.want {
			t.Fatalf("TestEscapeTable: %s check failed", tc.dbType)
		}
	}
}
// TestGetNamedParameter checks the placeholder syntax produced per database
// type: "?" for MySQL and "$N" for PostgreSQL.
func TestGetNamedParameter(t *testing.T) {
	cases := []struct {
		dbType string
		want   string
	}{
		{"mysql", "foo=?"},
		{"postgres", "foo=$1"},
	}

	for _, tc := range cases {
		if got := GetNamedParameter(tc.dbType, "foo", 1); got != tc.want {
			t.Fatalf("TestGetNamedParameter: %s check failed", tc.dbType)
		}
	}
}

View file

@ -1,11 +1,10 @@
rules:
columns:
phone: phone_e164number
# generators:
# aaaaaaaaaaaaa: [id]
generators:
person_name: [display_name]
actions:
- table: user
query: 'select * from `user` where username != "admin"'
virtual_columns:
domain: internet_domain
columns:
@ -15,8 +14,9 @@ rules:
- table: company
columns:
name: company_name
- table: post
query: 'select * from post where company_id is not null'
- table: access_log
query: 'select * from access_log where date < (NOW() - INTERVAL 6 MONTH)'
delete: true
- table: training
- table: user_ip
primary_key: [user_id, ip_id]
delete: true

View file

@ -50,7 +50,6 @@ func NewFakeManager() FakeManager {
return "0"
}
}
datas["boolean_boolint"] = func() string { return strconv.Itoa(fake.Boolean().BoolInt()) }
datas["car_category"] = func() string { return fake.Car().Category() }
datas["car_fueltype"] = func() string { return fake.Car().FuelType() }
datas["car_maker"] = func() string { return fake.Car().Maker() }

25
faker/faker_test.go Normal file
View file

@ -0,0 +1,25 @@
package faker
import (
"testing"
)
// TestIsValidFaker checks which faker names the manager accepts: the empty
// name, the "_" placeholder and a known faker are valid; an unknown name is
// not.
func TestIsValidFaker(t *testing.T) {
	manager := NewFakeManager()

	cases := []struct {
		faker string // name handed to IsValidFaker
		label string // name as shown in the failure message
		valid bool
	}{
		{"", "empty", true},
		{"_", "_", true},
		{"address", "address", true},
		{"unknown_faker", "unknown_faker", false},
	}

	for _, tc := range cases {
		if manager.IsValidFaker(tc.faker) != tc.valid {
			t.Fatalf("TestIsValidFaker: %s faker check failed", tc.label)
		}
	}
}

View file

@ -15,6 +15,11 @@ import (
func main() {
app := &cli.App{
Name: "database-anonymizer",
Usage: "Allows anonymizing or deleting data from a MySQL or PostgreSQL database",
Authors: []*cli.Author{
&cli.Author{Name: "Simon Vieille", Email: "contact@deblan.fr"},
},
Flags: []cli.Flag{
&cli.StringFlag{
Name: "dsn",
@ -39,7 +44,7 @@ func main() {
logger.LogFatalExitIf(err)
app := app.App{}
return app.Run(db, schema, faker.NewFakeManager())
return app.Run(db, schema, faker.NewFakeManager(), databaseConfig)
},
}

40
tests/mysql_data.sql Normal file
View file

@ -0,0 +1,40 @@
-- MySQL/MariaDB fixtures for the integration tests.
-- The script is re-runnable: every table is dropped before being recreated.

-- Session settings.
SET NAMES utf8;
SET time_zone = '+00:00';
SET foreign_key_checks = 0;
SET sql_mode = 'NO_AUTO_VALUE_ON_ZERO';

SET NAMES utf8mb4;

-- table_truncate1: emptied entirely by the "truncate" action of tests/schema.yml.
DROP TABLE IF EXISTS `table_truncate1`;
CREATE TABLE `table_truncate1` (
    `id` int(11) NOT NULL AUTO_INCREMENT,
    PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;

INSERT INTO `table_truncate1` (`id`) VALUES (1), (2), (3);

-- table_truncate2: only rows with delete_me set are removed by the schema's
-- query, so exactly one row (id 3) is expected to remain.
DROP TABLE IF EXISTS `table_truncate2`;
CREATE TABLE `table_truncate2` (
    `id` int(11) NOT NULL AUTO_INCREMENT,
    `delete_me` tinyint(4) NOT NULL,
    PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;

INSERT INTO `table_truncate2` (`id`, `delete_me`) VALUES
    (1, 1),
    (2, 1),
    (3, 0);

-- table_update: one column per value kind (string, boolean stored as int,
-- integer), plus an all-NULL row to cover NULL handling.
DROP TABLE IF EXISTS `table_update`;
CREATE TABLE `table_update` (
    `id` int NOT NULL AUTO_INCREMENT PRIMARY KEY,
    `col_string` varchar(255) NULL,
    `col_bool` int NULL,
    `col_int` int NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_general_ci;

INSERT INTO `table_update` (`id`, `col_string`, `col_bool`, `col_int`) VALUES
    (1, 'foo', 1, 1),
    (2, 'bar', 0, 2),
    (3, NULL, NULL, NULL);

42
tests/postgres_data.sql Normal file
View file

@ -0,0 +1,42 @@
-- PostgreSQL fixtures for the integration tests.
-- The script is re-runnable: each table and its id sequence are dropped
-- (table first, since its default references the sequence) and recreated.

-- table_truncate1: emptied entirely by the "truncate" action of tests/schema.yml.
DROP TABLE IF EXISTS "table_truncate1";
DROP SEQUENCE IF EXISTS table_truncate1_id_seq;
CREATE SEQUENCE table_truncate1_id_seq INCREMENT 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1;

CREATE TABLE "public"."table_truncate1" (
    "id" integer DEFAULT nextval('table_truncate1_id_seq') NOT NULL,
    CONSTRAINT "table_truncate1_pkey" PRIMARY KEY ("id")
) WITH (oids = false);

INSERT INTO "table_truncate1" ("id") VALUES (1), (2), (3);

-- table_truncate2: only rows with delete_me = true are removed by the
-- schema's query, so exactly one row (id 3) is expected to remain.
DROP TABLE IF EXISTS "table_truncate2";
DROP SEQUENCE IF EXISTS table_truncate2_id_seq;
CREATE SEQUENCE table_truncate2_id_seq INCREMENT 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1;

CREATE TABLE "public"."table_truncate2" (
    "id" integer DEFAULT nextval('table_truncate2_id_seq') NOT NULL,
    "delete_me" boolean NOT NULL,
    CONSTRAINT "table_truncate2_pkey" PRIMARY KEY ("id")
) WITH (oids = false);

INSERT INTO "table_truncate2" ("id", "delete_me") VALUES
    (1, 't'),
    (2, 't'),
    (3, 'f');

-- table_update: one column per value kind (string, boolean, integer), plus an
-- all-NULL row to cover NULL handling.
DROP TABLE IF EXISTS "table_update";
DROP SEQUENCE IF EXISTS table_update_id_seq;
CREATE SEQUENCE table_update_id_seq INCREMENT 1 MINVALUE 1 MAXVALUE 9223372036854775807 CACHE 1;

CREATE TABLE "public"."table_update" (
    "id" integer DEFAULT nextval('table_update_id_seq') NOT NULL,
    "col_string" character varying,
    "col_bool" boolean,
    "col_int" integer,
    CONSTRAINT "table_update_pkey" PRIMARY KEY ("id")
) WITH (oids = false);

INSERT INTO "table_update" ("id", "col_string", "col_bool", "col_int") VALUES
    (1, 'foo', 't', 1),
    (2, 'bar', 'f', 2),
    (3, NULL, NULL, NULL);

12
tests/schema.yml Normal file
View file

@ -0,0 +1,12 @@
# Anonymization schema used by the integration tests (loaded by TestAppRun).
rules:
actions:
# Replace each column of table_update: a named faker for the string and the
# boolean, and a Twig expression rendering the constant "10" for the integer.
- table: table_update
columns:
col_string: address_city
col_bool: boolean_bool
col_int: '{{ "10" }}'
# No query: the whole table is emptied.
- table: table_truncate1
truncate: true
# With a query: only the selected rows are deleted.
- table: table_truncate2
query: 'select * from table_truncate2 where delete_me=true'
truncate: true
truncate: true