diff --git a/Makefile b/Makefile
index 3db5089..a49b833 100644
--- a/Makefile
+++ b/Makefile
@@ -45,7 +45,7 @@ debug:
## Write config template
config_template:
- $(PYTHON_INTERPRETER) iss/tools/config_template.py
+ $(PYTHON_INTERPRETER) -m iss.tools.config_template
## start docker
docker_start:
@@ -80,6 +80,9 @@ training:
exec_clustering:
$(PYTHON_INTERPRETER) -m iss.exec.clustering
+facets:
+ $(PYTHON_INTERPRETER) -m iss.exec.facets
+
posters:
$(PYTHON_INTERPRETER) -m iss.exec.posters --config-id=1 --generate=1 --poster-id='test'
diff --git a/README.md b/README.md
index 94f5720..cfb836e 100644
--- a/README.md
+++ b/README.md
@@ -227,6 +227,10 @@ i use a special config file for floydhub so i provide a different `.env` file.
Training dashboard and dataset are public and available [here](https://www.floydhub.com/prise6/projects/smart-iss-posts/22)
+```
+make floyd_training_prod
+```
+
I tested google colab and train the final model with it, but result are really similar to the floydhub model.
### Clustering
@@ -310,7 +314,27 @@ A bit messy.
#### Facets
-*WIP*
+Let's try [facets](https://pair-code.github.io/facets/) on this dataset! Thanks to the MySQL db, I can compare different clusterings and visualize them with facets-dive.
+
+```
+make facets
+```
+
+Two HTML pages are created in the directory `reports/`.
+
+You can manipulate all your images:
+
+![facets_dive_0](data/facets_dive_0.png)
+
+Bin by cluster:
+
+![facets_dive_1](data/facets_dive_1.png)
+
+And zoom on it:
+
+![facets_dive_2](data/facets_dive_2.png)
+
+It's a bit messy because you cannot filter your data... but the sprite trick makes it fast!
### Posters
diff --git a/config/config.template.yaml b/config/config.template.yaml
index c0ab64b..05a6d0e 100644
--- a/config/config.template.yaml
+++ b/config/config.template.yaml
@@ -1,12 +1,75 @@
+clustering:
+ advanced:
+ PCA:
+ n_components: XXX
+ random_state: XXX
+ dbscan:
+ eps: XXX
+ min_samples: XXX
+ kmeans: XXX
+ save_directory: XXX
+ strong_kmeans:
+ high: XXX
+ iter: XXX
+ low: XXX
+ seed: XXX
+ threshold: XXX
+ version: XXX
+ classical:
+ CAH:
+ n_clusters: XXX
+ PCA:
+ n_components: XXX
+ random_state: XXX
+ TSNE:
+ n_components: XXX
+ kmeans:
+ n_clusters: XXX
+ random_state: XXX
+ model:
+ name: XXX
+ type: XXX
+ save_directory: XXX
+ version: XXX
+ dbscan:
+ dbscan:
+ min_cluster_size: XXX
+ min_samples: XXX
+ model:
+ name: XXX
+ type: XXX
+ save_directory: XXX
+ umap:
+ metric: XXX
+ min_dist: XXX
+ n_components: XXX
+ n_neighbors: XXX
+ random_state: XXX
+ version: XXX
+ n2d:
+ kmeans:
+ n_clusters: XXX
+ random_state: XXX
+ model:
+ name: XXX
+ type: XXX
+ save_directory: XXX
+ umap:
+ metric: XXX
+ min_dist: XXX
+ n_components: XXX
+ n_neighbors: XXX
+ random_state: XXX
+ version: XXX
directory:
- autoencoder:
- base: XXX
- test: XXX
- train: XXX
- valid: XXX
collections: XXX
+ data_dir: XXX
+ isr_dir: XXX
+ project_dir: XXX
+ reports: XXX
models:
simple:
+ activation: XXX
batch_size: XXX
callbacks:
checkpoint:
@@ -23,6 +86,97 @@ models:
input_channel: XXX
input_height: XXX
input_width: XXX
+ latent_shape: XXX
+ learning_rate: XXX
+ model_name: XXX
+ sampling: XXX
+ save_directory: XXX
+ steps_per_epoch: XXX
+ use_multiprocessing: XXX
+ validation_freq: XXX
+ validation_steps: XXX
+ verbose: XXX
+ workers: XXX
+ simple_conv:
+ activation: XXX
+ batch_size: XXX
+ callbacks:
+ checkpoint:
+ directory: XXX
+ period: XXX
+ verbose: XXX
+ csv_logger:
+ append: XXX
+ directory: XXX
+ floyd: XXX
+ tensorboard:
+ limit_image: XXX
+ log_dir: XXX
+ epochs: XXX
+ initial_epoch: XXX
+ input_channel: XXX
+ input_height: XXX
+ input_width: XXX
+ latent_channel: XXX
+ latent_height: XXX
+ latent_width: XXX
+ learning_rate: XXX
+ model_name: XXX
+ sampling: XXX
+ save_directory: XXX
+ steps_per_epoch: XXX
+ use_multiprocessing: XXX
+ validation_freq: XXX
+ validation_steps: XXX
+ verbose: XXX
+ workers: XXX
+ variational:
+ activation: XXX
+ batch_size: XXX
+ callbacks:
+ checkpoint:
+ directory: XXX
+ period: XXX
+ verbose: XXX
+ csv_logger:
+ append: XXX
+ directory: XXX
+ display_picture:
+ epoch_laps: XXX
+ epochs: XXX
+ initial_epoch: XXX
+ input_channel: XXX
+ input_height: XXX
+ input_width: XXX
+ latent_shape: XXX
+ learning_rate: XXX
+ model_name: XXX
+ save_directory: XXX
+ steps_per_epoch: XXX
+ use_multiprocessing: XXX
+ validation_freq: XXX
+ validation_steps: XXX
+ verbose: XXX
+ workers: XXX
+ variational_conv:
+ activation: XXX
+ batch_size: XXX
+ callbacks:
+ checkpoint:
+ directory: XXX
+ period: XXX
+ verbose: XXX
+ csv_logger:
+ append: XXX
+ directory: XXX
+ display_picture:
+ epoch_laps: XXX
+ epochs: XXX
+ initial_epoch: XXX
+ input_channel: XXX
+ input_height: XXX
+ input_width: XXX
+ latent_shape: XXX
learning_rate: XXX
model_name: XXX
save_directory: XXX
@@ -39,10 +193,17 @@ mysql:
port: XXX
server: XXX
user: XXX
-training:
- proportions:
- test: XXX
- train: XXX
- valid: XXX
- seed: XXX
+sampling:
+ autoencoder:
+ directory:
+ base: XXX
+ from: XXX
+ test: XXX
+ train: XXX
+ valid: XXX
+ proportions:
+ test: XXX
+ train: XXX
+ valid: XXX
+ seed: XXX
version: XXX
diff --git a/data/facets_dive_0.png b/data/facets_dive_0.png
new file mode 100644
index 0000000..b7b161e
Binary files /dev/null and b/data/facets_dive_0.png differ
diff --git a/data/facets_dive_1.png b/data/facets_dive_1.png
new file mode 100644
index 0000000..8c1d1d1
Binary files /dev/null and b/data/facets_dive_1.png differ
diff --git a/data/facets_dive_2.png b/data/facets_dive_2.png
new file mode 100644
index 0000000..217bf8a
Binary files /dev/null and b/data/facets_dive_2.png differ
diff --git a/iss/exec/facets.py b/iss/exec/facets.py
new file mode 100644
index 0000000..233ce90
--- /dev/null
+++ b/iss/exec/facets.py
@@ -0,0 +1,147 @@
+import os
+import base64
+import pandas as pd
+import numpy as np
+from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator
+
+from iss.init_config import CONFIG
+from iss.tools import Tools
+
+
+SPRITE_NB_LIGNE = 145
+SPRITE_NB_COLONNE = 100
+TARGET_SIZE_WIDTH = 48*2
+TARGET_SIZE_HEIGHT = 27*2
+LIMIT = 14499
+
+def request_data(config, db_manager):
+
+    sql = """
+    SELECT
+    v1.pictures_id,
+
+    v1.pictures_x as v1_x,
+    v1.pictures_y as v1_y,
+    CAST(v1.label AS CHAR) as v1_label,
+
+    v2.pictures_x as v2_x,
+    v2.pictures_y as v2_y,
+    CAST(v2.label AS CHAR) as v2_label,
+
+    v3.pictures_x as v3_x,
+    v3.pictures_y as v3_y,
+    CAST(v3.label AS CHAR) as v3_label,
+
+    loc.pictures_timestamp,
+    loc.pictures_location_text,
+    loc.pictures_latitude,
+    loc.pictures_longitude
+
+    FROM iss.pictures_embedding AS v1
+
+    INNER JOIN iss.pictures_embedding v2
+    ON v1.pictures_id = v2.pictures_id
+    AND v2.clustering_type = v1.clustering_type
+    AND v2.clustering_model_type = v1.clustering_model_type
+    AND v2.clustering_model_name = v1.clustering_model_name
+    AND v2.clustering_version = 2
+
+    INNER JOIN iss.pictures_embedding v3
+    ON v1.pictures_id = v3.pictures_id
+    AND v3.clustering_type = v1.clustering_type
+    AND v3.clustering_model_type = v1.clustering_model_type
+    AND v3.clustering_model_name = v1.clustering_model_name
+    AND v3.clustering_version = 3
+
+    LEFT JOIN iss.pictures_location loc
+    ON loc.pictures_id = v1.pictures_id
+
+    WHERE v1.clustering_version = %s
+    ORDER BY pictures_id ASC LIMIT %s"""
+
+    db_manager.cursor.execute(sql, (1, LIMIT))
+    results = db_manager.cursor.fetchall()
+
+    return pd.DataFrame(results, columns=db_manager.cursor.column_names)
+
+
+def create_sprite(config, df):
+
+ images_array = [Tools.read_np_picture(os.path.join(config.get('directory')['collections'], "%s.jpg" % picture_id), target_size = (TARGET_SIZE_HEIGHT, TARGET_SIZE_WIDTH)) for picture_id in df['pictures_id']]
+ sprite = np.zeros((TARGET_SIZE_HEIGHT*SPRITE_NB_LIGNE, TARGET_SIZE_WIDTH*SPRITE_NB_COLONNE, 3))
+ index = 0
+ for i in range(SPRITE_NB_LIGNE):
+ for j in range(SPRITE_NB_COLONNE):
+ sprite[(i*TARGET_SIZE_HEIGHT):(i+1)*TARGET_SIZE_HEIGHT, (j*TARGET_SIZE_WIDTH):(j+1)*TARGET_SIZE_WIDTH, :] = images_array[index]
+ index += 1
+ if index >= len(images_array):
+ break
+ if index >= len(images_array):
+ break
+
+ img = Tools.display_one_picture(sprite)
+ return img
+
+
+def generate_facets(config, df):
+
+ proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames([{'name': 'facets-iss', 'table': df}])
+ protostr = base64.b64encode(proto.SerializeToString()).decode("utf-8")
+
+ HTML_TEMPLATE = """
+
+
+
+ """
+ html = HTML_TEMPLATE.format(protostr=protostr)
+
+ return html
+
+def generate_facets_dive(config, df, relative_sprite_path):
+
+ jsonstr = df.to_json(orient = 'records')
+ HTML_TEMPLATE = """
+
+
+
+
+ """
+ html = HTML_TEMPLATE.format(jsonstr=jsonstr, atlas_url = relative_sprite_path, sprite_width=TARGET_SIZE_WIDTH, sprite_height=TARGET_SIZE_HEIGHT)
+
+ return html
+
+
+def main():
+
+ ## db manager
+ db_manager = Tools.create_db_manager(CONFIG)
+
+ ## request data
+ df = request_data(CONFIG, db_manager)
+
+ ## create sprite
+ sprite = create_sprite(CONFIG, df)
+
+ ## save sprite
+ sprite.save(os.path.join(CONFIG.get('directory')['reports'], 'figures', 'sprite_altas.png'), "PNG")
+
+ ## generate facets
+ html_facets = generate_facets(CONFIG, df)
+ with open(os.path.join(CONFIG.get('directory')['reports'], 'facets.html'),'w') as f:
+ f.write(html_facets)
+
+ ## generate facets-dive
+ html_facets_dive = generate_facets_dive(CONFIG, df, './figures/sprite_altas.png')
+ with open(os.path.join(CONFIG.get('directory')['reports'], 'facets-dive.html'), 'w') as f:
+ f.write(html_facets_dive)
+
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file