clean facets code + doc + config template
This commit is contained in:
parent
e0a87c0f3d
commit
beb5b5107e
5
Makefile
5
Makefile
|
@ -45,7 +45,7 @@ debug:
|
||||||
|
|
||||||
## Write config template
|
## Write config template
|
||||||
config_template:
|
config_template:
|
||||||
$(PYTHON_INTERPRETER) iss/tools/config_template.py
|
$(PYTHON_INTERPRETER) -m iss.tools.config_template
|
||||||
|
|
||||||
## start docker
|
## start docker
|
||||||
docker_start:
|
docker_start:
|
||||||
|
@ -80,6 +80,9 @@ training:
|
||||||
exec_clustering:
|
exec_clustering:
|
||||||
$(PYTHON_INTERPRETER) -m iss.exec.clustering
|
$(PYTHON_INTERPRETER) -m iss.exec.clustering
|
||||||
|
|
||||||
|
facets:
|
||||||
|
$(PYTHON_INTERPRETER) -m iss.exec.facets
|
||||||
|
|
||||||
posters:
|
posters:
|
||||||
$(PYTHON_INTERPRETER) -m iss.exec.posters --config-id=1 --generate=1 --poster-id='test'
|
$(PYTHON_INTERPRETER) -m iss.exec.posters --config-id=1 --generate=1 --poster-id='test'
|
||||||
|
|
||||||
|
|
26
README.md
26
README.md
|
@ -227,6 +227,10 @@ i use a special config file for floydhub so i provide a different `.env` file.
|
||||||
|
|
||||||
Training dashboard and dataset are public and available [here](https://www.floydhub.com/prise6/projects/smart-iss-posts/22)
|
Training dashboard and dataset are public and available [here](https://www.floydhub.com/prise6/projects/smart-iss-posts/22)
|
||||||
|
|
||||||
|
```
|
||||||
|
make floyd_training_prod
|
||||||
|
```
|
||||||
|
|
||||||
I tested google colab and train the final model with it, but result are really similar to the floydhub model.
|
I tested google colab and train the final model with it, but result are really similar to the floydhub model.
|
||||||
|
|
||||||
### Clustering
|
### Clustering
|
||||||
|
@ -310,7 +314,27 @@ A bit messy.
|
||||||
|
|
||||||
#### Facets
|
#### Facets
|
||||||
|
|
||||||
*WIP*
|
Let's try [facets](https://pair-code.github.io/facets/) on this dataset ! Thanks to mysql db i can compare different clustering and visualize it with facets-dive.
|
||||||
|
|
||||||
|
```
|
||||||
|
make facets
|
||||||
|
```
|
||||||
|
|
||||||
|
Two html page are created in the directory `reports/`.
|
||||||
|
|
||||||
|
You can manipulate all your images:
|
||||||
|
|
||||||
|
![facets_dive_0](data/facets_dive_0.png)
|
||||||
|
|
||||||
|
Bin by cluster:
|
||||||
|
|
||||||
|
![facets_dive_0](data/facets_dive_1.png)
|
||||||
|
|
||||||
|
And zoom on it:
|
||||||
|
|
||||||
|
![facets_dive_0](data/facets_dive_2.png)
|
||||||
|
|
||||||
|
It's a bit messy because you cannot filter your data ... but the sprite trick make it fast!
|
||||||
|
|
||||||
|
|
||||||
### Posters
|
### Posters
|
||||||
|
|
|
@ -1,12 +1,75 @@
|
||||||
|
clustering:
|
||||||
|
advanced:
|
||||||
|
PCA:
|
||||||
|
n_components: XXX
|
||||||
|
random_state: XXX
|
||||||
|
dbscan:
|
||||||
|
eps: XXX
|
||||||
|
min_samples: XXX
|
||||||
|
kmeans: XXX
|
||||||
|
save_directory: XXX
|
||||||
|
strong_kmeans:
|
||||||
|
high: XXX
|
||||||
|
iter: XXX
|
||||||
|
low: XXX
|
||||||
|
seed: XXX
|
||||||
|
threshold: XXX
|
||||||
|
version: XXX
|
||||||
|
classical:
|
||||||
|
CAH:
|
||||||
|
n_clusters: XXX
|
||||||
|
PCA:
|
||||||
|
n_components: XXX
|
||||||
|
random_state: XXX
|
||||||
|
TSNE:
|
||||||
|
n_components: XXX
|
||||||
|
kmeans:
|
||||||
|
n_clusters: XXX
|
||||||
|
random_state: XXX
|
||||||
|
model:
|
||||||
|
name: XXX
|
||||||
|
type: XXX
|
||||||
|
save_directory: XXX
|
||||||
|
version: XXX
|
||||||
|
dbscan:
|
||||||
|
dbscan:
|
||||||
|
min_cluster_size: XXX
|
||||||
|
min_samples: XXX
|
||||||
|
model:
|
||||||
|
name: XXX
|
||||||
|
type: XXX
|
||||||
|
save_directory: XXX
|
||||||
|
umap:
|
||||||
|
metric: XXX
|
||||||
|
min_dist: XXX
|
||||||
|
n_components: XXX
|
||||||
|
n_neighbors: XXX
|
||||||
|
random_state: XXX
|
||||||
|
version: XXX
|
||||||
|
n2d:
|
||||||
|
kmeans:
|
||||||
|
n_clusters: XXX
|
||||||
|
random_state: XXX
|
||||||
|
model:
|
||||||
|
name: XXX
|
||||||
|
type: XXX
|
||||||
|
save_directory: XXX
|
||||||
|
umap:
|
||||||
|
metric: XXX
|
||||||
|
min_dist: XXX
|
||||||
|
n_components: XXX
|
||||||
|
n_neighbors: XXX
|
||||||
|
random_state: XXX
|
||||||
|
version: XXX
|
||||||
directory:
|
directory:
|
||||||
autoencoder:
|
|
||||||
base: XXX
|
|
||||||
test: XXX
|
|
||||||
train: XXX
|
|
||||||
valid: XXX
|
|
||||||
collections: XXX
|
collections: XXX
|
||||||
|
data_dir: XXX
|
||||||
|
isr_dir: XXX
|
||||||
|
project_dir: XXX
|
||||||
|
reports: XXX
|
||||||
models:
|
models:
|
||||||
simple:
|
simple:
|
||||||
|
activation: XXX
|
||||||
batch_size: XXX
|
batch_size: XXX
|
||||||
callbacks:
|
callbacks:
|
||||||
checkpoint:
|
checkpoint:
|
||||||
|
@ -23,6 +86,97 @@ models:
|
||||||
input_channel: XXX
|
input_channel: XXX
|
||||||
input_height: XXX
|
input_height: XXX
|
||||||
input_width: XXX
|
input_width: XXX
|
||||||
|
latent_shape: XXX
|
||||||
|
learning_rate: XXX
|
||||||
|
model_name: XXX
|
||||||
|
sampling: XXX
|
||||||
|
save_directory: XXX
|
||||||
|
steps_per_epoch: XXX
|
||||||
|
use_multiprocessing: XXX
|
||||||
|
validation_freq: XXX
|
||||||
|
validation_steps: XXX
|
||||||
|
verbose: XXX
|
||||||
|
workers: XXX
|
||||||
|
simple_conv:
|
||||||
|
activation: XXX
|
||||||
|
batch_size: XXX
|
||||||
|
callbacks:
|
||||||
|
checkpoint:
|
||||||
|
directory: XXX
|
||||||
|
period: XXX
|
||||||
|
verbose: XXX
|
||||||
|
csv_logger:
|
||||||
|
append: XXX
|
||||||
|
directory: XXX
|
||||||
|
floyd: XXX
|
||||||
|
tensorboard:
|
||||||
|
limit_image: XXX
|
||||||
|
log_dir: XXX
|
||||||
|
epochs: XXX
|
||||||
|
initial_epoch: XXX
|
||||||
|
input_channel: XXX
|
||||||
|
input_height: XXX
|
||||||
|
input_width: XXX
|
||||||
|
latent_channel: XXX
|
||||||
|
latent_height: XXX
|
||||||
|
latent_width: XXX
|
||||||
|
learning_rate: XXX
|
||||||
|
model_name: XXX
|
||||||
|
sampling: XXX
|
||||||
|
save_directory: XXX
|
||||||
|
steps_per_epoch: XXX
|
||||||
|
use_multiprocessing: XXX
|
||||||
|
validation_freq: XXX
|
||||||
|
validation_steps: XXX
|
||||||
|
verbose: XXX
|
||||||
|
workers: XXX
|
||||||
|
variational:
|
||||||
|
activation: XXX
|
||||||
|
batch_size: XXX
|
||||||
|
callbacks:
|
||||||
|
checkpoint:
|
||||||
|
directory: XXX
|
||||||
|
period: XXX
|
||||||
|
verbose: XXX
|
||||||
|
csv_logger:
|
||||||
|
append: XXX
|
||||||
|
directory: XXX
|
||||||
|
display_picture:
|
||||||
|
epoch_laps: XXX
|
||||||
|
epochs: XXX
|
||||||
|
initial_epoch: XXX
|
||||||
|
input_channel: XXX
|
||||||
|
input_height: XXX
|
||||||
|
input_width: XXX
|
||||||
|
latent_shape: XXX
|
||||||
|
learning_rate: XXX
|
||||||
|
model_name: XXX
|
||||||
|
save_directory: XXX
|
||||||
|
steps_per_epoch: XXX
|
||||||
|
use_multiprocessing: XXX
|
||||||
|
validation_freq: XXX
|
||||||
|
validation_steps: XXX
|
||||||
|
verbose: XXX
|
||||||
|
workers: XXX
|
||||||
|
variational_conv:
|
||||||
|
activation: XXX
|
||||||
|
batch_size: XXX
|
||||||
|
callbacks:
|
||||||
|
checkpoint:
|
||||||
|
directory: XXX
|
||||||
|
period: XXX
|
||||||
|
verbose: XXX
|
||||||
|
csv_logger:
|
||||||
|
append: XXX
|
||||||
|
directory: XXX
|
||||||
|
display_picture:
|
||||||
|
epoch_laps: XXX
|
||||||
|
epochs: XXX
|
||||||
|
initial_epoch: XXX
|
||||||
|
input_channel: XXX
|
||||||
|
input_height: XXX
|
||||||
|
input_width: XXX
|
||||||
|
latent_shape: XXX
|
||||||
learning_rate: XXX
|
learning_rate: XXX
|
||||||
model_name: XXX
|
model_name: XXX
|
||||||
save_directory: XXX
|
save_directory: XXX
|
||||||
|
@ -39,10 +193,17 @@ mysql:
|
||||||
port: XXX
|
port: XXX
|
||||||
server: XXX
|
server: XXX
|
||||||
user: XXX
|
user: XXX
|
||||||
training:
|
sampling:
|
||||||
proportions:
|
autoencoder:
|
||||||
test: XXX
|
directory:
|
||||||
train: XXX
|
base: XXX
|
||||||
valid: XXX
|
from: XXX
|
||||||
seed: XXX
|
test: XXX
|
||||||
|
train: XXX
|
||||||
|
valid: XXX
|
||||||
|
proportions:
|
||||||
|
test: XXX
|
||||||
|
train: XXX
|
||||||
|
valid: XXX
|
||||||
|
seed: XXX
|
||||||
version: XXX
|
version: XXX
|
||||||
|
|
Binary file not shown.
After Width: | Height: | Size: 1.7 MiB |
Binary file not shown.
After Width: | Height: | Size: 398 KiB |
Binary file not shown.
After Width: | Height: | Size: 590 KiB |
|
@ -0,0 +1,147 @@
|
||||||
|
import os
|
||||||
|
import base64
|
||||||
|
import pandas as pd
|
||||||
|
import numpy as np
|
||||||
|
from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator
|
||||||
|
|
||||||
|
from iss.init_config import CONFIG
|
||||||
|
from iss.tools import Tools
|
||||||
|
|
||||||
|
|
||||||
|
SPRITE_NB_LIGNE = 145
|
||||||
|
SPRITE_NB_COLONNE = 100
|
||||||
|
TARGET_SIZE_WIDTH = 48*2
|
||||||
|
TARGET_SIZE_HEIGHT = 27*2
|
||||||
|
LIMIT = 14499
|
||||||
|
|
||||||
|
def request_data(config, db_manager):
|
||||||
|
|
||||||
|
sql = """
|
||||||
|
SELECT
|
||||||
|
v1.pictures_id,
|
||||||
|
|
||||||
|
v1.pictures_x as v1_x,
|
||||||
|
v1.pictures_y as v1_y,
|
||||||
|
CAST(v1.label AS CHAR) as v1_label,
|
||||||
|
|
||||||
|
v2.pictures_x as v2_x,
|
||||||
|
v2.pictures_y as v2_y,
|
||||||
|
CAST(v2.label AS CHAR) as v2_label,
|
||||||
|
|
||||||
|
v3.pictures_x as v3_x,
|
||||||
|
v3.pictures_y as v3_y,
|
||||||
|
CAST(v3.label AS CHAR) as v3_label,
|
||||||
|
|
||||||
|
loc.pictures_timestamp,
|
||||||
|
loc.pictures_location_text,
|
||||||
|
loc.pictures_latitude,
|
||||||
|
loc.pictures_longitude
|
||||||
|
|
||||||
|
FROM iss.pictures_embedding AS v1
|
||||||
|
|
||||||
|
INNER JOIN iss.pictures_embedding v2
|
||||||
|
ON v1.pictures_id = v2.pictures_id
|
||||||
|
AND v2.clustering_type = v1.clustering_type
|
||||||
|
AND v2.clustering_model_type = v1.clustering_model_type
|
||||||
|
AND v2.clustering_model_name = v2.clustering_model_name
|
||||||
|
AND v2.clustering_version = 2
|
||||||
|
|
||||||
|
INNER JOIN iss.pictures_embedding v3
|
||||||
|
ON v1.pictures_id = v3.pictures_id
|
||||||
|
AND v3.clustering_type = v1.clustering_type
|
||||||
|
AND v3.clustering_model_type = v1.clustering_model_type
|
||||||
|
AND v3.clustering_model_name = v1.clustering_model_name
|
||||||
|
AND v3.clustering_version = 3
|
||||||
|
|
||||||
|
LEFT JOIN iss.pictures_location loc
|
||||||
|
ON loc.pictures_id = v1.pictures_id
|
||||||
|
|
||||||
|
WHERE v1.clustering_version = %s
|
||||||
|
ORDER BY pictures_id ASC LIMIT %s"""
|
||||||
|
|
||||||
|
db_manager.cursor.execute(sql, (1, LIMIT))
|
||||||
|
results = db_manager.cursor.fetchall()
|
||||||
|
|
||||||
|
return pd.DataFrame(results, columns=db_manager.cursor.column_names)
|
||||||
|
|
||||||
|
|
||||||
|
def create_sprite(config, df):
|
||||||
|
|
||||||
|
images_array = [Tools.read_np_picture(os.path.join(config.get('directory')['collections'], "%s.jpg" % picture_id), target_size = (TARGET_SIZE_HEIGHT, TARGET_SIZE_WIDTH)) for picture_id in df['pictures_id']]
|
||||||
|
sprite = np.zeros((TARGET_SIZE_HEIGHT*SPRITE_NB_LIGNE, TARGET_SIZE_WIDTH*SPRITE_NB_COLONNE, 3))
|
||||||
|
index = 0
|
||||||
|
for i in range(SPRITE_NB_LIGNE):
|
||||||
|
for j in range(SPRITE_NB_COLONNE):
|
||||||
|
sprite[(i*TARGET_SIZE_HEIGHT):(i+1)*TARGET_SIZE_HEIGHT, (j*TARGET_SIZE_WIDTH):(j+1)*TARGET_SIZE_WIDTH, :] = images_array[index]
|
||||||
|
index += 1
|
||||||
|
if index >= len(images_array):
|
||||||
|
break
|
||||||
|
if index >= len(images_array):
|
||||||
|
break
|
||||||
|
|
||||||
|
img = Tools.display_one_picture(sprite)
|
||||||
|
return img
|
||||||
|
|
||||||
|
|
||||||
|
def generate_facets(config, df):
|
||||||
|
|
||||||
|
proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames([{'name': 'facets-iss', 'table': df}])
|
||||||
|
protostr = base64.b64encode(proto.SerializeToString()).decode("utf-8")
|
||||||
|
|
||||||
|
HTML_TEMPLATE = """
|
||||||
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js"></script>
|
||||||
|
<link rel="import" href="https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html" >
|
||||||
|
<facets-overview id="elem"></facets-overview>
|
||||||
|
<script>
|
||||||
|
document.querySelector("#elem").protoInput = "{protostr}";
|
||||||
|
</script>"""
|
||||||
|
html = HTML_TEMPLATE.format(protostr=protostr)
|
||||||
|
|
||||||
|
return html
|
||||||
|
|
||||||
|
def generate_facets_dive(config, df, relative_sprite_path):
|
||||||
|
|
||||||
|
jsonstr = df.to_json(orient = 'records')
|
||||||
|
HTML_TEMPLATE = """
|
||||||
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js"></script>
|
||||||
|
<link rel="import" href="https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html">
|
||||||
|
<facets-dive id="elem" height="600" cross-origin="anonymous" sprite-image-width="{sprite_width}" sprite-image-height="{sprite_height}">
|
||||||
|
</facets-dive>
|
||||||
|
<script>
|
||||||
|
var data = {jsonstr};
|
||||||
|
var atlas_url = "{atlas_url}";
|
||||||
|
document.querySelector("#elem").data = data;
|
||||||
|
document.querySelector("#elem").atlasUrl = atlas_url;
|
||||||
|
</script>"""
|
||||||
|
html = HTML_TEMPLATE.format(jsonstr=jsonstr, atlas_url = relative_sprite_path, sprite_width=TARGET_SIZE_WIDTH, sprite_height=TARGET_SIZE_HEIGHT)
|
||||||
|
|
||||||
|
return html
|
||||||
|
|
||||||
|
|
||||||
|
def main():
|
||||||
|
|
||||||
|
## db manager
|
||||||
|
db_manager = Tools.create_db_manager(CONFIG)
|
||||||
|
|
||||||
|
## request data
|
||||||
|
df = request_data(CONFIG, db_manager)
|
||||||
|
|
||||||
|
## create sprite
|
||||||
|
sprite = create_sprite(CONFIG, df)
|
||||||
|
|
||||||
|
## save sprite
|
||||||
|
sprite.save(os.path.join(CONFIG.get('directory')['reports'], 'figures', 'sprite_altas.png'), "PNG")
|
||||||
|
|
||||||
|
## generate facets
|
||||||
|
html_facets = generate_facets(CONFIG, df)
|
||||||
|
with open(os.path.join(CONFIG.get('directory')['reports'], 'facets.html'),'w') as f:
|
||||||
|
f.write(html_facets)
|
||||||
|
|
||||||
|
## generate facets-dive
|
||||||
|
html_facets_dive = generate_facets_dive(CONFIG, df, './figures/sprite_altas.png')
|
||||||
|
with open(os.path.join(CONFIG.get('directory')['reports'], 'facets-dive.html'), 'w') as f:
|
||||||
|
f.write(html_facets_dive)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
main()
|
Loading…
Reference in New Issue