mirror of
https://github.com/prise6/smart-iss-posts
synced 2024-04-27 11:31:51 +02:00
clean facets code + doc + config template
This commit is contained in:
parent
e0a87c0f3d
commit
beb5b5107e
5
Makefile
5
Makefile
|
@ -45,7 +45,7 @@ debug:
|
|||
|
||||
## Write config template
|
||||
config_template:
|
||||
$(PYTHON_INTERPRETER) iss/tools/config_template.py
|
||||
$(PYTHON_INTERPRETER) -m iss.tools.config_template
|
||||
|
||||
## start docker
|
||||
docker_start:
|
||||
|
@ -80,6 +80,9 @@ training:
|
|||
exec_clustering:
|
||||
$(PYTHON_INTERPRETER) -m iss.exec.clustering
|
||||
|
||||
facets:
|
||||
$(PYTHON_INTERPRETER) -m iss.exec.facets
|
||||
|
||||
posters:
|
||||
$(PYTHON_INTERPRETER) -m iss.exec.posters --config-id=1 --generate=1 --poster-id='test'
|
||||
|
||||
|
|
26
README.md
26
README.md
|
@ -227,6 +227,10 @@ i use a special config file for floydhub so i provide a different `.env` file.
|
|||
|
||||
Training dashboard and dataset are public and available [here](https://www.floydhub.com/prise6/projects/smart-iss-posts/22)
|
||||
|
||||
```
|
||||
make floyd_training_prod
|
||||
```
|
||||
|
||||
I tested Google Colab and trained the final model with it, but the results are really similar to the floydhub model.
|
||||
|
||||
### Clustering
|
||||
|
@ -310,7 +314,27 @@ A bit messy.
|
|||
|
||||
#### Facets
|
||||
|
||||
*WIP*
|
||||
Let's try [facets](https://pair-code.github.io/facets/) on this dataset! Thanks to the mysql db, I can compare different clusterings and visualize them with facets-dive.
|
||||
|
||||
```
|
||||
make facets
|
||||
```
|
||||
|
||||
Two HTML pages are created in the directory `reports/`.
|
||||
|
||||
You can manipulate all your images:
|
||||
|
||||
![facets_dive_0](data/facets_dive_0.png)
|
||||
|
||||
Bin by cluster:
|
||||
|
||||
![facets_dive_0](data/facets_dive_1.png)
|
||||
|
||||
And zoom on it:
|
||||
|
||||
![facets_dive_0](data/facets_dive_2.png)
|
||||
|
||||
It's a bit messy because you cannot filter your data ... but the sprite trick makes it fast!
|
||||
|
||||
|
||||
### Posters
|
||||
|
|
|
@ -1,12 +1,75 @@
|
|||
clustering:
|
||||
advanced:
|
||||
PCA:
|
||||
n_components: XXX
|
||||
random_state: XXX
|
||||
dbscan:
|
||||
eps: XXX
|
||||
min_samples: XXX
|
||||
kmeans: XXX
|
||||
save_directory: XXX
|
||||
strong_kmeans:
|
||||
high: XXX
|
||||
iter: XXX
|
||||
low: XXX
|
||||
seed: XXX
|
||||
threshold: XXX
|
||||
version: XXX
|
||||
classical:
|
||||
CAH:
|
||||
n_clusters: XXX
|
||||
PCA:
|
||||
n_components: XXX
|
||||
random_state: XXX
|
||||
TSNE:
|
||||
n_components: XXX
|
||||
kmeans:
|
||||
n_clusters: XXX
|
||||
random_state: XXX
|
||||
model:
|
||||
name: XXX
|
||||
type: XXX
|
||||
save_directory: XXX
|
||||
version: XXX
|
||||
dbscan:
|
||||
dbscan:
|
||||
min_cluster_size: XXX
|
||||
min_samples: XXX
|
||||
model:
|
||||
name: XXX
|
||||
type: XXX
|
||||
save_directory: XXX
|
||||
umap:
|
||||
metric: XXX
|
||||
min_dist: XXX
|
||||
n_components: XXX
|
||||
n_neighbors: XXX
|
||||
random_state: XXX
|
||||
version: XXX
|
||||
n2d:
|
||||
kmeans:
|
||||
n_clusters: XXX
|
||||
random_state: XXX
|
||||
model:
|
||||
name: XXX
|
||||
type: XXX
|
||||
save_directory: XXX
|
||||
umap:
|
||||
metric: XXX
|
||||
min_dist: XXX
|
||||
n_components: XXX
|
||||
n_neighbors: XXX
|
||||
random_state: XXX
|
||||
version: XXX
|
||||
directory:
|
||||
autoencoder:
|
||||
base: XXX
|
||||
test: XXX
|
||||
train: XXX
|
||||
valid: XXX
|
||||
collections: XXX
|
||||
data_dir: XXX
|
||||
isr_dir: XXX
|
||||
project_dir: XXX
|
||||
reports: XXX
|
||||
models:
|
||||
simple:
|
||||
activation: XXX
|
||||
batch_size: XXX
|
||||
callbacks:
|
||||
checkpoint:
|
||||
|
@ -23,6 +86,97 @@ models:
|
|||
input_channel: XXX
|
||||
input_height: XXX
|
||||
input_width: XXX
|
||||
latent_shape: XXX
|
||||
learning_rate: XXX
|
||||
model_name: XXX
|
||||
sampling: XXX
|
||||
save_directory: XXX
|
||||
steps_per_epoch: XXX
|
||||
use_multiprocessing: XXX
|
||||
validation_freq: XXX
|
||||
validation_steps: XXX
|
||||
verbose: XXX
|
||||
workers: XXX
|
||||
simple_conv:
|
||||
activation: XXX
|
||||
batch_size: XXX
|
||||
callbacks:
|
||||
checkpoint:
|
||||
directory: XXX
|
||||
period: XXX
|
||||
verbose: XXX
|
||||
csv_logger:
|
||||
append: XXX
|
||||
directory: XXX
|
||||
floyd: XXX
|
||||
tensorboard:
|
||||
limit_image: XXX
|
||||
log_dir: XXX
|
||||
epochs: XXX
|
||||
initial_epoch: XXX
|
||||
input_channel: XXX
|
||||
input_height: XXX
|
||||
input_width: XXX
|
||||
latent_channel: XXX
|
||||
latent_height: XXX
|
||||
latent_width: XXX
|
||||
learning_rate: XXX
|
||||
model_name: XXX
|
||||
sampling: XXX
|
||||
save_directory: XXX
|
||||
steps_per_epoch: XXX
|
||||
use_multiprocessing: XXX
|
||||
validation_freq: XXX
|
||||
validation_steps: XXX
|
||||
verbose: XXX
|
||||
workers: XXX
|
||||
variational:
|
||||
activation: XXX
|
||||
batch_size: XXX
|
||||
callbacks:
|
||||
checkpoint:
|
||||
directory: XXX
|
||||
period: XXX
|
||||
verbose: XXX
|
||||
csv_logger:
|
||||
append: XXX
|
||||
directory: XXX
|
||||
display_picture:
|
||||
epoch_laps: XXX
|
||||
epochs: XXX
|
||||
initial_epoch: XXX
|
||||
input_channel: XXX
|
||||
input_height: XXX
|
||||
input_width: XXX
|
||||
latent_shape: XXX
|
||||
learning_rate: XXX
|
||||
model_name: XXX
|
||||
save_directory: XXX
|
||||
steps_per_epoch: XXX
|
||||
use_multiprocessing: XXX
|
||||
validation_freq: XXX
|
||||
validation_steps: XXX
|
||||
verbose: XXX
|
||||
workers: XXX
|
||||
variational_conv:
|
||||
activation: XXX
|
||||
batch_size: XXX
|
||||
callbacks:
|
||||
checkpoint:
|
||||
directory: XXX
|
||||
period: XXX
|
||||
verbose: XXX
|
||||
csv_logger:
|
||||
append: XXX
|
||||
directory: XXX
|
||||
display_picture:
|
||||
epoch_laps: XXX
|
||||
epochs: XXX
|
||||
initial_epoch: XXX
|
||||
input_channel: XXX
|
||||
input_height: XXX
|
||||
input_width: XXX
|
||||
latent_shape: XXX
|
||||
learning_rate: XXX
|
||||
model_name: XXX
|
||||
save_directory: XXX
|
||||
|
@ -39,10 +193,17 @@ mysql:
|
|||
port: XXX
|
||||
server: XXX
|
||||
user: XXX
|
||||
training:
|
||||
proportions:
|
||||
test: XXX
|
||||
train: XXX
|
||||
valid: XXX
|
||||
seed: XXX
|
||||
sampling:
|
||||
autoencoder:
|
||||
directory:
|
||||
base: XXX
|
||||
from: XXX
|
||||
test: XXX
|
||||
train: XXX
|
||||
valid: XXX
|
||||
proportions:
|
||||
test: XXX
|
||||
train: XXX
|
||||
valid: XXX
|
||||
seed: XXX
|
||||
version: XXX
|
||||
|
|
BIN
data/facets_dive_0.png
Normal file
BIN
data/facets_dive_0.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 1.7 MiB |
BIN
data/facets_dive_1.png
Normal file
BIN
data/facets_dive_1.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 398 KiB |
BIN
data/facets_dive_2.png
Normal file
BIN
data/facets_dive_2.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 590 KiB |
147
iss/exec/facets.py
Normal file
147
iss/exec/facets.py
Normal file
|
@ -0,0 +1,147 @@
|
|||
import os
|
||||
import base64
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from facets_overview.generic_feature_statistics_generator import GenericFeatureStatisticsGenerator
|
||||
|
||||
from iss.init_config import CONFIG
|
||||
from iss.tools import Tools
|
||||
|
||||
|
||||
SPRITE_NB_LIGNE = 145
|
||||
SPRITE_NB_COLONNE = 100
|
||||
TARGET_SIZE_WIDTH = 48*2
|
||||
TARGET_SIZE_HEIGHT = 27*2
|
||||
LIMIT = 14499
|
||||
|
||||
def request_data(config, db_manager):
|
||||
|
||||
sql = """
|
||||
SELECT
|
||||
v1.pictures_id,
|
||||
|
||||
v1.pictures_x as v1_x,
|
||||
v1.pictures_y as v1_y,
|
||||
CAST(v1.label AS CHAR) as v1_label,
|
||||
|
||||
v2.pictures_x as v2_x,
|
||||
v2.pictures_y as v2_y,
|
||||
CAST(v2.label AS CHAR) as v2_label,
|
||||
|
||||
v3.pictures_x as v3_x,
|
||||
v3.pictures_y as v3_y,
|
||||
CAST(v3.label AS CHAR) as v3_label,
|
||||
|
||||
loc.pictures_timestamp,
|
||||
loc.pictures_location_text,
|
||||
loc.pictures_latitude,
|
||||
loc.pictures_longitude
|
||||
|
||||
FROM iss.pictures_embedding AS v1
|
||||
|
||||
INNER JOIN iss.pictures_embedding v2
|
||||
ON v1.pictures_id = v2.pictures_id
|
||||
AND v2.clustering_type = v1.clustering_type
|
||||
AND v2.clustering_model_type = v1.clustering_model_type
|
||||
AND v2.clustering_model_name = v2.clustering_model_name
|
||||
AND v2.clustering_version = 2
|
||||
|
||||
INNER JOIN iss.pictures_embedding v3
|
||||
ON v1.pictures_id = v3.pictures_id
|
||||
AND v3.clustering_type = v1.clustering_type
|
||||
AND v3.clustering_model_type = v1.clustering_model_type
|
||||
AND v3.clustering_model_name = v1.clustering_model_name
|
||||
AND v3.clustering_version = 3
|
||||
|
||||
LEFT JOIN iss.pictures_location loc
|
||||
ON loc.pictures_id = v1.pictures_id
|
||||
|
||||
WHERE v1.clustering_version = %s
|
||||
ORDER BY pictures_id ASC LIMIT %s"""
|
||||
|
||||
db_manager.cursor.execute(sql, (1, LIMIT))
|
||||
results = db_manager.cursor.fetchall()
|
||||
|
||||
return pd.DataFrame(results, columns=db_manager.cursor.column_names)
|
||||
|
||||
|
||||
def create_sprite(config, df):
|
||||
|
||||
images_array = [Tools.read_np_picture(os.path.join(config.get('directory')['collections'], "%s.jpg" % picture_id), target_size = (TARGET_SIZE_HEIGHT, TARGET_SIZE_WIDTH)) for picture_id in df['pictures_id']]
|
||||
sprite = np.zeros((TARGET_SIZE_HEIGHT*SPRITE_NB_LIGNE, TARGET_SIZE_WIDTH*SPRITE_NB_COLONNE, 3))
|
||||
index = 0
|
||||
for i in range(SPRITE_NB_LIGNE):
|
||||
for j in range(SPRITE_NB_COLONNE):
|
||||
sprite[(i*TARGET_SIZE_HEIGHT):(i+1)*TARGET_SIZE_HEIGHT, (j*TARGET_SIZE_WIDTH):(j+1)*TARGET_SIZE_WIDTH, :] = images_array[index]
|
||||
index += 1
|
||||
if index >= len(images_array):
|
||||
break
|
||||
if index >= len(images_array):
|
||||
break
|
||||
|
||||
img = Tools.display_one_picture(sprite)
|
||||
return img
|
||||
|
||||
|
||||
def generate_facets(config, df):
|
||||
|
||||
proto = GenericFeatureStatisticsGenerator().ProtoFromDataFrames([{'name': 'facets-iss', 'table': df}])
|
||||
protostr = base64.b64encode(proto.SerializeToString()).decode("utf-8")
|
||||
|
||||
HTML_TEMPLATE = """
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js"></script>
|
||||
<link rel="import" href="https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html" >
|
||||
<facets-overview id="elem"></facets-overview>
|
||||
<script>
|
||||
document.querySelector("#elem").protoInput = "{protostr}";
|
||||
</script>"""
|
||||
html = HTML_TEMPLATE.format(protostr=protostr)
|
||||
|
||||
return html
|
||||
|
||||
def generate_facets_dive(config, df, relative_sprite_path):
|
||||
|
||||
jsonstr = df.to_json(orient = 'records')
|
||||
HTML_TEMPLATE = """
|
||||
<script src="https://cdnjs.cloudflare.com/ajax/libs/webcomponentsjs/1.3.3/webcomponents-lite.js"></script>
|
||||
<link rel="import" href="https://raw.githubusercontent.com/PAIR-code/facets/1.0.0/facets-dist/facets-jupyter.html">
|
||||
<facets-dive id="elem" height="600" cross-origin="anonymous" sprite-image-width="{sprite_width}" sprite-image-height="{sprite_height}">
|
||||
</facets-dive>
|
||||
<script>
|
||||
var data = {jsonstr};
|
||||
var atlas_url = "{atlas_url}";
|
||||
document.querySelector("#elem").data = data;
|
||||
document.querySelector("#elem").atlasUrl = atlas_url;
|
||||
</script>"""
|
||||
html = HTML_TEMPLATE.format(jsonstr=jsonstr, atlas_url = relative_sprite_path, sprite_width=TARGET_SIZE_WIDTH, sprite_height=TARGET_SIZE_HEIGHT)
|
||||
|
||||
return html
|
||||
|
||||
|
||||
def main():
|
||||
|
||||
## db manager
|
||||
db_manager = Tools.create_db_manager(CONFIG)
|
||||
|
||||
## request data
|
||||
df = request_data(CONFIG, db_manager)
|
||||
|
||||
## create sprite
|
||||
sprite = create_sprite(CONFIG, df)
|
||||
|
||||
## save sprite
|
||||
sprite.save(os.path.join(CONFIG.get('directory')['reports'], 'figures', 'sprite_altas.png'), "PNG")
|
||||
|
||||
## generate facets
|
||||
html_facets = generate_facets(CONFIG, df)
|
||||
with open(os.path.join(CONFIG.get('directory')['reports'], 'facets.html'),'w') as f:
|
||||
f.write(html_facets)
|
||||
|
||||
## generate facets-dive
|
||||
html_facets_dive = generate_facets_dive(CONFIG, df, './figures/sprite_altas.png')
|
||||
with open(os.path.join(CONFIG.get('directory')['reports'], 'facets-dive.html'), 'w') as f:
|
||||
f.write(html_facets_dive)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
Loading…
Reference in a new issue