1
0
Fork 0
mirror of https://github.com/prise6/smart-iss-posts synced 2024-04-27 11:31:51 +02:00
smart-iss-posts/iss/exec/bdd.py
2019-12-12 01:08:04 +01:00

96 lines
3.7 KiB
Python

import os
import time
import mysql.connector
import pandas as pd
import datetime as dt
import numpy as np
from iss.init_config import CONFIG
from iss.clustering import ClassicalClustering, AdvancedClustering, N2DClustering, DBScanClustering
from iss.tools import Tools
def populate_locations(config, db_manager):
history = pd.read_csv(os.path.join(CONFIG.get("directory")['data_dir'], "raw", "history", "history.txt"), sep=";", names=['latitude', 'longitude', 'id', 'location'])
history['timestamp'] = pd.to_datetime(history.id, format="%Y%m%d-%H%M%S").dt.strftime("%Y-%m-%d %H:%M:%S")
history.fillna('NULL', inplace=True)
history = history[['latitude', 'longitude', 'id', 'timestamp', 'location']]
history_tuple = [tuple(x) for x in history.values]
db_manager.create_pictures_location_table(force=True)
count = db_manager.insert_row_pictures_location(history_tuple)
print("Nombre d'insertion: %s" % count)
def populate_embedding(config, db_manager, clustering_type, clustering_version, clustering_model_type, clustering_model_name, drop=False):
clustering, clustering_config = Tools.load_clustering(CONFIG, clustering_type, clustering_version, clustering_model_type, clustering_model_name)
if drop:
db_manager.drop_embedding_partition(clustering_type, clustering_version, clustering_model_type, clustering_model_name)
if clustering_type == 'n2d':
clustering = N2DClustering(clustering_config)
elif clustering_type == 'classical':
clustering = ClassicalClustering(clustering_config)
elif clustering_type == 'dbscan':
clustering = DBScanClustering(clustering_config)
else:
raise Exception
clustering.load()
model, model_config = Tools.load_model(CONFIG, clustering_model_type, clustering_model_name)
filenames = Tools.list_directory_filenames(CONFIG.get('directory')['collections'])
generator = Tools.load_latent_representation(CONFIG, model, model_config, filenames, 496, None, True)
count = 0
for ids, latents in generator:
pictures_embedding = clustering.predict_embedding(latents)
pictures_label = clustering.predict_label(pictures_embedding)
rows = []
for i, id in enumerate(ids):
rows.append((
id,
float(np.round(pictures_embedding[i][0], 4)),
float(np.round(pictures_embedding[i][1], 4)),
int(pictures_label[i]),
clustering_type,
clustering_version,
clustering_model_type,
clustering_model_name
))
count += db_manager.insert_row_pictures_embedding(rows)
print("Nombre d'insertion: %s / %s" % (count, len(filenames)))
return
def main(action = 'populate_embedding'):
db_manager = Tools.create_db_manager(CONFIG)
if action == 'population_locations':
populate_locations(CONFIG, db_manager)
elif action == 'populate_embedding':
db_manager.create_pictures_embedding_table(False)
to_load = [
{'clustering_type': 'n2d', 'clustering_version': 1, 'clustering_model_type': 'simple_conv', 'clustering_model_name': 'model_colab', 'drop': False},
{'clustering_type': 'n2d', 'clustering_version': 2, 'clustering_model_type': 'simple_conv', 'clustering_model_name': 'model_colab', 'drop': False},
{'clustering_type': 'n2d', 'clustering_version': 3, 'clustering_model_type': 'simple_conv', 'clustering_model_name': 'model_colab', 'drop': False},
]
for kwargs in to_load:
try:
populate_embedding(CONFIG, db_manager, **kwargs)
except Exception as err:
print(err)
pass
else:
pass
if __name__ == '__main__':
main()