96 lines
3.7 KiB
Python
96 lines
3.7 KiB
Python
import os
|
|
import time
|
|
import mysql.connector
|
|
import pandas as pd
|
|
import datetime as dt
|
|
import numpy as np
|
|
|
|
from iss.init_config import CONFIG
|
|
from iss.clustering import ClassicalClustering, AdvancedClustering, N2DClustering, DBScanClustering
|
|
from iss.tools import Tools
|
|
|
|
|
|
def populate_locations(config, db_manager):
    """Rebuild the pictures-location table from the raw history file.

    Reads ``<data_dir>/raw/history/history.txt`` (semicolon-separated, no
    header row; columns ``latitude``, ``longitude``, ``id``, ``location``),
    derives a ``timestamp`` column from ``id``, and inserts every row into
    the table managed by ``db_manager``. The number of inserted rows, as
    reported by ``db_manager``, is printed.

    Args:
        config: Object whose ``get("directory")`` returns a mapping holding
            a ``'data_dir'`` entry.
        db_manager: Object exposing
            ``create_pictures_location_table(force=...)`` and
            ``insert_row_pictures_location(rows)``.
    """
    # Use the configuration that was passed in rather than a module global,
    # so the caller actually controls which data directory is read.
    history_path = os.path.join(
        config.get("directory")['data_dir'], "raw", "history", "history.txt"
    )
    history = pd.read_csv(
        history_path,
        sep=";",
        names=['latitude', 'longitude', 'id', 'location'],
    )

    # ``id`` is formatted as YYYYMMDD-HHMMSS; turn it into a
    # "YYYY-MM-DD HH:MM:SS" string.
    history['timestamp'] = pd.to_datetime(
        history['id'], format="%Y%m%d-%H%M%S"
    ).dt.strftime("%Y-%m-%d %H:%M:%S")

    # Missing values become the literal string 'NULL'.
    # NOTE(review): confirm db_manager turns this into a SQL NULL.
    history.fillna('NULL', inplace=True)

    # Column order of the tuples handed to the db manager.
    history = history[['latitude', 'longitude', 'id', 'timestamp', 'location']]
    history_tuple = [tuple(row) for row in history.values]

    # NOTE(review): force=True presumably recreates the table -- confirm.
    db_manager.create_pictures_location_table(force=True)
    count = db_manager.insert_row_pictures_location(history_tuple)

    print("Nombre d'insertion: %s" % count)
|
|
|
|
|
|
def populate_embedding(config, db_manager, clustering_type, clustering_version, clustering_model_type, clustering_model_name, drop=False):
    """Compute and store the 2-D embedding and cluster label of every picture.

    Latent representations are produced batch by batch, passed through the
    clustering's ``predict_embedding`` / ``predict_label``, and inserted via
    ``db_manager.insert_row_pictures_embedding``. A running total of
    inserted rows is printed after each batch.

    Args:
        config: Project configuration; passed to the ``Tools`` loaders and
            queried with ``get('directory')['collections']``.
        db_manager: Object exposing ``drop_embedding_partition(...)`` and
            ``insert_row_pictures_embedding(rows)``.
        clustering_type: One of ``'n2d'``, ``'classical'`` or ``'dbscan'``.
        clustering_version: Version identifier of the clustering; stored in
            every row.
        clustering_model_type: Type of the model producing the latents.
        clustering_model_name: Name of the model producing the latents.
        drop: When true, call ``db_manager.drop_embedding_partition`` for
            this clustering before inserting.

    Raises:
        ValueError: If ``clustering_type`` is not supported. This is checked
            first, so nothing is loaded or dropped for an unusable type.
    """
    supported_types = ('n2d', 'classical', 'dbscan')
    if clustering_type not in supported_types:
        raise ValueError(
            "Unsupported clustering_type %r; expected one of: %s"
            % (clustering_type, ", ".join(supported_types))
        )

    # Only the configuration is kept; the clustering object itself is
    # re-instantiated below from the class matching ``clustering_type``.
    _, clustering_config = Tools.load_clustering(
        config,
        clustering_type,
        clustering_version,
        clustering_model_type,
        clustering_model_name,
    )

    if drop:
        db_manager.drop_embedding_partition(
            clustering_type,
            clustering_version,
            clustering_model_type,
            clustering_model_name,
        )

    clustering_classes = {
        'n2d': N2DClustering,
        'classical': ClassicalClustering,
        'dbscan': DBScanClustering,
    }
    clustering = clustering_classes[clustering_type](clustering_config)
    clustering.load()

    model, model_config = Tools.load_model(
        config, clustering_model_type, clustering_model_name
    )
    filenames = Tools.list_directory_filenames(
        config.get('directory')['collections']
    )
    # NOTE(review): 496 is presumably the batch size; the meaning of the
    # trailing (None, True) arguments is not visible here -- confirm
    # against Tools.load_latent_representation.
    generator = Tools.load_latent_representation(
        config, model, model_config, filenames, 496, None, True
    )

    count = 0
    for ids, latents in generator:
        pictures_embedding = clustering.predict_embedding(latents)
        pictures_label = clustering.predict_label(pictures_embedding)

        rows = []
        for i, picture_id in enumerate(ids):
            rows.append((
                picture_id,
                # Only the first two embedding components are stored,
                # rounded to 4 decimals and converted to plain floats.
                float(np.round(pictures_embedding[i][0], 4)),
                float(np.round(pictures_embedding[i][1], 4)),
                int(pictures_label[i]),
                clustering_type,
                clustering_version,
                clustering_model_type,
                clustering_model_name,
            ))

        count += db_manager.insert_row_pictures_embedding(rows)
        print("Nombre d'insertion: %s / %s" % (count, len(filenames)))
|
|
|
|
|
|
def main(action = 'populate_embedding'):
    """Run one database population action.

    Args:
        action: ``'populate_embedding'`` (default) inserts embeddings for
            the hard-coded list of clusterings below;
            ``'populate_locations'`` rebuilds the pictures-location table.
            The historical spelling ``'population_locations'`` is still
            accepted. Any other value does nothing beyond creating the
            db manager.
    """
    db_manager = Tools.create_db_manager(CONFIG)

    # 'population_locations' was the only spelling originally recognised;
    # keep it working alongside the name that matches the function.
    if action in ('populate_locations', 'population_locations'):
        populate_locations(CONFIG, db_manager)
    elif action == 'populate_embedding':
        # NOTE(review): the positional False is presumably ``force`` (do not
        # recreate an existing table) -- confirm against the db manager.
        db_manager.create_pictures_embedding_table(False)
        to_load = [
            {
                'clustering_type': 'n2d',
                'clustering_version': version,
                'clustering_model_type': 'simple_conv',
                'clustering_model_name': 'model_colab',
                'drop': False,
            }
            for version in (1, 2, 3)
        ]
        for kwargs in to_load:
            try:
                populate_embedding(CONFIG, db_manager, **kwargs)
            except Exception as err:
                # Best effort: report the failure and carry on with the
                # next clustering instead of aborting the whole run.
                print(err)
|
|
|
|
|
|
# Script entry point: run main() with its default action when this file is
# executed directly (not when it is imported).
if __name__ == '__main__':
    main()