1
0
Fork 0
mirror of https://github.com/prise6/smart-iss-posts synced 2024-05-02 05:42:43 +02:00
smart-iss-posts/notebooks/test_clustering.ipynb

771 lines
208 KiB
Plaintext
Raw Permalink Normal View History

2019-04-16 21:54:11 +02:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.chdir(os.getcwd() + '/..')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"%load_ext autoreload\n",
"%autoreload 2"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Using TensorFlow backend.\n"
]
}
],
"source": [
"from iss.tools import Config\n",
"from iss.tools import Tools\n",
"from iss.models import SimpleConvAutoEncoder\n",
"from iss.models import ImageDataGeneratorWrapper\n",
"import pandas as pd\n",
"import datetime as dt\n",
"import time\n",
"import numpy as np\n",
"from dotenv import find_dotenv, load_dotenv\n",
"from sklearn.manifold import TSNE\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"load_dotenv(find_dotenv())\n",
"cfg = Config(project_dir = os.getenv(\"PROJECT_DIR\"), mode = os.getenv(\"MODE\"))\n",
"model_type = 'simple_conv'\n",
"cfg.get('models')[model_type]['model_name'] = 'model_colab'"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"model = SimpleConvAutoEncoder(cfg.get('models')[model_type])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"model.load(\"final_model_colab\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"input_1 (InputLayer) (None, 27, 48, 3) 0 \n",
"_________________________________________________________________\n",
"enc_conv_1 (Conv2D) (None, 27, 48, 64) 1792 \n",
"_________________________________________________________________\n",
"batch_normalization_1 (Batch (None, 27, 48, 64) 256 \n",
"_________________________________________________________________\n",
"activation_1 (Activation) (None, 27, 48, 64) 0 \n",
"_________________________________________________________________\n",
"max_pooling2d_1 (MaxPooling2 (None, 13, 24, 64) 0 \n",
"_________________________________________________________________\n",
"enc_conv_2 (Conv2D) (None, 13, 24, 32) 18464 \n",
"_________________________________________________________________\n",
"batch_normalization_2 (Batch (None, 13, 24, 32) 128 \n",
"_________________________________________________________________\n",
"activation_2 (Activation) (None, 13, 24, 32) 0 \n",
"_________________________________________________________________\n",
"max_pooling2d_2 (MaxPooling2 (None, 6, 12, 32) 0 \n",
"_________________________________________________________________\n",
"enc_conv_3 (Conv2D) (None, 6, 12, 16) 4624 \n",
"_________________________________________________________________\n",
"batch_normalization_3 (Batch (None, 6, 12, 16) 64 \n",
"_________________________________________________________________\n",
"activation_3 (Activation) (None, 6, 12, 16) 0 \n",
"_________________________________________________________________\n",
"max_pooling2d_3 (MaxPooling2 (None, 3, 6, 16) 0 \n",
"=================================================================\n",
"Total params: 25,328\n",
"Trainable params: 25,104\n",
"Non-trainable params: 224\n",
"_________________________________________________________________\n"
]
}
],
"source": [
"model.encoder_model.summary()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"filenames = Tools.list_directory_filenames('data/processed/models/autoencoder/train/k/')\n",
"generator_imgs = Tools.generator_np_picture_from_filenames(filenames, target_size = (27, 48), batch = 496, nb_batch = 2)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
2019-11-16 18:29:42 +01:00
"outputs": [],
2019-04-16 21:54:11 +02:00
"source": [
"pictures_preds = Tools.encoded_pictures_from_generator(generator_imgs, model)"
]
},
{
"cell_type": "code",
2019-11-16 18:29:42 +01:00
"execution_count": 13,
2019-04-16 21:54:11 +02:00
"metadata": {},
"outputs": [
{
2019-11-16 18:29:42 +01:00
"ename": "AttributeError",
"evalue": "'tuple' object has no attribute 'reshape'",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m<ipython-input-13-e3d22d0becf7>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mintermediate_output\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpictures_preds\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mreshape\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;36m992\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m3\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;36m6\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0;36m16\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[0;31mAttributeError\u001b[0m: 'tuple' object has no attribute 'reshape'"
]
2019-04-16 21:54:11 +02:00
}
],
"source": [
"# The traceback recorded for this cell shows pictures_preds arriving as a tuple, which has no .reshape.\n",
"# NOTE(review): assumes the encoded array is the last element of that tuple -- confirm against Tools.encoded_pictures_from_generator.\n",
"encoded = pictures_preds[-1] if isinstance(pictures_preds, tuple) else pictures_preds\n",
"# Flatten each (3, 6, 16) encoding to 288 features; -1 infers the sample count instead of hard-coding 992 (batch * nb_batch).\n",
"intermediate_output = np.asarray(encoded).reshape((-1, 3*6*16))"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(992, 288)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"intermediate_output.shape"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"# 2-D t-SNE embedding of the flattened encodings; the seed is fixed so that Restart & Run All reproduces the same plot.\n",
"output_tnse = TSNE(n_components=2, random_state=42).fit_transform(intermediate_output)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXwAAAD8CAYAAAB0IB+mAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJztvX2QVPd55/t9ujlAD07UQ0zWUosRRKuFNRkzY81K7OWPG0giHCNQR5KFFSnrm3WtaqucuhHRzhpirgEHXyZF2WhfstlVJVvrlGRp0EvGyHgvsg2uW8suyINnEBkL1pIlQI1uTBYGW0wLenqe+0f3rzl9+vc7L33O6dfnU6US3dPd55x+eX7P73n5PsTMEARBEDqfRLNPQBAEQWgMYvAFQRC6BDH4giAIXYIYfEEQhC5BDL4gCEKXIAZfEAShSxCDLwiC0CWIwRcEQegSxOALgiB0CfOieiEiSgIYB5Bj5vuJaDmAFwAsBvAjAL/PzDfcXuOjH/0oL1u2LKpTEgRB6ApOnjz598y8xOtxkRl8AH8E4E0Av1y+/WcA9jPzC0T0HwF8HsBfuL3AsmXLMD4+HuEpCYIgdD5EdM7P4yIJ6RDR7QA2AvjL8m0CsB7AS+WHfANANopjCYIgCPURVQz/aQD/GsBc+favAJhm5tny7fcAZCI6liAIglAHoQ0+Ed0P4GfMfNJ+t+ahWllOInqCiMaJaPzSpUthT0cQBEEwEIWHvxbAZiJ6F6Uk7XqUPP40Eakcwe0ALuqezMzPMPMQMw8tWeKZcxAEQRDqJLTBZ+btzHw7My8D8FkAR5j5MQBHATxcftjnAHwr7LEEQRCE+omySsfJFwG8QER7AEwA+KsYjyUIQgjGJnLYd/gsLk7ncVs6heENK5AdlLRbpxGpwWfmHwD4QfnfPwVwT5SvLwhC9IxN5LD9ldPIF4oAgNx0HttfOQ0AYvQ7jDg9fEEQWhjl1eem8zV/yxeK2P3qlBj8DkMMviB0GWMTOex+dQpXZgquj7syU8DYRE6MfgchWjqC0EWMTeQw/OIpT2Ov2Hf4bMxnJDQS8fAFoQtwC9+4cTHg44XWRgy+IHQ4zqRsEG5Lp2I4I6FZiMEXGsLYRA67Dk5hOl8KJfT2WNi5aZXEhxvAvsNn6zL2ADC8YUXEZyM0EzH4QizY67rTPRauzhQqQktAKSE4/NIpAFL6FzdhwjLy2XQWkrQVImfH2GlsHZ1EbjoPRsm4z2keVyiyJAUbQL1hmSTpJLGEdkY8fCEynGEbP+Sm81g7ckQ6O+vAuYtiBq7mCzWdssMbVtQVwy+yVu/Q81ykU7d1EYMvREKYxKB0dgbH+X7byyyd76d6T5VBBgF+bHnGtjNwM+jSqds+EAdYxeNmaGiIZeJVe7J25Ejgkj8nmXQKx7atj+iMOhs/77fp/VS1+IU5799+kghrfq0XPzp/tWoxT1lJ7H2wH9nBjPFc5PNsHER0kpmHvB4nMXwhEsIae0BqvoPg570yPSY7mMG+z6xGJp0CoWSYH1/TV+XRK4rMOPb25Zqdm5JecDuOfJ6th4R0hNCMTeRAMEy4CcBCK4G1I0ckDuyD29Ipz0XWLVlrD/XYuXP7d3zH7q/MFLBj7LTxXKSGv/UQD18Izb7DZ0MbewDIF+YqlT0qDjw2kYvglTuP4Q0rtGPlnI/xYmwih7UjR7B82yGsHTkSKFELAM8dP491K5fASlafjZUkqeFvQcTgC6GJIpyjI18oStmmgexgxnWRTZB3wlQlW+2LbFAYwDdPnEehWH02xSJj96tTlYVEFu7WQEI6QmiSRIE9Q79IHNhMxiWs83v39nk+P0wHrh1d7ncONyuHctN5DL90CrsOTmnLRoXGIR6+EJq4jD0ApHus2F673RnesAIpK1lz/9o7F2NPtt/z+Y1cTAtFxnS+UNlJbB2dxI6x0w07vlAitMEnooVE9DoRnSKiKSLaXb
5/ORGdIKKfENEoEc0Pf7pCK6Kr7gBKnn86Fc5gf/DhrIQDDGQHM9j7YH9Vtc3TWwbw3L/4p76e38ykKqMU/5fPtrFE4eFfB7CemVcDGADwKSJaA+DPAOxn5rsAXAHw+QiOJbQgOk8zZSXxtUdWY9fmVVov1C+FOZFfcCM7mMGxbevxzshGHNu2PlCYZN3KJZ6J3zhhiN5+owkdw+dS59YH5ZtW+T8GsB7A75Xv/waAXQD+IuzxhNbD2cmpYrTqvrBxYonjR8eOsdN4/sSFWMNwQZDPtrFEkrQloiSAkwD+IYA/B/A2gGlmni0/5D0AWteDiJ4A8AQA9PV5J5qE1sRZ1x1GasGJ1HNHw46x03j2+Plmn0YV8tk2lkiStsxcZOYBALcDuAfAP9Y9zPDcZ5h5iJmHlixZEsXpCC1AVBUggGiyR8XzJy7E+vpB1TVTVlI+2wYTaVkmM08T0Q8ArAGQJqJ5ZS//dgAXozyW0NpEtVXv7bGqdg72kESSCI/eu9RXRYoQbzVVphzG87urSxJVtHiExhFFlc4SIkqX/50C8FsA3gRwFMDD5Yd9DsC3wh5LaB+i2KqnrCR2blpVua1CEspwFZnx7PHzUt7nkzj17detXOJ7V6cS+mLsG08UIZ1bARwlojcA/BDAd5n52wC+COCPiegtAL8C4K8iOJbQJphqxIPw0N3VeQFTSCLuUEUr45RGcCtzfPTepbGcg5UAXj6Z892pqz7XIOcuREMUVTpvABjU3P9TlOL5Qhdir9ypV3rh6JlLVbdNIYlWqThpNEF16FXoyxkSO3rmUih5jMIcUJjzn695/sQFPHv8fJXgnl8NfRm0Eg7RwxcagvqhBjEsBOCdkY2V2yYlxyQR3t776ShOs6XwMm5R6dBHWVEVFrdz152nWjSUvEemSxcB0cMXWgrVIPT0lgHfoR5nHsAUkogrVNFM1JASu7DZ8IunqsIeUerQL5jXGqbAzSHQ5QjU8q8cAVFZdac1PmWha9DJAay9c3FNx6euZG9Pth+Pr+mrJB+TRHh8TV9HVunsOjhVM5GqMMfYdXCqctuUGE8Q+TZ4ymsOMoe4WfhdyERl1YyoZQoNRzd8w29sdk+2vyMNvBOTAbbfbyqDLDJj+yunMX7uMo6eueT6nkbZLxE3foa+KKSDV48YfKElME1gEsyo92vrgcmaoeT5QhHPHT/vmRRtRcO4duSIdnEa3rACT45O+noN6eDVIyEdQWgR7GWKCUPJfK9GLtpUd+G8WxfqaEXDaIrDZwcz2ut3Ih28ZsTDF4QWYGwih+GXTlUmR+mMuJWkqkY0ILjaZG46j+XbDiHdY4G5FCKKYh6xX6wEYQ5AUTc1xYZanJxe/s5Nq1y9/G6t0vGLGHwBgNQ3N5s/eeWNmjGBQGlUITOMn0k9IRnGzWlU6nbcRp9QuoZ1K5dg9PUL8JM10F1bdjCDXQentDmOdMpyLemU77eEdAToSwCfjHEikXRYVjM2kcNMYU77tzkG9m8ZAABsHZ2seb9MIZlF85OBOp2VsY9DfoFQuoZj29bj0Bvv11QfmTBd267Nq7Qhr2s39MNydLN7u7V0Uwy+oC0BBIBnY5hItGPsNLaOTtYsLoNfea0rf4CAd1hm+CXzYmwaPvO7n8xgoRX85x1H17IadDI2kavaWbjhFYfXrRmFon5Yjq4SqVtLNyWkI7jWYOviqEEZm8hh96tTrj/2KzMFX631nYhbWIYAbahH6dofPXMJ+UKxqtN03colePlkTtuRGpRMOoWZG7O+DbWJi9N5VwNLVArJXJkpIElUZZB1paQmVI7CHraJskGt3REPX3Al7I9CJSP9GIxu9brcKmXcjPRzx89X6tKLzBWvWC0CztcJGqxRrzcd0tgDpWt0+y7tf2QAGz9xKwjeXbNe38lKZ/JLpzCw+zXje9iKFUpxIwZfcG2rT/sog3PDlIw00Y1ely4sQwAeX+M+Ac5Udml6DxmodDj3+Aj3qNcLahh1IabhDS
uMr6MG3dv7BpznYMfv+RSKbNy9dmvppoR0BFyf1ScMAXONtx92jJ02JiNNdJrXNTaRq6oq6e2xsHPTqqowhWkmcHY
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.scatter(\n",
" output_tnse[:,0],\n",
" output_tnse[:,1]\n",
")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.decomposition import PCA"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(992, 288)"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"intermediate_output.shape"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"pca = PCA( n_components = 10)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"PCA(copy=True, iterated_power='auto', n_components=10, random_state=None,\n",
" svd_solver='auto', tol=0.0, whiten=False)"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pca.fit(intermediate_output)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 0.42109594 0.57931983 0.66647762 0.72199494 0.75129557 0.77454692\n",
" 0.79685652 0.81197721 0.82250977 0.83269101]\n"
]
}
],
"source": [
"# Cumulative share of variance explained by the first k principal components.\n",
"somme_cum = pca.explained_variance_ratio_.cumsum()\n",
"print(somme_cum)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<matplotlib.lines.Line2D at 0x7fcf7cde9a90>]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAHihJREFUeJzt3Xl0VeW9//H3l4SQiRBIQgIZSKLILFMEcaitiKKt2tZW0arVStFerZ2uv9r+bu29tr3r/lzt7XBr20vFqmDFoRNVFOss4kDCHCZpBhIgCSEkQELIcJ7fH4kYMJgDJNnn7PN5rcVa2SdPTr5syIeHZ9rmnENERPxlgNcFiIhI71O4i4j4kMJdRMSHFO4iIj6kcBcR8SGFu4iIDyncRUR8SOEuIuJDCncRER+K9uobp6amutzcXK++vYhIWCoqKqp1zqX11M6zcM/NzaWwsNCrby8iEpbMrDyYdhqWERHxIYW7iIgPKdxFRHxI4S4i4kMKdxERH1K4i4j4kMJdRMSHPFvnLiISKRoOt1JW20jZvkZKaxuZPTadSVlD+vR7KtxFRHrBoSNtlNV2hHdZbSOl+xo7A72JusaWo+3MICVxkMJdRCRUNLW0UVbbdLQH/mFvvInaQ0eOaZuRFEtuajyXTUgnNyWB3NQE8lITyBkWT+zAqD6vVeEuItJFc2s75fuaOsK7s/f9wcfVB44N8LTBg8hLSeDisWkd4d0Z4qNS4omP8TZeFe4iEnGOtLVTUddEaW3TsUMotY3sOdCMcx+2TUmIITc1gQvOTCMvNZ7c1ISjPfHEQaEboaFbmYjIaWgPOHbXH6aktpGSvYco7eyBl9Y2srv+MIEuAZ4cP5DclARm5qd0Bnc8eakJjEpJYEjcQO9+E6dB4S4iYcs5R11jC6W1jZTsbaSktpHS2kOdwyhNtLQFjrYdPCia3NQEpuYM5fNTMzt64J1DKUMTYjz8XfQNhbuIhLwPJjJLu/TCP+iRH2huO9puYJSRMyye/LREPjVmOHmpCeSnJZKXmkBqYgxm5uHvon8p3EUkJLS1B9h1dBjlwx54yd5G9jQ0H9N2xJBY8tMSuGrKSPJSE8lPSyA/NYHM5Diio7Q3ExTuItKPnHPUHmrptge+s66J1vYPB8KTYqPJT0tkVn7KMT3w3FTvV6KEA90hEekTh460saGynnUV9WyvOni0F37wyIfDKDFRA8hNjefM4YnMGZ9BfmoC+Wkd68GHJUTWMEpvU7iLyGlrDzi2Vx9kXUU963Z2BPr7NQePrkjJTI4jPy2Bz03LPNoLz09NYGRyHFEDFOB9QeEuIietqqGZdRX7WdsZ5ht3NdDU0g50LCucnJXM3IkZTMlJZkpWsi9Xo4S6oMLdzOYCvwSigIecc/913OdzgEeB5M429zrnlvdyrSLigcYjbWzc1XBMr7zqQMcE58AoY/yIJL44PasjyLOHkpsSr+GUENBjuJtZFPAgMAeoBFab2TLn3OYuzf4NeMo591szGw8sB3L7oF4R6UPtAcf7NQePhvi6inq2V384vJIzLJ4ZecOYkp3MlJxkxo9I6pdzUuTkBdNznwHscM6VAJjZUuBqoGu4OyCp8+MhwO7eLFJE+kb1gWbWHg3y/WysbKCxc3hlSNxAJmcnc+mEDKZkD2FyVjIpiYM8rliCFUy4ZwIVXa4rgZnHtfl34EUz+zqQAFzSK9WJSK9pamljY2XD0R75uor6o+vHowcY40cmcc30rI5eeXYyeakJGl4JY8GEe3d/uu646+uBR5xzPzOzWcBiM5vonAt0bWRmC4AFADk5OadSr4gEaVf9Yd56v7Zj0rNzeKW9c3wle1gcBbnDjgb5hJEaXvGbYMK9Esjucp3FR4ddbgPmAjjn3jazWCAVqOnayDm3EFgIUFBQcPw/ECJyGpxz7Kg5xIriKlYUV7NxVwMAg2OjmZKdzCXjzmBKdjKTs5NJ1fCK7w
UT7quB0WaWB+wC5gE3HNdmJzAbeMTMxgGxwN7eLFREPioQcKyvrGdFcTUvFldRUtsIwJTsZO69fCyzxw7njLREBmgtecTpMdydc21mdhewgo5ljg8754rN7H6g0Dm3DPgO8Hsz+xYdQza3OOfUMxfpA63tAd4rreOFTVX8Y3M1VQeaiR5gnJufwq3n5zJnfAYZQ2K9LlM8Zl5lcEFBgSssLPTke4uEm8Mt7bzx/l5WFFfx8pYaGg63EjtwABedlcZlEzKYPTadIfHhee64nBwzK3LOFfTUTjtURUJUQ1MrL2+tZkVxFa9v30tza4Ck2GguGZfOZRMz+MToNOJiNAkq3VO4i4SQ6gPNvNg5IfpOyT7aAo70pEF8cXo2l03IYGb+MAbqSFsJgsJdxGOltY2sKK7ihU1VrKuoByA/NYH5F+Zz2YR0Jmcla0JUTprCXaSfOeco3n2gc8liFdurDwEwMTOJ78w5i7kTMzhzeKI2EMlpUbiL9IP2gGN1WR0riqt4sbiaXfWHGWBwTu4w7vvMeC6dkE7W0HivyxQfUbiL9JHm1nZW/bOWFzZV8dKWGuoaW4iJHsCFZ6byjdmjmT1uuM5qkT6jcBfpRU0tbbyytYbnN1Xx2tYaGlvaSRwUzafGDmfuhAwuGpNG4iD92Enf098ykdPUeKSNl7fWsHzDHl7bXkNza4DUxBiumjKSSydkcN4ZKQyK1pJF6V8Kd5FTcLC5lVe21vDchj28vn0vR9oCpA0exLUF2Vw+cQQz8obp8XHiKYW7SJAONLfy0uZqlm+s4o3399LSFiA9aRDXz8jhikkjmD5qqAJdQobCXeRjNDS18o8t1SzfuIc3399La7tjxJBYbpw5iismZTAtZ6jWoEtIUriLHKe+qYUXi6tZvmkPb+2opbXdkZkcx5dn5XL5pBFMzdamIgl9CncRoK6xhReLq1i+qYpVO2ppCziyhsZx6/l5XDFpBJOzhmhTkYQVhbtErH2HjrCiuGPI5e2SfbQHHDnD4pl/YT5XTMpgUqYCXcKXwl0iyt6DR3ihuIrnN+7hnZJ9BBzkpsRz+yfyuWLSCCaMTFKgiy8o3MX3ag4080JxFcs37uG90joCruNgrn/55JlcMWkE40YMVqCL7yjcxZeqGpp5YdMelm+sYnV5Hc7BmcMTuevi0VwxKYMx6Qp08TeFu/hGfVMLf1m7i+c27KGwfD8AY9IH843Zo/n0pBGMTh/scYUi/UfhLmFvQ2U9j71dzt/X7+ZIW4CxGYP5zpyzuHzSCM4cnuh1eSKeULhLWGpubefZDXtY/HYZ6ysbiI+J4gvTs7jx3FGMG5HkdXkinlO4S1jZua+Jx98t58nCCuqbWjlzeCL/cdUEPj8tk8GxekC0yAcU7hLy2gOON7bv5bG3y3ht+14GmHHp+HRumjWKWfkpmhgV6YbCXUJWXWMLTxdWsOTdcirqDpM2eBBfv3g0N8zIIWNIrNfliYQ0hbuEnHUV9Tz2dhnPbthDS1uAmXnD+O7csVw2IYOBUQO8Lk8kLCjcJSQ0t7azbP1ulrxTzobKBhJioriuIJsbzx3FmAwtYRQ5WQp38VT5vkaWvFPOU4WVNBxuZfTwRH509QQ+O1UTpCKnQ+Eu/a494HhtWw2PvV3O69v3Ej3AuGxCBjfNGsXMvGGaIBXpBQp36Tf7Dh3hqcJKHn+3nMr9h0lPGsQ3LxnN9TNySE/SBKlIb1K4S59yzrG2op4lb5d3TJC2B5iVn8L3rxjHnPHpmiAV6SMKd+kTh1vaWbZ+F4vfKWfTrgMkDorm+hkdE6Q640Wk7yncpVeV1nZMkD5dWMGB5jbGpA/mx5+dyGenZpI4SH/dRPqLftqkV7y2rYZFK0t58/1aogcYcydmcPOsXM7JHaoJUhEPKNzltLS2B/jJc1t4ZFUZGUmxfHvOWcw7J5vhmiAV8ZTCXU5ZzYFm7vzjGlaX7ee2C/K49/KxmiAVCREKdzklReV1fG3JGg42t/HLeV
O4ekqm1yWJSBcKdzkpzjmWvFPO/c9uZmRyHI/dNoOxGTo/XSTUKNwlaM2t7Xz/Lxv585pdXDx2OD+/bgpD4nREgEg
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.plot(somme_cum[0:10])"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(992, 10)\n"
]
}
],
"source": [
"reduction = pca.transform(intermediate_output)\n",
"print(reduction.shape)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x7fcf7cdc4eb8>"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD8CAYAAAB3u9PLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJztnX+QVNd157+nmwfqIbYaYpKV2oyQXQ6sKaIZM7FIsZsKSiIcK0JjYRkrYuPaTUX7h1O1YO3UoooSBq9SIsva0mY3lV1l41pnrcggyZmgyFkUG7KpkoPsITMYE0MiWQKpUdlkYZyIaUFPz90/um/z+vW99937fvR7PXM+VRTw+se7fd9759x7fpIQAgzDMAxTyHoADMMwTD5ghcAwDMMAYIXAMAzDtGCFwDAMwwBghcAwDMO0YIXAMAzDAGCFwDAMw7RghcAwDMMAYIXAMAzDtFiS9QBseM973iPWrFmT9TAYhmH6ihMnTvyDEGKV7fv7QiGsWbMGk5OTWQ+DYRimryCicy7vZ5MRwzAMA4AVAsMwDNOCFQLDMAwDgBUCwzAM04IVAsMwDAOgT6KMGCaPTExVceDIWVyYqeHmcgljW9didLiS9bAYJjKsEBgmAhNTVTz8lVOo1RsAgOpMDQ9/5RQAsFJg+hZWCAwTgQNHzraVgaRWb+DAkbOpKgTelTBpwgqBYSJwYabmdDwJeFfCpA07lRkmAjeXS07Hk8C0K2GYJGCFwDARGNu6FiWv2HGs5BUxtnVtaufMYlfCLC5YITBMBEaHK3js3g2olEsgAJVyCY/duyFV000WuxJmccE+BCZz+tVROjpc6ek4x7au7fAhAOnvSpjFBSsEJlPYUWqPnI9+VJ5Mf8AKgcmUrMI3o5L1bsZlV5L1WJn+gxUCkzomwdRPjtJ+2s3001iZ/MBOZSZVpGCqztQgcF0wTUxVAcRzlE5MVbF5/1HcuucFbN5/tP2dadFPYZ/9NFYmP7BCYFIlTDCpwje9IuHK1TmjoA9TNK7YKJd+2s3001iZ/MAKgUmVMMEUDN9cMeABApip1Y2CPskVsK1y6aewz34aK5MfWCEwqWIjmEaHK3hpzx14bf9dGFi6BPV50fFelaBPcgVsq1xsk9F6bcpSoRoroansshoTk39YITCp4prRayvok1wB25xTOsZr9QaKRACAIlFbcUgBm7QpKyr+nRfQVAZSzVZnath9cBqPTJzq6ZiY/MMKgUkNlRANy+i1FfRJlo4IO6dfyANAQ4iOv/1CX7fb2Pf8aauxJLm7kDuvSrkEEXhNAHjq+HneKTAdsEJgUkElRKXANoU92gr6JEtHhJ1TJeSDyJ2CbrdxebYeKnzT2l3oxiQAjjpiOkhEIRDRF4joB0T0Hd+xlUT0F0T0962/V7SOExH9LhG9QkTfJqIPJTEGJl9Edfq6CHq/72Fs61ocOHI20so67Jy2fgmZZ6Ej7LenFSpqGhNHHTF+ktoh/C8AHwkc2wPg60KIDwD4euv/APCLAD7Q+vMggN9PaAxMjojj9PUL+pf23BG66k9iZS3P+fiOIQDA7oPTbcVi65eQSXc6wn57WqGiY1vXgjSvcdQR4ycRhSCE+CsAlwKH7wHwxda/vwhg1Hf8j0ST4wDKRHRTEuNg8kMvwx6TWlnrFMuWdau6TEpB/OawcslTvifst6c1Z6PDFTywabBLKXBhPCZImj6EHxdCvAUArb9/rHW8AuAN3/vebB3rgIgeJKJJIpq8ePFiisNk0qCX/QKirqyDDtzxw6eViuXYmYtdJqWdmwY7/r99Y6VtsgKayXV+bH57mnP26OgGPL5jqKflupn+I4taRqrdazAIAkKIJwE8CQAjIyNdrzP5ppeVOW8ul9rO6+BxHapaPzqqM7X27ygPeLhydQ5PHT+Pm8slPLBpEC98+y186fj59vtnanV4BcKAV8BsfR5AU7GMH25GGunmIO0563
W5bqb/SFMhfJ+IbhJCvNUyCf2gdfxNAKt973svgAspjiMyXC0yHrYCKO48R+kTYBM55EcqjMuz9Y5jfkXgpz4vuhLsZmp1jD1zEoBZKfA9xmRFmiajwwA+1fr3pwD8qe/4r7SijTYB+KE0LeWJvCQYLXSScgi7hqBmFV1Tnxcc6snklkR2CET0NICfBfAeInoTwF4A+wEcIqJfBXAewH2tt38VwEcBvAJgFsC/TmIMSdNvdfr7laTm2XVlrTMzFQiYT9lAmZUy4h0vE0YiCkEIcb/mpZ9TvFcA+HQS500TrhbZG3o9zxNTVex7/nSH6UdS8opOZiQT/lIRQbII9eT+CIwNnKmsgatF9oY05llX/mFiqoqxZ08qlcGKAQ/bN1ZQ0AXsO1AueXhg02BXpBEAeAXKJNST+yMwNrBC0NDLsElJHqpk9hqbeXaZF5NPYt/zp1FvqNftM7N1fOn4+cjmIn846vJlS/DU8fNYvnQJli+9/tvKJQ8H7rstkxU573gZG7iFpoZeNzRfrFv6sHl2nRdTcTnVzkAS123w2v67usY6U6uj5BXxxI6hzK9hlNBcZvHBCsFAL0MAF7MT2zTPunl56JA6fNNUXC4tlrZMQ3m+hlFCc5nFB5uMcgJv6dXofn9DCKeuZmlyrSEwMVXN9TVMsjoss3DhHUJO4C29Gt28AOrVt2ol7BUIc0JApBhOuu/501bXMBjlVC55GN+2vieCmZPemDB4h5ATsnBi9wOqefETXH0HV8LlkgcQlMqg5BWxc9NgaOE6Gy7P1pVF8PzXUBXlJLOXF0MAAZN/eIeQE3rtxM6CKIlR8vWHDp1sdyjzo9pB+VfCm/cfxUyt239QJGqbTEZuWYnPHJqOnZAmi+AdOHIW1ZlaR4tNoHltVVFOMns56WvNiWiMKyTS3EcnxMjIiJicnMx6GEwEpFCqztS6krVKXtHajh2M4JGf376xgmNnLmqF3q17XlBGEBGakUGSNa0qpXEgAI/vGML44dNdSsgmA7pSLnX8DiD6AkE3X+w3WFwQ0QkhxIjt+3mHwKTGIxOn8NTx822BHJSHLhE4qh3UlnWr8NyJqjEk1daub8ostkWg2VhH9T1hyoBwvYBedaaGXQenO153DUPOc8QTk19YITCpMDFV7VAGOlwicIJO0c37j4YKPV245ZZ1q7B5/1Fj2esoRFUqNp9zEeh5jnhi8gsrhD6hX+zBj0ycwtMvv6G096twjaLyz4PuDH6hZ7Oz6CdsBTpHrTFRYIXQB/RLFvMjE6e0/QFUuEZRqeziKoJCz2Zn0S/YCnRORGOiwGGnfUC/FCZ7+uU3wt/UIkpilE1TGxuhl7SZqFd4BcLstTmrmk4y/Nbf3/kGjx93xgzvEPqAfrEHh5mJCMADmwbx6OiGSN9v+r0EWJnSbBzIBWqGpQY7niVFkQj3374ax85ctFZOJa+AuXnRzmGw3SVenZtv//vybD2XO0smP7BC6ANuLHnKWPqwnsG99jkUibRKoZLAGHR28Uq5hJf23GH1HQeOnDUqA69IOPDx2wBAGT4al9d9oa42JjA5bzJ010+Yk5kjjRhXeA+Zcyamqrhyba7ruKmuvmtbyqTKbt9/+2rl8Z2bBvHSnjtiCyFV1rIM17Qdd9iuquFLHLtytXve4xDsjqAy60hklVQ5b1F2if2ys2TyA+8Qco4uu/VHbliSyMowzGHtstOQpiAZZSRNI1FNREH8EUPBRDedCSU4/vKAZ6x8Og9g96FplJYUEjcZFQqEialqx/ikwztsnqNEDXGkEeMKK4Sco1vNzRiEmsvKMMxh7Rrd9OjohsQUgAopQFU5BEGlp1J2NggBzNbnw9/oSMNQoiKs8FyUqCGONGJcYZNRzrFtMek3+xRI3QdS9V0m5ZHn6CYbpWcTldRroppr/EX7JLIvxCMTp6w+46+txMX0GBWsEHKObYtJv89A5djVrQxNCidNG3Rcv4Vu3AWi9nfl0VbuH58ro8MVbFm3qu
NYQwh86fh5o1KQ95C8L8J8SszihRVCzrFpbKJbCReJQpuhmBSO7e7EhYmpKoY/+yJ2HZy2dnqr0JXF9jfOyaOtvCE
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.scatter(reduction[:, 0], reduction[:, 1])"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(10, 288)"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pca.components_.shape"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.cluster import KMeans"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"# k-means++ initialisation is random; seed it so cluster numbering is stable across runs.\n",
"km = KMeans(n_clusters=10, random_state=42)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"KMeans(algorithm='auto', copy_x=True, init='k-means++', max_iter=300,\n",
" n_clusters=10, n_init=10, n_jobs=None, precompute_distances='auto',\n",
" random_state=None, tol=0.0001, verbose=0)"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"km.fit(reduction)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([7, 1, 1, 2, 4, 3, 9, 5, 5, 6], dtype=int32)"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"km.labels_[:10]"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x7fcf69bbc9e8>"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD8CAYAAAB3u9PLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzsnXd4FVX6xz9nZm5LLyQh9N5BKdIEVBArir13XXVXt7mr66772+K667rrrmVX194LoiKCCFKlg3RCgCRAekJ6z20zc35/JAQu994kYBDE+TwPT8jcuXPOJDfnnfOW7yuklFhYWFhYWCgnewIWFhYWFqcGlkGwsLCwsAAsg2BhYWFh0YxlECwsLCwsAMsgWFhYWFg0YxkECwsLCwvAMggWFhYWFs1YBsHCwsLCArAMgoWFhYVFM9rJnkB76NSpk+zVq9fJnoaFhYXF94otW7aUSymT2nv+98Ig9OrVi82bN5/saVhYWFh8rxBC5B7L+ZbLyMLCwsICsAyChYWFhUUzlkGwsLCwsAAsg2BhYWFh0YxlECwsjhOfrrO9oJis0nKsviIWpwPfiywjC4tTjYXpGTz2xVKEAMM06RwTzSs3XEGPhLiTPTULi+PG2iFYWBwjGSXlPDpvMQ0+H/VeH26/Tk5FFbe/9wnmCd4p5O0t5PMXFrH8g9W4GzwndCyLHx7WDsHC4hj5cMsOfIYRcEwCNW4PW/IKOatntw4fU0rJ8w+8xuK3vwYkqqby3E9e5clFv2fI+AEdPp7FDxNrh2BhcYyU1jWE3AkIBJWN7hMy5vp5m1n67kp8bh8+tx93nYfGWjd/uPzvGEcZJwuL48UyCBYWx8h5A3rjstmCjvsMg5HdUk/ImAtfX4anwRt03O/V2bM+84SMafHDwzIIFhbHyGXDBtMtLgandtjj6rJp3D5uJMnRUSdkTL9PD/2CAN1v7RAsOgYrhmBxSpBfVU1hTR0DkjuREOE62dNpFadN46O7bmDWlp0s3J1JjMPBzWedydQBfU7YmOffMoX0tXuDdwkShkwceMLGtfhhYRkEi5NKvdfHTz+ez5b8Iuyqglc3uHH0CH57wTkIIU729MISabdz94Qx3D1hzHcy3nk3nM3yD1aTtmYvnnoPNruGoio8+t7PsDuC3VcWFseDZRAsTiq//2IJm/MK8RkG3mavyOxtafTplMANo0ec3MmFIL+qmq+zsrFrKtMH9iMhMuI7GVfVVJ744rdsXZrGpkXbiOkUzfRbppDcI7yysZSSPRuzqK9qYMiEAUTFRX4nc7X4/iK+DxWWY8aMkZb89fcTKSUbcwvYWVhMSnQ0Fwzu1xKQbfT5Gfv0//CHyJLpnRDPogfu+I5n2zovrt7IS2s2AqAIgZTwjysu4sLB/U/yzIIpyCzi0YueoLaiDiEEuk/n7idv4qqfzzjZU7P4DhFCbJFStnsba+0QLE4YXl3n7vfnkH6wFK9fx6Fp/G3x17x/+3X0S0qk0ecjnFOoxtN20VVZfQMHyivpER9Hamx0x07+KNKLS3h5zTd49UDj9fDcRYzv1Z1Yl/OEjn8sSCl59KInKM0NlNR447FZDBjdl2GTBp/E2VmcylhZRhYnjLc2bGVnUQmNPj+GlDT6/dS4Pfzi0wUAJEZGkBgZHEAWQGpMNJ/tSKfOE5xqaZgmv5u3mKnPv84Ds+dzwQtv8uDH8/HqYTJx2iC3spqH5y5k6vOvc/Nbs1m9PyfonHlpe/GFuL6qCL7Oyj6ucU8Ue7/ZR215XZC+ks/t5fMXFp2kWVl8H7AMgsUJY86O9KBFWgJ5VdUcrG1yZTx+6XScmobSHEBWhEACWWXlPL5wBZOffZUNOfkB13hl7SYWpGfgMwzqvF58hsGqfdn8fcmqY55jTkUVV732Pl/syqCwppbN+YX89OP5fLwtLeA8wzQJ51w1pHnM455I6qsbEErw3ktKqCmvOwkzsvi+YBkEix
NGa9GpQw+vU/r14qM7b+CyYYPokxjf4kLyGSaNfj9uv58HZs8LeDp/d9N2PEcZGq9uMGd7+jFrCT23ch2NPn/A+9x+naeWrA6IbVw8dABOW7CH1TBNzunXu+X7/KoaHl+4nOvfmMUfv1xKbmX1Mc2nIxgyvj+6L3RtQuaW/bz3l4/x+/zf8awsvg9YBsHihDFz+GAcmhp0vGtcbIDPf1DnJP5xxUUM7pyMEWZB35hb0PL/em+wGwmaKoX1Y5Rx2JpXFNKI6KbJwdrDT9PDuyZxwdDuODUVBVCFQBGCLjHRrNyXjW6a7D1YxsxX3uWjrWlsLyzm4627uOKV99hZdPCY5vRtiYyN5Ef/uAVHhIOjM3cbqht5+4+zubXPA1SX1Xyn87I49bEMgsUJIb8xG1JWExXdgE1rWnBdNo0Yp4Nnrrok5Ht0M7zrRTcOvza6e9eQ5/RLSsCuHVueRLjKYsM0iXM1xTfWli/lsbR7ET3mcdaEdOJidYQAU0qyK6t5fOFy7p/1OY8vWk6Dz99yH4fiJo/NX9Lu+ZQXVvDBk3P4709fY93nm45bp+iKBy/mqcX/R//RfQkVua8oquIno3+D7j++uIvF6YllECw6nAP1GTyX+ScyG7cyZuJORozKZMCAg9w1pR8rfnY3g1JC585fNmxQSI0gwzQZ16t7y/e/veAcIu02NKXp46sKgcum8adLph3zXO+fNDbIFeTQVC4c3J9op4Osut18VvAuPtOLx3RjCj/VdQq6Gehi2pJXyLaC4pBjZJaW89PZ80Km1x7J1qU7uWPgz3nv8Y/5/IWv+Putz/PQlD/g8x6fe2foxIF0G5Aa1ndXU17Lhi+2HNe1LU5PLINg0eF8VvgOfukDQAhISq6hz4A8auOWEOVwhH3ftIF9OadfrxajYFMVnJrGk5dfSIT9sKEYkNyJeffdyvWjhjOiSwqXDx/EbWNH8tevvuaGN2fx2Y72xxKmDezLw9MmE2m3E2G34dBUzh/YjydmTAdgRemClnsBqCiPCXmdRr8fLUQg9xCr9ufyxvrwi69hGPz1pmfxNnrxN1foues97N+Rw4KXF7frXkLRfWAXVC30n7nfp5O7uyDkaxY/TDrEIAgh3hBClAohdh1xLEEIsUQIkdX8Nb75uBBCPC+E2CeE2CmEGNURc7A4dSh054U8XuYtxpDhn5IVIXj26kt59cYruGfiGH46ZQJf/vh2Lh4SrPffLS6WP1w8lVl33kB2ZTVvf7ON9OJSthUU8+eFy3l47sJ2z/eWs85kw6/u4xfnTKRTZCQLd2dy8f/eZl7aHmr8VQHn2uw6QgQbG7uqMqp7lwDBuyPx6DofbNkZdg4HduTiD7ET8Db6WPb+mnbfy9FcfM/5KGpwHAfAGemgx+CO791g8f2lo3YIbwEXHXXsUWCZlLI/sKz5e4CLgf7N/+4F/tdBc7A4RYjUQheJORUXShsfOSEEZ/XsxsPTJnPfpLF0jQv9RH6IFVnZZJaW4znCF+726yzdu5+9JWXtnvOyzAP8e8VaCmtqMaWkqKaW/1uwlPqSfmji8CKf0rkqpEFQFMHfLruAc/v3DnqtZV4+X9jXNJuKNEPvamyO468fTUyN5+kVf0S1BRoFVVOISYxmwmWjj/vaFqcfHWIQpJSrgMqjDs8E3m7+/9vAFUccf0c2sQGIE0KcGBF5i5PCtOTLsCuBriG7cHBu8iUdLli3MSePxhAplBLYklfY6nvzq6p5YdUG/rF0FX/76uugVFaPX2f5Np0ILRq12Shomsn48dlEu1Qi7TaiHHZinA7+e+1lJEdE8NQl0+kVH9xXWRWiVWPRa1gP4pJig447Ix1ceu/0Vu+jLYaMH8j7Of9j3IzRqJqKZlOZOPMs/rP+b2ghUmktfricyE9DipSyGEBKWSyESG4+3hU4stKooPlYQEROCHEvTTsIevTocQKnadHRnJN0EY16HctLF6AIBVOaTOw0jQs7X9XhYyVFRWJX1aCWlpoiWh
Wem5+2h8e+WIphmq1mN5XWNfDwwCdZdvBLNpZsobLex4F9SSS6opk5YjCDU5Jx17n59I+f8NePN4OUJJ3bn4PnJqN
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.scatter(reduction[:, 0], reduction[:, 1], c = km.labels_)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0.42109594, 0.15822393, 0.08715776, 0.05551729, 0.02930062,\n",
" 0.02325135, 0.02230961, 0.01512069, 0.01053257, 0.01018127], dtype=float32)"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pca.explained_variance_ratio_"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.5381844e-06"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.mean(reduction)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0\n",
"(71, 10)\n",
"1\n",
"(141, 10)\n",
"2\n",
"(52, 10)\n",
"3\n",
"(125, 10)\n",
"4\n",
"(194, 10)\n",
"5\n",
"(133, 10)\n",
"6\n",
"(58, 10)\n",
"7\n",
"(101, 10)\n",
"8\n",
"(78, 10)\n",
"9\n",
"(39, 10)\n"
]
}
],
"source": [
"# Average the PCA-reduced points of each k-means cluster to get one center per cluster.\n",
"centers = []\n",
"for cl in range(km.n_clusters):  # follow the fitted model rather than repeating the literal 10\n",
"    members = reduction[km.labels_ == cl]  # boolean mask keeps only this cluster's rows\n",
"    print(cl)\n",
"    print(members.shape)\n",
"    centers.append(np.mean(members, axis=0))"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"centers = np.array(centers)"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(10, 10)"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"centers.shape"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.cluster import AgglomerativeClustering"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([2, 1, 2, 1, 2, 3, 0, 3, 1, 0])"
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Group the k-means centers into 4 clusters with Ward linkage.\n",
"# Ward linkage only works with Euclidean distance, which is also the default, so the\n",
"# `affinity` keyword (deprecated and later removed in scikit-learn) is left out.\n",
"cluster = AgglomerativeClustering(n_clusters=4, linkage='ward')\n",
"cluster.fit_predict(centers)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([2, 1, 2, 1, 2, 3, 0, 3, 1, 0])"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cluster.labels_"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([7, 1, 1, 2, 4, 3, 9, 5, 5, 6], dtype=int32)"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"km.labels_[0:10]"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"# Relabel every sample: index the per-center agglomerative labels by each sample's k-means label.\n",
"new_cluster = list(cluster.labels_[km.labels_])"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"<matplotlib.collections.PathCollection at 0x7fcf6830a630>"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD8CAYAAAB3u9PLAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzsnXecFdX5/99n5vbtyxZ67yiCAgqCgjH2aOy9l2+isUcTNTFRY4vG/DQqsZcYCwa7olJEpUiv0kFggWV7v3Vmzu+Pubvs3Tt39y7swiLzfr14LXvvlDO7d89zzlM+j5BSYmNjY2NjoxzoAdjY2NjYdAxsg2BjY2NjA9gGwcbGxsYmim0QbGxsbGwA2yDY2NjY2ESxDYKNjY2NDWAbBBsbGxubKLZBsLGxsbEBbINgY2NjYxPFcaAHkAw5OTmyd+/eB3oYNjY2NgcVS5YsKZVS5iZ7/EFhEHr37s3ixYsP9DBsbGxsDiqEENtac7ztMrKxsbGxAWyDYGNjY2MTxTYINjY2NjaAbRBsbGxsbKLYBsHGZi8JGxqrKrezuaYIu6+Izc+BgyLLyMamozGjcCUPr/4QAB2DfE8G/zzqSrr7Oh3gkdnY7D32DsHGppVsqtnNA6umUqeHqNNDBPUI2+vKuHHhKxjSaNd7b60t5v1t8/ly13ICWrhd72Vz6GHvEGxsWsnU7QuIGFrMaxJJdSTA8optHJndp83vKaXk8TWf8PnOJQCoQuHxNR/zzKirOTyzZ5vfz+bQxN4h2Ni0kpJQNQbxMQMhoDJc1y73/K54LdN2LSNkaIQMDb8epk4LceeSN9HbeVdic+hgGwQbm1YyPncwHtUZ93rE0BneTqv1j3csJqDHu4jChs6qyu3tck+bQw/bINjYtJJTuo6gqzcLt7LHKHhUJxf3GkeOJ71d7tnURVWPADRDb5d72hx62DEEmw7BTn85hYEK+qd1JtOVcqCH0ywe1cmrx/yWDwoWMmP3KtIcHs7vNZYJuYPb7Z6ndRvJysrtlruE4Vm92u2+NocWtkGwOaDUaSH+sOy/rKjYilNRCRs65/QYw+2DT0cIcaCHlxCfw81lfSZwWZ8J++V+v+w8nC93LWd5xTYCehinUFGEwoNHXIBLsf+MbdoG+5Nkc0B5ePUHLCv/iYjUCUXdIh/tWETvlFzO6Xn0AR5dPDv95cwpWYdLcTAxfyhZrtT9cl+HovLPo65kYdlm5pdsIMPl47SuI+nszUx4jpSS1VUF1EQCHJ7ZkzSnd7+M1ebgxTYINu2KlJIl5Vv4sWoHeZ50JuUPw6O6AAhoYb4tWkNExvrAg3qEt7fO7XAG4ZVNs3hty2wEIBA8tfYzHhh+ASd0Pmy/3F8RCsfkDOCYnAEtHrutrpRbFr9KVdiPEALN0Llx4Mlc3PvY/TBSm4MV2yDYtBshPcIti19jXfUuwnoEl+rkqbWf88LR19M3NR+/HjLdQhaqDzVaoMXrl4Zq2FZbQndfNvnNrJTbgnVVO3l9y7eEmwR3/7JyCqM69SO9A62+pZTcsvg1dgeqkI1+uJM3fM2QjG6MyOp94AZn06GxDYJNu/HO1rmsqdrR4AoK6GGCeph7l7/Lu+NvJduVSpYrhaJgVcx5AkG+J4PPdi5lYt5QUp2emPd1afDI6g/5qnAFLsVB2NAYlzuIh4ZfgNsiHbQlCurKeHnTTJZXbCXfm8nVfScyNndgzDHTCpfHGQMwC8Tmlqzj1K4jW33f9uLHqh1UhutijAFAyIjw/rb5tkGwSYiddmrTbny2c2mDMahHAjv8ZRQFqxBCcO+ws/EoThTMALKCQCLZXFPEE2s+4fTZj7G4bHPMNd7Y8i3TC1cSNjRqtSBhQ2N+yXqeXj+t1WPcXlfKFfOf5avCFRQGK1lesZU/LPsvHxUsijnOMKyLvySgJ3jvQFETCaBYBOQlUBn27/8B2Rw02AbBpt1oukKNfdN8b2
zuQF4+5jec3PUIeqXkUj+NRaROQA8T0MPcteytmNX5lG3zCBqRmMuFDI1PdyxptZbQCxtnENDCMZXHQSPCM+unxeT3n9hlOG41fkOtS4NxuYMavt/pL+eJNZ9wzfzJPPbjRxTUlbVqPG3B4Zk9E9YmrK3eycubZiasa7A5tLENgk27cWrXEZYpkV29WTE+/4HpXXhg+AUMSu+CbmVEJCwp29LwbZ0WsrxfxNDQWmkQVlRutZSh0KUR48oanpnPr7r0xqM4EAgUBAJBF08m80rWoxk6G6oLuXTuM3xYsJDVVQV8XLCYy+b9izVVO1o1pn0l1enh5kGn4lGcNN0n1GpBXtw0k7O+fYKKcO1+HZdNx8c2CDbtgoz8yCWZUxjgKcermJO0V3WS5vDw8IiLLM/RmnG9NJ7oj0hQiNUnNa/VOfm5buvKYl0aZDh9ABh17yKLj+HO3H8xuf8s+nmCKMJ0bW3zl/L3tZ9w59I3eXLNJ/j1cMNYdQwCepi/rZqa9HiKg1W8tvkbnljzCd8WrdlrnaILeo3l2dHXMDi9m+X7paEaLp/3nF3lbBODbRBs2hwZXoIsuwSPNpuXBnzNw73ncW3+Bm7vP4iPJ97NgLQulued3PUIvBZBYR2DUdl9G76/ffDp+FQ3DmF+fFWh4FGd/HHYr1s91qv7TcKjxN7TrTiYlD+MVKcHGVoANY+C9IOsxSmCFITUGAMV1CMsr9jGygSaQptqi7h76VstTr4LSzdx3vdP8fLmWby//QfuXzmFGxa8aBnMTobhWb3omZKT8P3KcB3fl6zbq2vb/DyxDYJNmyOrHwECgEQRMC59N9d3WcmZqa+T6vAkPO/4vCGMyxmEN1qn4BQqbsXJ/Yedh9fhajiuX1pn3j72Fs7uMYahGd05pcsILuo5jn+s/Yzrfvg3n+1cmnQs4bi8Idw86BR8qhuf6sKlODg+byj3HXaO+Sx1r0afxWRJbR5GnCPGzKCqN1BWzC/dyFs/fZ/wfV0a/GnFuwT1CJGo4QjoYTZWF/LB9gVJPYsVvVJyUBOMK2Jo/FRbvNfXtvn50SZpp0KIV4EzgGIp5WHR17KB94DewFbgAillhTD1CJ4GTgP8wFVSyqVtMQ6bDoKWYNWpb0NKDSGsP3aKUHhkxMUsq9jK3JJ1pDk8nNx1BF28WXHHdvVlcdfQM9GlwQ0LXmTm7lUNgeYN1YUsKN3IQ0dcmNRwz+81lrN6jOaDbQt4Z9tcZuxexarK7fxmwC852R07YaarYRzCICLVmNddQmV4Vi9WVW6Py6wCM+VzasECruo30XIMG6sLLXcCQSPCl4UruGgvC8p+3X00r2+ejdXexKu66JOSu1fXtfl50lY7hNeBU5q89kdgppRyADAz+j3AqcCA6L8bgMltNAabjoISP4EDIFIA1fq9+kOE4MjsPtw86FSu6jfJ0hg0Zk7xOjbV7I7JOgoaEWYXrWFjTWHSQ/6uaC3PbfyawmAlBpLCYCWP/vgRX1SNBvbsTo7P2Gn5R6MIhT8fdi7jmxG4sxKmq8ehqAlzspyi+Z9Zc+R40pk85vq43YuKQobLx4S8IXt9bZufH21iEKSU3wHlTV4+C3gj+v83gF83ev1NafIDkCmEsHYq2xycpFwPomnlrhd8V7W5YN3i8s0JJlrJ8vKtzZ6701/OK5tm8cz6afxz3WeEmqSyBo0I/94OKJnUGwWfqvN0v4VkOVV8qpsUh5s0h4fHR15KjieNvww/jx7e+L7KKoJjG6WnNqVfaj5ZFiqvXtXFOT3HNPscLXF4Vk8+Of5uxucORhUKDqFwXP4QXj3mtziUvTc2Nj8/2rNSOV9KWQggpSwUQuRFX+8GFDQ6bkf0tZjlnBDiBswdBD172i0CDyaE7wqkUQl1r5ptxKQBvgsRqTe1+b1y3Om4hEq4iR6SKhSy3ImF577ctYyHV3+ILiWaTBzsLQ7VIjt9jFb7GmU1sygOhflPcT/SnJlc0HUkg9K7UqeFePOn77
hjyRsgBD28nXArTnRDR8NAQeBRXfxmwEkJ7yOE4MkjL+e3C18mYugN2UUT84dycpcjWvlTiSfHk85TR12BjNZ/dGQ
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"plt.scatter(reduction[:, 0], reduction[:, 1], c = new_cluster)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Clustering avec la classe adéquate"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [],
"source": [
"from iss.clustering import ClassicalClustering"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}