Files
kano-wand-smarthome/train_wand_model.py
T
2021-03-05 16:18:12 -06:00

77 lines
2.2 KiB
Python

from glob import glob
import numpy as np
import scipy as sp
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, confusion_matrix
import os
from sklearn.ensemble import RandomForestClassifier
# Multiplier applied to the window length in quantize_flattened_data to get the
# number of flat values per window; presumably the number of columns in each
# recording CSV -- TODO confirm against the data files.
og_data_columns = 4
def apply_k_means_clustering(data, kmeans):
    """Build a cluster-occupancy histogram for one recording.

    Args:
        data: 2-D array-like of feature rows, passed to ``kmeans.predict``.
        kmeans: Fitted clustering model exposing ``cluster_centers_`` and
            ``predict`` (the script passes a scikit-learn ``KMeans``).

    Returns:
        1-D float array with one bin per cluster centre; bin ``i`` holds the
        number of rows of ``data`` assigned to cluster ``i``.
    """
    n_clusters = len(kmeans.cluster_centers_)
    if len(data) == 0:
        # A recording shorter than one window has no rows to classify.
        # scikit-learn estimators are expected to reject zero-sample input,
        # so answer with an all-zero histogram instead of calling predict().
        hist = np.zeros((n_clusters,))
    else:
        labels = kmeans.predict(data)
        # minlength keeps a bin for clusters that received no rows; the cast
        # preserves the float dtype of the original np.zeros accumulator.
        hist = np.bincount(labels, minlength=n_clusters).astype(float)
    print(hist)
    return hist
def quantize_flattened_data(data, length, columns=None):
    """Cut a flattened recording into fixed-size windows.

    Args:
        data: 1-D sequence of values (the script passes a flattened CSV).
        length: Number of consecutive samples packed into each window.
        columns: Number of values per sample. Defaults to the module-level
            ``og_data_columns`` when omitted.

    Returns:
        2-D array of shape ``(rows, length * columns)`` where ``rows`` is the
        number of complete windows in ``data``. Trailing values that do not
        fill a whole window are dropped.
    """
    if columns is None:
        columns = og_data_columns
    quantile_length = length * columns
    # Floor division gives the count of complete windows without a float
    # round-trip.
    rows = len(data) // quantile_length
    # rows * quantile_length never exceeds len(data), so np.resize only
    # truncates here; it never has to repeat values to pad the result.
    return np.resize(data, (rows, quantile_length))
def load_data(data_dir):
    """Load every recording found in the sub-directories of ``data_dir``.

    Each file below a sub-directory is read with ``pandas.read_csv`` and
    flattened to a 1-D array. The name of the directory that directly
    contains the file is used as its label. Files sitting directly in
    ``data_dir`` are skipped.

    Args:
        data_dir: Path of the dataset root (``str`` or ``os.PathLike``).

    Returns:
        DataFrame with one row per file and two columns: ``exercise`` (the
        containing directory name) and ``flat_data`` (the flattened values).
    """
    # Normalise once so the top-level check below compares path text; an
    # identity check only works while os.walk hands back the same object.
    top = os.fspath(data_dir)
    records = []
    for root, dirs, files in os.walk(top):
        # Sorting in place makes os.walk descend in a stable order.
        dirs.sort()
        if root == top:
            continue
        label = os.path.basename(root)
        for name in sorted(files):
            samples = pd.read_csv(os.path.join(root, name)).values.flatten()
            records.append([label, samples])
    return pd.DataFrame(records, columns=["exercise", "flat_data"])
# Load the labelled recordings; each sub-directory name is the class label.
train_data = load_data("./train")
test_data = load_data("./test")
# val_data = load_data("./val")

# Number of consecutive samples grouped into one window.
quant_len = 3
train_data["quantized_data"] = train_data["flat_data"].apply(
    lambda x: quantize_flattened_data(x, quant_len)
)
test_data["quantized_data"] = test_data["flat_data"].apply(
    lambda x: quantize_flattened_data(x, quant_len)
)

# Stack the windows of every training recording exactly once. Seeding the
# stack with recording 0 and then appending all recordings would count the
# first recording twice in the clustering input.
linked_data = np.concatenate(list(train_data["quantized_data"]), axis=0)
print(linked_data)

# Cluster all training windows; the fitted model is then used to turn each
# recording into a per-cluster histogram.
k_means_model = KMeans(n_clusters=25, n_init=15, max_iter=500).fit(linked_data)
print(k_means_model.cluster_centers_)

# Describe each recording by how many of its windows fall in each cluster.
train_data["histogram"] = train_data["quantized_data"].apply(
    lambda x: apply_k_means_clustering(x, k_means_model)
)
test_data["histogram"] = test_data["quantized_data"].apply(
    lambda x: apply_k_means_clustering(x, k_means_model)
)
print(train_data)

# Classify recordings from their histograms.
clf = RandomForestClassifier(n_estimators=50)
print(train_data["histogram"].values)
clf.fit(list(train_data["histogram"]), train_data["exercise"].values)
results = clf.predict(list(test_data["histogram"]))

# Report predictions against the true labels.
print(results)
print(test_data["exercise"].values)
print(accuracy_score(test_data["exercise"].values, results))
print(confusion_matrix(test_data["exercise"].values, results))