diff --git a/.gitignore b/.gitignore
index c5c3717..a4200ed 100644
--- a/.gitignore
+++ b/.gitignore
@@ -201,3 +201,5 @@ data
 train
 test
 val
+
+.idea
\ No newline at end of file
diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..5a6c691
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "kano-wand-async-python"]
+	path = kano-wand-async-python
+	url = git@personalid:lucasoskorep/kano-wand-async-python.git
diff --git a/kano-wand-async-python b/kano-wand-async-python
new file mode 160000
index 0000000..ffbe092
--- /dev/null
+++ b/kano-wand-async-python
@@ -0,0 +1 @@
+Subproject commit ffbe0925062e35afc008cf043d55cb536df6f0e2
diff --git a/kano-wand-test.py b/kano-wand-test.py
index 62279c4..931e4d1 100644
--- a/kano-wand-test.py
+++ b/kano-wand-test.py
@@ -1,8 +1,12 @@
-from kanowandasync import Shop, Wand
-from kanowandasync.constants import *
+# from kanowandasync import Shop, Wand
+# from kanowandasync.constants import *
+
 import asyncio
 
 # Custom wand class extending the default wand
+from kanowandasync import Shop, PATTERN, Wand
+
+
 class MyWand(Wand):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
diff --git a/live_wand_model_demo.py b/live_wand_model_demo.py
new file mode 100644
index 0000000..e69de29
diff --git a/train-test-split.py b/train-test-split.py
index f0c96e0..5a60373 100644
--- a/train-test-split.py
+++ b/train-test-split.py
@@ -6,9 +6,9 @@ import multiprocessing
 train_dir = "./train/"
 test_dir = "./test/"
 val_dir = "./val/"
-train = .80
-test = .15
-val = .05
+train = .70
+test = .30
+val = .00
 
 
 def add_train_data(file, filename, label):
@@ -62,7 +62,7 @@ def test_split_file(file_root):
     file = file_root[1]
     # print(file)
 
-    if file is ".DS_Store":
+    if file == ".DS_Store":
         return
     c = random()
 
diff --git a/train_wand_model.py b/train_wand_model.py
new file mode 100644
index 0000000..ff66e0d
--- /dev/null
+++ b/train_wand_model.py
@@ -0,0 +1,76 @@
+from glob import glob
+import numpy as np
+import scipy as sp
+import pandas as pd
+
+
+
+from sklearn.cluster import KMeans
+from sklearn.metrics import accuracy_score, confusion_matrix
+
+import os
+
+from sklearn.ensemble import RandomForestClassifier
+
+og_data_columns = 4
+
+
+def apply_k_means_clustering(data, kmeans):
+    hist = np.zeros((len(kmeans.cluster_centers_),))
+    for x in kmeans.predict(data):
+        hist[x]+=1
+    print(hist)
+    return hist
+
+def quantize_flattened_data(data, length):
+    # print("quantizing the data")
+    # print(data)
+    quantile_length = length*og_data_columns
+    rows = int(len(data)/quantile_length)
+    return np.resize(data, (rows, quantile_length))
+
+def load_data(data_dir):
+    data = []
+    for root, dirs, files in os.walk(data_dir):
+        if root == data_dir:
+            continue
+        for file in files:
+            data.append([os.path.basename(root), pd.read_csv(os.path.join(root, file)).values.flatten()])
+    return pd.DataFrame(data, columns=["exercise", "flat_data"])
+
+train_data = load_data("./train")
+test_data = load_data("./test")
+# val_data = load_data("./val")
+
+quant_len = 3
+
+train_data["quantized_data"] = train_data["flat_data"].apply(lambda x : quantize_flattened_data(x, quant_len))
+test_data["quantized_data"] = test_data["flat_data"].apply(lambda x : quantize_flattened_data(x, quant_len))
+
+
+linked_data = train_data["quantized_data"][0]
+
+for x in train_data["quantized_data"]:
+    linked_data = np.append(linked_data, x, axis=0)
+
+print(linked_data)
+
+k_means_model = KMeans(n_clusters=25, n_init=15, max_iter= 500).fit(linked_data)
+
+print(k_means_model.cluster_centers_)
+
+train_data["histogram"] = train_data["quantized_data"].apply(lambda x: apply_k_means_clustering(x, k_means_model))
+test_data["histogram"] = test_data["quantized_data"].apply(lambda x: apply_k_means_clustering(x, k_means_model))
+print(train_data)
+
+clf = RandomForestClassifier(n_estimators=50)
+print(train_data["histogram"].values)
+
+clf.fit([x for x in train_data["histogram"]], train_data["exercise"].values)
+results = clf.predict([x for x in test_data["histogram"]])
+print(results)
+print(test_data["exercise"].values)
+
+print(accuracy_score( test_data["exercise"].values,results ))
+print(confusion_matrix( test_data["exercise"].values,results))
+