diff --git a/LICENSE.txt b/LICENSE.txt new file mode 100644 index 0000000..82b8b2b --- /dev/null +++ b/LICENSE.txt @@ -0,0 +1,120 @@ +GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for software. + By contrast, the GPL guarantees your freedom to share and change all versions. + + To protect your rights, we need to prevent others from denying you these rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License. + + Some devices are designed to deny users access to install or run modified versions. +This is fundamentally incompatible with the aim of protecting users' freedom to change software. + + Finally, every program is threatened constantly by software patents. +The GPL assures that patents cannot be used to render the program non-free. + + TERMS AND CONDITIONS + + 0. Definitions. + "This License" refers to version 3 of the GNU General Public License. + "Copyright" also means copyright-like laws that apply to other kinds of works. + "The Program" refers to any copyrightable work licensed under this License. + + 1. Source Code. + The "source code" for a work means the preferred form for making modifications. + + 2. Basic Permissions. + All rights granted under this License are granted for the term of copyright on the Program, + and are irrevocable provided the stated conditions are met. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + No covered work shall be deemed part of an effective technological measure. + + 4. Conveying Verbatim Copies. + You may convey verbatim copies of the Program's source code as you receive it. + + 5. Conveying Modified Source Versions. 
+ You may convey a work based on the Program under section 4, provided that you also meet all conditions: + a) The work must carry prominent notices stating that you modified it. + b) You must license the entire work under this License. + + 6. Conveying Non-Source Forms. + You may convey a covered work in object code form under sections 4 and 5, provided that you also convey + the machine-readable Corresponding Source under the terms of this License. + + 7. Additional Terms. + "Additional permissions" are terms that supplement the terms of this License by making exceptions. + + 8. Termination. + You may not propagate or modify a covered work except as expressly provided under this License. + + 9. Acceptance Not Required for Having Copies. + You are not required to accept this License in order to receive or run a copy of the Program. + + 10. Automatic Licensing of Downstream Recipients. + Each time you convey a covered work, the recipient automatically receives a license from the original licensors. + + 11. Patents. + A "contributor" is a copyright holder who authorizes use under this License of the Program. + + 12. No Surrender of Others' Freedom. + If conditions are imposed on you that contradict the conditions of this License, they do not excuse you from the conditions. + + 13. Use with the GNU Affero General Public License. + You have permission to link or combine any covered work with a work licensed under version 3 of the GNU Affero General Public License. + + 14. Revised Versions of this License. + The Free Software Foundation may publish revised and/or new versions of the GNU General Public License from time to time. + + 15. Disclaimer of Warranty. + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. + + 16. Limitation of Liability. + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER BE LIABLE TO YOU FOR DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ If the disclaimer of warranty and limitation of liability provided above cannot be given local legal effect according to their terms, + reviewing courts shall apply local law that most closely approximates an absolute waiver of all civil liability. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest possible use to the public, +the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program: + + + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along with this program. If not, see <https://www.gnu.org/licenses/>. + +======================================================================== + +Copyright (C) YOUR NAME 2024 + +Licensed under the GNU General Public License Version 3 (GPL-3.0-or-later). +This program is free software: you can redistribute it and/or modify +it under the terms of the GPL v3 as published by the Free Software Foundation, +either version 3 of the License, or (at your option) any later version. + +See <https://www.gnu.org/licenses/gpl-3.0.html> for full license text. 
diff --git a/test.json b/test.json new file mode 100644 index 0000000..e69de29 diff --git a/train_model.py b/train_model.py deleted file mode 100644 index 051feba..0000000 --- a/train_model.py +++ /dev/null @@ -1,231 +0,0 @@ -import numpy as np -import pandas as pd -from sklearn.cluster import KMeans -from sklearn.metrics import confusion_matrix -from sklearn.ensemble import RandomForestClassifier -from sklearn.naive_bayes import GaussianNB -import matplotlib.pyplot as plt -from sklearn import svm -from pprint import pprint -import os -import seaborn as sn - -def normalized(a, axis=-1, order=2): - return a / sum(a) - -def get_activities(dir): - activity_data_train = {} - activity_data_test = {} - i= 0 - for root, dirs, files in os.walk(dir): - - activity_name = root.split('\\')[1] - activity_data_train[activity_name] = [] - activity_data_test[activity_name] = [] - for i in range(int(.8 * len(files))): - data = pd.read_csv(os.path.join(root, files[i]), delimiter=' ').values.flatten() - activity_data_train[activity_name].append(data) - i+=1 - while i < len(files): - data = pd.read_csv(os.path.join(root, files[i]), delimiter=' ').values.flatten() - activity_data_test[activity_name].append(data) - i+=1 - - return activity_data_train, activity_data_test - -def quantize_data(data, vector_len): - del data[""] - print("quantizing", data) - print(vector_len) - quantized_data = {} - total = 0 - total2 = 0 - for key, value in data.items(): - - print(key, len(value)) - quantized_data[key] = {} - print(vector_len) - for i in range(len(value)): - total2+= len(value[i])/vector_len - while(len(value[i]) %vector_len != 0): - value[i] = value[i][:-1] - print(len(value[i]), vector_len) - new_data = np.split(value[i], len(value[i]) / vector_len) - # if len(new_data[len(new_data) - 1]) != vector_len: - # print(vector_len,len(new_data[len(new_data) - 1])) - # del new_data[len(new_data) - 1] - quantized_data[key][i] = new_data - total+= len(new_data) - - - print("Total is:", total, 
total2) - - return quantized_data - -lengths = [4] -# for i in range(3,4): -# lengths.append((i+1)*21) -data_train, data_test = get_activities('data\\') -print("----------------") - -X = np.array([[1, 2], [1, 4], [1, 0], [4, 2], [4, 4], [4, 0]]) - - # print(X) -clusters =[1000]# [600,600,600,1000,1000,1000,1400,1400,1400] -q_count = 0 -n_count = 0 -accuracies = [] -for quantize_length in lengths: - accuracies.append([]) - for num_clusters in clusters: - - qdata_test = quantize_data(data_test, quantize_length) - print("----------------") - qdata_train = quantize_data(data_train, quantize_length) - print("----------------") - #fill kmeans_data with all of the data of all of the exercises reguardless of what they are - k_means_data = None - - for exercise_name, exercise_data in qdata_train.items(): - for example_number, example_data in exercise_data.items(): - if k_means_data is None: - k_means_data = np.array(example_data) - else: - # print(k_means_data.shape) - # print( np.atleast_1d(part).shape) - # print( np.atleast_2d(part).shape) - k_means_data = np.concatenate((k_means_data,np.array(example_data)), axis=0) - - print(len(k_means_data[0])) - print(len(k_means_data[:,0])) - - - print("training the model") - kmeans = KMeans(n_clusters=num_clusters, n_init=15, max_iter= 500).fit(k_means_data) - - print("Done training model") - centers = kmeans.cluster_centers_ - #Time to run the training by again and make the histograms for each training data point. 
- classifier_training_X = [] - classifier_training_Y = [] - - # print("creating histograms for training") - for exercise_name, exercise_data in qdata_train.items(): - for example_number, example_data in exercise_data.items(): - center_histo = np.zeros((num_clusters,)) - center_indices = kmeans.predict(example_data) - for i in center_indices: - center_histo[i] += 1 - classifier_training_X.append(normalized(center_histo)) - classifier_training_Y.append(exercise_name) - - - # print("done") - - classifier_test_X = [] - classifier_test_Y = [] - # print("creating histograms for testing") - for exercise_name, exercise_data in qdata_test.items(): - for example_number, example_data in exercise_data.items(): - center_histo = np.zeros((num_clusters,)) - center_indices = kmeans.predict(example_data) - for i in center_indices: - center_histo[i] += 1 - classifier_test_X.append(normalized(center_histo)) - classifier_test_Y.append(exercise_name) - - # print(classifier_test_Y) - - print("done") - forest_average = [] - for g in range(1): - clf = RandomForestClassifier(n_estimators=(50)*2) - clf.fit(classifier_training_X, classifier_training_Y) - - results = clf.predict(classifier_test_X) - print(results) - i = 0 - # - # - con_mat = confusion_matrix(results, classifier_test_Y) - print(con_mat) - - np.savetxt("con_mat.csv", con_mat, '%5.0f', delimiter=",\t") - - wrong = 0 - right = 0 - for i in range(len(con_mat)): - for j in range(len(con_mat[0])): - if i != j: - wrong += con_mat[i][j] - else: - right += con_mat[i][j] - - # print(right/len(results)) - # print((g+1)*2) - forest_average.append(right/len(results)) - print("Random Forest Average Accuracy:") - print(sum(forest_average) / len(forest_average)) - print("SVM Average Accuracy:") - clf = svm.LinearSVC() - clf.fit(classifier_training_X, classifier_training_Y) - - results = clf.predict(classifier_test_X) - # print(results) - i = 0 - - - con_mat = confusion_matrix(results, classifier_test_Y) - # print(con_mat) - - wrong = 0 - 
right = 0 - for i in range(len(con_mat)): - for j in range(len(con_mat[0])): - if i != j: - wrong += con_mat[i][j] - else: - right += con_mat[i][j] - - print(right/len(results)) - - print("Gaussian Naive Bayes Average Accuracy:") - clf = GaussianNB() - clf.fit(classifier_training_X, classifier_training_Y) - - results = clf.predict(classifier_test_X) - # print(results) - i = 0 - - - con_mat = confusion_matrix(results, classifier_test_Y) - # print(con_mat) - - wrong = 0 - right = 0 - for i in range(len(con_mat)): - for j in range(len(con_mat[0])): - if i != j: - wrong += con_mat[i][j] - else: - right += con_mat[i][j] - - print(right/len(results)) - - accuracies[q_count].append(sum(forest_average) / len(forest_average)) - print(num_clusters) - print(quantize_length) - - n_count+=1 - n_count = 0 - q_count +=1 - -#Following code copied froms tack overflow for printing out the confusion matrix - -#convert the string into an array -df_cm = pd.DataFrame(con_mat, index = [i for i in "ABCDEFGHIJKLMN"], - columns = [i for i in "ABCDEFGHIJKLMN"]) -plt.figure(figsize = (10,7)) -sn.heatmap(df_cm, annot=True) -pprint(accuracies) -plt.show() \ No newline at end of file