Adding in data with sentiment analysis

2020-05-05 13:06:46 -05:00
parent 72ad18d95d
commit fca9d9efa2
9 changed files with 2699 additions and 14 deletions
@@ -0,0 +1,27 @@
+from textblob import TextBlob
+import pandas as pd
+import re
+import preprocessor as p
+import numpy as np
+from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
+
+df = pd.read_csv("./data/sts_gold_tweets.csv")
+df = df.sort_values(by=['polarity'])
+
+reals = []
+preds = []
+
+for index, row in df.iterrows():
+    tweet = row["tweet.text"]
+    # tweet = re.sub(r'^https?:\/\/.*[\r\n]*', '', tweet, flags=re.MULTILINE)
+    tweet = p.clean(tweet)
+    print(tweet, row["tweet.text"])
+    tb = TextBlob(tweet)
+    print(row["polarity"], 4 if tb.polarity > 0 else 0, tb.subjectivity)
+    reals.append(row["polarity"])
+    preds.append(4 if tb.polarity > 0 else 0)
+
+# default accuracy is 72% - NICE!
+print(accuracy_score(reals, preds))
+print(confusion_matrix(reals, preds))
+print(classification_report(reals, preds))