Adding in data with sentiment analysis
This commit is contained in:
@@ -0,0 +1,27 @@
|
||||
from textblob import TextBlob
|
||||
import pandas as pd
|
||||
import re
|
||||
import preprocessor as p
|
||||
import numpy as np
|
||||
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
|
||||
|
||||
df = pd.read_csv("./data/sts_gold_tweets.csv")
|
||||
df = df.sort_values(by=['polarity'])
|
||||
|
||||
reals = []
|
||||
preds = []
|
||||
|
||||
for index, row in df.iterrows():
|
||||
tweet = row["tweet.text"]
|
||||
# tweet = re.sub(r'^https?:\/\/.*[\r\n]*', '', tweet, flags=re.MULTILINE)
|
||||
tweet = p.clean(tweet)
|
||||
print(tweet, row["tweet.text"])
|
||||
tb = TextBlob(tweet)
|
||||
print(row["polarity"], 4 if tb.polarity > 0 else 0, tb.subjectivity)
|
||||
reals.append(row["polarity"])
|
||||
preds.append(4 if tb.polarity > 0 else 0)
|
||||
|
||||
# default accuracy is 72% - NICE!
|
||||
print(accuracy_score(reals, preds))
|
||||
print(confusion_matrix(reals, preds))
|
||||
print(classification_report(reals, preds))
|
||||
Reference in New Issue
Block a user