diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000..e859bb2
Binary files /dev/null and b/.DS_Store differ
diff --git a/requirements.txt b/requirements.txt
index e4f1f2e..3486c35 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,3 +3,4 @@ pandas==0.25.0
 spacy==2.2.4
 numpy==1.16.4
 python_twitter==3.5
+tweepy==3.8.0
\ No newline at end of file
diff --git a/twitter_credentials.py b/twitter_credentials.py
new file mode 100644
index 0000000..2051e67
--- /dev/null
+++ b/twitter_credentials.py
@@ -0,0 +1,5 @@
+# Variables that contains the user credentials to access Twitter API
+ACCESS_TOKEN = ""
+ACCESS_TOKEN_SECRET = ""
+CONSUMER_KEY = ""
+CONSUMER_SECRET = ""
diff --git a/twitter_data.py b/twitter_data.py
new file mode 100644
index 0000000..725f740
--- /dev/null
+++ b/twitter_data.py
@@ -0,0 +1,133 @@
+"""Fetch tweets with tweepy and tabulate them with pandas."""
+from tweepy import API
+from tweepy import Cursor
+from tweepy.streaming import StreamListener
+from tweepy import OAuthHandler
+from tweepy import Stream
+
+import twitter_credentials
+import numpy as np
+import pandas as pd
+
+
+# # # # TWITTER CLIENT # # # #
+class TwitterClient():
+    """Wraps a tweepy ``API`` object built from the stored credentials."""
+
+    def __init__(self, twitter_user=None):
+        self.auth = TwitterAuthenticator().authenticate_twitter_app()
+        self.twitter_client = API(self.auth)
+
+        self.twitter_user = twitter_user
+
+    def get_twitter_client_api(self):
+        """Return the underlying tweepy ``API`` instance."""
+        return self.twitter_client
+
+    def get_tweets(self, hash_tag, num_tweets):
+        """Return a list of up to ``num_tweets`` search hits for ``hash_tag``."""
+        cursor = Cursor(self.twitter_client.search, q=hash_tag)
+        return list(cursor.items(num_tweets))
+
+
+# # # # TWITTER AUTHENTICATOR # # # #
+class TwitterAuthenticator():
+    """Builds the OAuth handler from the values in ``twitter_credentials``."""
+
+    def authenticate_twitter_app(self):
+        """Return an ``OAuthHandler`` loaded with the consumer and access keys."""
+        auth = OAuthHandler(twitter_credentials.CONSUMER_KEY, twitter_credentials.CONSUMER_SECRET)
+        auth.set_access_token(twitter_credentials.ACCESS_TOKEN, twitter_credentials.ACCESS_TOKEN_SECRET)
+        return auth
+
+
+class TweetAnalyzer():
+    """
+    Functionality for analyzing and categorizing content from tweets.
+    """
+
+    def tweets_to_data_frame(self, tweets):
+        """Return a DataFrame with one row per item in ``tweets``."""
+        df = pd.DataFrame(data=[tweet.text for tweet in tweets], columns=['Tweets'])
+
+        df['id'] = np.array([tweet.id for tweet in tweets])
+        df['geo'] = np.array([tweet.geo for tweet in tweets])
+        df['date'] = np.array([tweet.created_at for tweet in tweets])
+        df['len_of_tweet'] = np.array([len(tweet.text) for tweet in tweets])
+        df['likes'] = np.array([tweet.favorite_count for tweet in tweets])
+        df['retweets'] = np.array([tweet.retweet_count for tweet in tweets])
+        df['source'] = np.array([tweet.source for tweet in tweets])
+
+        return df
+
+
+# # # # TWITTER STREAMER # # # #
+class TwitterStreamer():
+    """
+    Unused ATM
+    Class for streaming and processing live tweets.
+    """
+
+    def __init__(self):
+        # The attribute keeps its original spelling so existing readers of it
+        # keep working.
+        self.twitter_autenticator = TwitterAuthenticator()
+
+    def stream_tweets(self, fetched_tweets_filename, hash_tag_list):
+        """Stream tweets matching ``hash_tag_list`` into ``fetched_tweets_filename``."""
+        # This handles Twitter authentication and the connection to the Twitter Streaming API
+        listener = TwitterListener(fetched_tweets_filename)
+        auth = self.twitter_autenticator.authenticate_twitter_app()
+        stream = Stream(auth, listener)
+
+        # This line filters the Twitter stream to capture data by the keywords:
+        stream.filter(track=hash_tag_list)
+
+
+# # # # TWITTER STREAM LISTENER # # # #
+class TwitterListener(StreamListener):
+    """
+    Unused ATM
+    This is a basic listener that prints received tweets and appends them to fetched_tweets_filename.
+    """
+
+    def __init__(self, fetched_tweets_filename):
+        # Run StreamListener's own initialisation before adding our state.
+        super().__init__()
+        self.fetched_tweets_filename = fetched_tweets_filename
+
+    def on_data(self, data):
+        """Print ``data`` and append it to the output file; always return True."""
+        try:
+            print(data)
+            with open(self.fetched_tweets_filename, 'a') as tf:
+                tf.write(data)
+        except Exception as e:
+            # Best effort: report the failure and keep the stream alive.
+            # Exception rather than BaseException, so Ctrl-C and SystemExit
+            # are no longer swallowed here.
+            print("Error on_data %s" % str(e))
+        return True
+
+    def on_error(self, status):
+        """Return False on status 420; otherwise print ``status``."""
+        if status == 420:
+            # Returning False from on_error in case the rate limit occurs.
+            return False
+        print(status)
+
+
+def main():
+    """Fetch 100 'COVID-19' tweets and print the first ten rows."""
+    twitter_client = TwitterClient()
+    tweet_analyzer = TweetAnalyzer()
+
+    tweets = twitter_client.get_tweets('COVID-19', 100)
+
+    df = tweet_analyzer.tweets_to_data_frame(tweets)
+
+    print(df.head(10))
+
+
+if __name__ == '__main__':
+    main()