"""Fetch tweets through the Twitter API and tabulate them with pandas.

NOTE(review): this module uses ``StreamListener`` and ``API.search``, which
appear to correspond to the tweepy 3.x interface -- confirm the pinned tweepy
version before upgrading.
"""

import numpy as np
import pandas as pd
from tweepy import API
from tweepy import Cursor
from tweepy import OAuthHandler
from tweepy import Stream
from tweepy.streaming import StreamListener

import twitter_credentials


# # # # TWITTER CLIENT # # # #
class TwitterClient:
    """Authenticated wrapper around the tweepy ``API`` object."""

    def __init__(self, twitter_user=None):
        """Authenticate and build the API client.

        Args:
            twitter_user: Optional user identifier; stored on the instance
                but not otherwise used by the methods in this class.
        """
        self.auth = TwitterAuthenticator().authenticate_twitter_app()
        self.twitter_client = API(self.auth)
        self.twitter_user = twitter_user

    def get_twitter_client_api(self):
        """Return the underlying tweepy ``API`` instance."""
        return self.twitter_client

    def get_tweets(self, hash_tag, num_tweets):
        """Search for tweets matching ``hash_tag``.

        Args:
            hash_tag: Query string passed to the search endpoint as ``q``.
            num_tweets: Maximum number of results to pull from the cursor.

        Returns:
            A list of the tweet objects yielded by the cursor.
        """
        search_cursor = Cursor(self.twitter_client.search, q=hash_tag)
        return list(search_cursor.items(num_tweets))


# # # # TWITTER AUTHENTICATOR # # # #
class TwitterAuthenticator:
    """Builds an OAuth handler from the keys in ``twitter_credentials``."""

    def authenticate_twitter_app(self):
        """Return an ``OAuthHandler`` with consumer keys and access token set."""
        auth = OAuthHandler(
            twitter_credentials.CONSUMER_KEY,
            twitter_credentials.CONSUMER_SECRET,
        )
        auth.set_access_token(
            twitter_credentials.ACCESS_TOKEN,
            twitter_credentials.ACCESS_TOKEN_SECRET,
        )
        return auth


class TweetAnalyzer:
    """
    Functionality for analyzing and categorizing content from tweets.
    """

    def tweets_to_data_frame(self, tweets):
        """Convert tweet objects into a ``pandas.DataFrame``.

        Args:
            tweets: Sequence of objects exposing ``text``, ``id``, ``geo``,
                ``created_at``, ``favorite_count``, ``retweet_count`` and
                ``source`` attributes. It is iterated several times, so it
                must not be a one-shot iterator.

        Returns:
            A DataFrame with one row per tweet and the columns ``Tweets``,
            ``id``, ``geo``, ``date``, ``len_of_tweet``, ``likes``,
            ``retweets`` and ``source``.
        """
        df = pd.DataFrame(
            data=[tweet.text for tweet in tweets], columns=['Tweets']
        )

        df['id'] = np.array([tweet.id for tweet in tweets])
        df['geo'] = np.array([tweet.geo for tweet in tweets])
        df['date'] = np.array([tweet.created_at for tweet in tweets])
        df['len_of_tweet'] = np.array([len(tweet.text) for tweet in tweets])
        df['likes'] = np.array([tweet.favorite_count for tweet in tweets])
        df['retweets'] = np.array([tweet.retweet_count for tweet in tweets])
        df['source'] = np.array([tweet.source for tweet in tweets])

        return df


# # # # TWITTER STREAMER # # # #
class TwitterStreamer:
    """
    Class for streaming and processing live tweets.

    Currently unused by the script section at the bottom of this file.
    """

    def __init__(self):
        # The attribute name keeps its original spelling so existing callers
        # that reach for it keep working.
        self.twitter_autenticator = TwitterAuthenticator()

    def stream_tweets(self, fetched_tweets_filename, hash_tag_list):
        """Stream live tweets matching the given keywords into a file.

        Args:
            fetched_tweets_filename: Path of the file the listener appends
                the received data to.
            hash_tag_list: Keywords passed to the stream filter as ``track``.
        """
        # This handles Twitter authentication and the connection to the
        # Twitter Streaming API.
        listener = TwitterListener(fetched_tweets_filename)
        auth = self.twitter_autenticator.authenticate_twitter_app()
        stream = Stream(auth, listener)

        # Filter the stream so only data matching the keywords is captured.
        stream.filter(track=hash_tag_list)


# # # # TWITTER STREAM LISTENER # # # #
class TwitterListener(StreamListener):
    """
    Basic listener that prints received data to stdout and appends it to
    ``fetched_tweets_filename``.

    Currently unused by the script section at the bottom of this file.
    """

    def __init__(self, fetched_tweets_filename):
        """Remember the output path.

        Args:
            fetched_tweets_filename: Path of the file to append data to.
        """
        # Run the base-class initialiser so any state StreamListener sets up
        # is not skipped.
        super().__init__()
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        """Print one received payload and append it to the output file.

        Failures are reported on stdout and otherwise ignored so a single bad
        payload does not stop the stream.

        Returns:
            Always ``True``.
        """
        try:
            print(data)
            # Explicit UTF-8: tweet text is frequently non-ASCII and the
            # platform default encoding may not be able to represent it.
            with open(self.fetched_tweets_filename, 'a',
                      encoding='utf-8') as tf:
                tf.write(data)
        except Exception as e:
            # Deliberately not BaseException: KeyboardInterrupt / SystemExit
            # must still be able to stop the process.
            print("Error on_data %s" % str(e))
        return True

    def on_error(self, status):
        """Handle an error status reported by the stream.

        Returns:
            ``False`` when ``status`` is 420 (rate limiting), ``True`` for
            any other status after printing it.
        """
        if status == 420:
            # Return False from on_error when the rate limit is hit.
            return False
        print(status)
        return True


if __name__ == '__main__':
    twitter_client = TwitterClient()
    tweet_analyzer = TweetAnalyzer()

    tweets = twitter_client.get_tweets('COVID-19', 100)

    df = tweet_analyzer.tweets_to_data_frame(tweets)

    print(df.head(10))