added basic files for extracting twitter data
This commit is contained in:
+114
@@ -0,0 +1,114 @@
|
||||
from tweepy import API
|
||||
from tweepy import Cursor
|
||||
from tweepy.streaming import StreamListener
|
||||
from tweepy import OAuthHandler
|
||||
from tweepy import Stream
|
||||
|
||||
import twitter_credentials
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
|
||||
|
||||
# # # # TWITTER CLIENT # # # #
|
||||
class TwitterClient:
    """Thin wrapper around an authenticated tweepy ``API`` object.

    Authentication is delegated to ``TwitterAuthenticator``; the resulting
    handler is kept on ``self.auth`` and the API object built from it on
    ``self.twitter_client``.
    """

    def __init__(self, twitter_user=None):
        """Authenticate and build the API client.

        Args:
            twitter_user: Optional value stored on ``self.twitter_user``.
                None of the methods in this class read it.
        """
        self.auth = TwitterAuthenticator().authenticate_twitter_app()
        self.twitter_client = API(self.auth)

        self.twitter_user = twitter_user

    def get_twitter_client_api(self):
        """Return the API object created in ``__init__``."""
        return self.twitter_client

    def get_tweets(self, hash_tag, num_tweets):
        """Search for tweets matching ``hash_tag`` and return them as a list.

        Args:
            hash_tag: Query passed as ``q`` to the API's ``search`` method.
            num_tweets: Item limit passed to ``Cursor.items``.

        Returns:
            list: The items yielded by the cursor, in the order received.
        """
        cursor = Cursor(self.twitter_client.search, q=hash_tag)
        # list() drains the paginated iterator directly; no manual
        # append loop is needed just to copy items into a list.
        return list(cursor.items(num_tweets))
|
||||
|
||||
|
||||
# # # # TWITTER AUTHENTICATOR # # # #
|
||||
class TwitterAuthenticator:
    """Builds the OAuth handler from the keys in ``twitter_credentials``."""

    def authenticate_twitter_app(self):
        """Return an ``OAuthHandler`` with consumer keys and access token set."""
        creds = twitter_credentials
        handler = OAuthHandler(creds.CONSUMER_KEY, creds.CONSUMER_SECRET)
        handler.set_access_token(creds.ACCESS_TOKEN, creds.ACCESS_TOKEN_SECRET)
        return handler
|
||||
|
||||
class TweetAnalyzer():
    """
    Functionality for analyzing and categorizing content from tweets.
    """

    def tweets_to_data_frame(self, tweets):
        """Build a DataFrame with one row per tweet.

        Args:
            tweets: Iterable of objects exposing the attributes ``text``,
                ``id``, ``geo``, ``created_at``, ``favorite_count``,
                ``retweet_count`` and ``source``. Any iterable is accepted,
                including one-shot generators.

        Returns:
            pandas.DataFrame: Columns ``Tweets``, ``id``, ``geo``, ``date``,
            ``len_of_tweet``, ``likes``, ``retweets`` and ``source``, in
            that order.
        """
        # Materialize once: the input is traversed once per column, so a
        # one-shot iterator would be exhausted after the first column and
        # every later column assignment would fail on a length mismatch.
        tweets = list(tweets)

        df = pd.DataFrame(data=[tweet.text for tweet in tweets], columns=['Tweets'])

        df['id'] = np.array([tweet.id for tweet in tweets])
        df['geo'] = np.array([tweet.geo for tweet in tweets])
        df['date'] = np.array([tweet.created_at for tweet in tweets])
        df['len_of_tweet'] = np.array([len(tweet.text) for tweet in tweets])
        df['likes'] = np.array([tweet.favorite_count for tweet in tweets])
        df['retweets'] = np.array([tweet.retweet_count for tweet in tweets])
        df['source'] = np.array([tweet.source for tweet in tweets])

        return df
|
||||
|
||||
|
||||
# # # # TWITTER STREAMER # # # #
|
||||
class TwitterStreamer:
    """
    Unused ATM
    Class for streaming and processing live tweets.
    """

    def __init__(self):
        # The attribute name keeps its original spelling so existing
        # callers that read it keep working.
        self.twitter_autenticator = TwitterAuthenticator()

    def stream_tweets(self, fetched_tweets_filename, hash_tag_list):
        """Open a stream that hands tweets matching ``hash_tag_list`` to a listener.

        Args:
            fetched_tweets_filename: Passed to ``TwitterListener``.
            hash_tag_list: Keywords passed as ``track`` to ``Stream.filter``.
        """
        # Set up the listener and the Twitter authentication used for the
        # Streaming API connection.
        tweet_listener = TwitterListener(fetched_tweets_filename)
        oauth = self.twitter_autenticator.authenticate_twitter_app()

        # Filter the stream so only data matching the keywords is captured.
        Stream(oauth, tweet_listener).filter(track=hash_tag_list)
|
||||
|
||||
|
||||
# # # # TWITTER STREAM LISTENER # # # #
|
||||
class TwitterListener(StreamListener):
    """
    Unused ATM
    Basic listener that prints each received tweet and appends it to the
    file named by fetched_tweets_filename.
    """

    def __init__(self, fetched_tweets_filename):
        """Remember the output file.

        Args:
            fetched_tweets_filename: Path of the file that ``on_data``
                appends to.
        """
        # Run the base-class initializer so any state it sets up exists.
        super().__init__()
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        """Print ``data`` and append it to the output file.

        Failures are reported on stdout and otherwise ignored (best effort).

        Returns:
            bool: Always True, whether or not the write succeeded.
        """
        try:
            print(data)
            # Explicit UTF-8 so tweet text does not depend on the platform's
            # default encoding.
            with open(self.fetched_tweets_filename, 'a', encoding='utf-8') as tf:
                tf.write(data)
        except Exception as e:
            # Catch Exception, not BaseException: KeyboardInterrupt and
            # SystemExit must propagate so the stream can be stopped.
            print("Error on_data %s" % str(e))
        return True

    def on_error(self, status):
        """Handle an error status reported by the stream.

        Returns:
            False when ``status`` is 420; otherwise the status is printed
            and None is returned.
        """
        if status == 420:
            # 420 is the rate-limit status: return False rather than
            # carrying on when the rate limit is hit.
            return False
        print(status)
|
||||
|
||||
if __name__ == '__main__':
    # Demo run: search for tweets, tabulate them, and show the first rows.
    client = TwitterClient()
    analyzer = TweetAnalyzer()

    api = client.get_twitter_client_api()

    frame = analyzer.tweets_to_data_frame(client.get_tweets('COVID-19', 100))

    print(frame.head(10))
|
||||
|
||||
Reference in New Issue
Block a user