From 2a413b6fcb2a7c3bce9dacb2f9fca32dcaf06991 Mon Sep 17 00:00:00 2001
From: Lucas Oskorep
Date: Wed, 29 Apr 2020 15:43:14 -0500
Subject: [PATCH] Adding in script to fetch tweets

---
 ColletTweets.py | 89 +++++++++++++++++++++++++++++++++++++++++++++++++
 TrainModel.py   |  1 +
 2 files changed, 90 insertions(+)
 create mode 100644 ColletTweets.py

diff --git a/ColletTweets.py b/ColletTweets.py
new file mode 100644
index 0000000..fddd19b
--- /dev/null
+++ b/ColletTweets.py
@@ -0,0 +1,89 @@
+# import twitter as tw
+import os
+import csv
+import tweepy
+import datetime
+
+from pprint import pprint
+from dotenv import load_dotenv
+
+load_dotenv()
+
+consumer_key = os.getenv('CKEY', '0')
+consumer_secret = os.getenv('CSECRET', '0')
+access_token = os.getenv('ATOKEN', '0')
+access_token_secret = os.getenv('ASECRET', '0')
+
+auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
+auth.set_access_token(access_token, access_token_secret)
+api = tweepy.API(auth, wait_on_rate_limit=True)
+
+
+def _one_line(text):
+    """Return *text* with line breaks removed so it stays on one CSV row.
+
+    ``None`` (e.g. a profile without a description) becomes an empty string.
+    """
+    return (text or '').replace('\n', ' ').replace('\r', '')
+
+
+def save_hashtag_to_csv(hashtag, days, file, tweets_per_day=100):
+    """Search English tweets matching *hashtag* and save them to a CSV file.
+
+    One search is run per one-day window for the last *days* days (today
+    included), keeping at most *tweets_per_day* tweets from each window.
+    *file* is overwritten and always begins with a header row.
+    """
+    # The context manager closes the file even if a search request fails.
+    with open(file, 'w', newline='', encoding='utf8') as csvFile:
+        csvWriter = csv.writer(csvFile)
+        # Mode 'w' has just truncated the file, so the header is always due.
+        csvWriter.writerow(
+            [
+                "tweet.id",
+                "tweet.created_at",
+                "tweet.text",
+                "tweet.user.name",
+                "tweet.user.screen_name",
+                "tweet.user.location",
+                "tweet.user.id",
+                "tweet.user.followers_count",
+                "tweet.user.friends_count",
+                "tweet.user.favourites_count",
+                "tweet.user.description"
+            ]
+        )
+        # Put the header on disk before the rate-limited searches begin.
+        csvFile.flush()
+        base = datetime.date.today() + datetime.timedelta(days=1)
+        for day in [base - datetime.timedelta(days=x) for x in range(days)]:
+            previous_day = day - datetime.timedelta(days=1)
+            print(str(day), str(previous_day))
+            for tweet in tweepy.Cursor(
+                    api.search,
+                    q=hashtag,
+                    lang="en",
+                    until=str(day),
+                    since=str(previous_day),
+                    result_type="mixed").items(tweets_per_day):
+                print(tweet.created_at, tweet.text)
+                csvWriter.writerow(
+                    [
+                        tweet.id,
+                        tweet.created_at,
+                        _one_line(tweet.text),
+                        tweet.user.name,
+                        tweet.user.screen_name,
+                        tweet.user.location,
+                        tweet.user.id,
+                        tweet.user.followers_count,
+                        tweet.user.friends_count,
+                        tweet.user.favourites_count,
+                        _one_line(tweet.user.description)
+                    ]
+                )
+
+
+save_hashtag_to_csv("#covid", 7, "data/covid.csv")
+save_hashtag_to_csv("#quarantine", 7, "data/quarantine.csv")
+
diff --git a/TrainModel.py b/TrainModel.py
index a73f098..3b9289f 100644
--- a/TrainModel.py
+++ b/TrainModel.py
@@ -3,3 +3,4 @@ import nltk as nltk
 import numpy as np
 import pandas as pd
 import twitter as tw
+from xml import sax
\ No newline at end of file