diff --git a/README.md b/README.md
new file mode 100644
index 0000000..6e361ba
--- /dev/null
+++ b/README.md
@@ -0,0 +1,15 @@
+## Twitter scraper
+Scrape a user's tweets :D
+
+### Usage:
+
+`tweets = TweetsScraper().get_tweets_anonymous("")`
+
+Returns a list of tweets from the user as viewed from a logged-out session. Will only return 100 tweets (not necessarily the most recent).
+
+`tweets = TweetsScraper().get_tweets("")`
+
+Not implemented yet; will get tweets as a logged-in user.
+
+### Tweet object
+Contains the text of the tweet, along with the timestamp and some stats (like count, repost count, views, etc.)
diff --git a/scraper.py b/scraper.py
index e44a835..2eecc7b 100644
--- a/scraper.py
+++ b/scraper.py
@@ -86,6 +86,53 @@ class TweetsScraper:
         entries = [i for i in res_json['data']['user']['result']['timeline_v2']['timeline']['instructions'] if i['type'] == "TimelineAddEntries"][0]['entries']
         return [Tweet(entry) for entry in entries if "tweet" in entry['entryId']]
 
+    def get_tweets(self, user, count=100):
+        """Intended to fetch tweets for ``user`` as a logged-in session (not implemented yet).
+
+        Currently a stub: the bare ``return`` below exits immediately with
+        ``None``, so the draft paginated fetch that follows never runs.
+        """
+        return
+        # self._get_guest_token()
+        # user_id = self._get_user_by_screen_name(user)
+
+
+        tweets = []
+        variables = {
+            "userId": user,
+            "count": min(count, 100),
+            "includePromotedContent": True,
+            "withQuickPromoteEligibilityTweetFields": True,
+            "withVoice": True,
+            "withV2Timeline": True
+        }
+        last_len = 0
+
+        while len(tweets) < count:
+            res = requests.get(self._GET_TWEETS_URL, params={"variables": json.dumps(variables, separators=(',', ':')), "features": self._FEATURES_USER_TWEETS, "fieldToggles": self._FIELD_TOGGLES_USER_TWEETS}, headers=self._HEADERS)
+
+            res_json = None
+            # Try the body as zstd-compressed JSON first; fall back to the plain response text.
+            try:
+                res_json = json.loads(zstd.decompress(res.content))
+            except Exception:
+                res_json = json.loads(res.text)
+
+            entries = [i for i in res_json['data']['user']['result']['timeline_v2']['timeline']['instructions'] if i['type'] == "TimelineAddEntries"][0]['entries']
+            tweets.extend([Tweet(entry) for entry in entries if "tweet" in entry['entryId']])
+            # variables['cursor'] = [entry for entry in entries if "cursor-bottom" in entry['entryId']][0]['content']['value']
+
+            # The cursor update above is still commented out, so stop after the first page.
+            break
+
+            if len(tweets) == last_len:
+                break
+
+            print(f"Got {len(tweets)} tweets")
+            last_len = len(tweets)
+
+        return tweets
+
 class Tweet():
     def __init__(self, tweet_object):
         tweet = tweet_object['content']['itemContent']['tweet_results']['result']
@@ -109,7 +156,7 @@ class Tweet():
 
 
 if __name__ == "__main__":
-    tweets = TweetsScraper().get_tweets_anonymous("1279948441968246785") # pobnellion
+    user_tweets = TweetsScraper().get_tweets_anonymous("1279948441968246785") # pobnellion
 
-    for t in tweets:
+    for t in user_tweets:
         print(t)
\ No newline at end of file