add readme

This commit is contained in:
2025-01-21 01:42:47 +13:00
parent 75f7a28b60
commit 278d0a56c4
2 changed files with 57 additions and 2 deletions

15
README.md Normal file
View File

@ -0,0 +1,15 @@
## Twitter scraper
Scrape a user's tweets :D
### Usage:
`tweets = TweetsScraper().get_tweets_anonymous("<user_id>")`
Returns a list of the user's tweets as viewed from a logged-out session. Only up to 100 tweets are returned (not necessarily the most recent ones).
`tweets = TweetsScraper().get_tweets("<user_id>")`
Not implemented yet; will get tweets as a logged-in user.
### Tweet object
Contains the text of the tweet, along with the timestamp and some stats (number of likes, number of reposts, views, etc.).

View File

@ -86,6 +86,46 @@ class TweetsScraper:
entries = [i for i in res_json['data']['user']['result']['timeline_v2']['timeline']['instructions'] if i['type'] == "TimelineAddEntries"][0]['entries']
return [Tweet(entry) for entry in entries if "tweet" in entry['entryId']]
def get_tweets(self, user, count=100):
    """Fetch a user's tweets as a logged-in user (not implemented yet).

    The method is currently disabled: it returns ``None`` before doing any
    work. Everything below the early return is an unfinished draft that is
    kept so it can be completed later.

    Args:
        user: Value the draft sends as the ``userId`` request variable.
        count: Number of tweets wanted. The draft asks for at most 100 per
            request.

    Returns:
        ``None`` while the method is disabled. The draft below would return
        a list of ``Tweet`` objects.
    """
    # TODO: remove this early return once the logged-in flow and cursor
    # pagination are finished; until then nothing below is executed.
    return None
    # self._get_guest_token()
    # user_id = self._get_user_by_screen_name(user)
    tweets = []
    variables = {
        "userId": user,
        "count": min(count, 100),
        "includePromotedContent": True,
        "withQuickPromoteEligibilityTweetFields": True,
        "withVoice": True,
        "withV2Timeline": True,
    }
    last_len = 0
    while len(tweets) < count:
        res = requests.get(
            self._GET_TWEETS_URL,
            params={
                "variables": json.dumps(variables, separators=(',', ':')),
                "features": self._FEATURES_USER_TWEETS,
                "fieldToggles": self._FIELD_TOGGLES_USER_TWEETS,
            },
            headers=self._HEADERS,
        )
        # Try the body as zstd-compressed JSON first and fall back to the
        # plain-text body. `Exception` (rather than a bare `except`) keeps
        # KeyboardInterrupt/SystemExit from being swallowed.
        try:
            res_json = json.loads(zstd.decompress(res.content))
        except Exception:
            res_json = json.loads(res.text)
        entries = [i for i in res_json['data']['user']['result']['timeline_v2']['timeline']['instructions'] if i['type'] == "TimelineAddEntries"][0]['entries']
        tweets.extend([Tweet(entry) for entry in entries if "tweet" in entry['entryId']])
        # variables['cursor'] = [entry for entry in entries if "cursor-bottom" in entry['entryId']][0]['content']['value']
        # NOTE: with the cursor update above commented out, the loop stops
        # after a single request; the progress check below is unreachable
        # until this `break` is removed.
        break
        if len(tweets) == last_len:
            break
        print(f"Got {len(tweets)} tweets")
        last_len = len(tweets)
    return tweets
class Tweet():
def __init__(self, tweet_object):
tweet = tweet_object['content']['itemContent']['tweet_results']['result']
@ -109,7 +149,7 @@ class Tweet():
if __name__ == "__main__":
tweets = TweetsScraper().get_tweets_anonymous("1279948441968246785") # pobnellion
user_tweets = TweetsScraper().get_tweets_anonymous("1279948441968246785") # pobnellion
for t in tweets:
for t in user_tweets:
print(t)