add readme
This commit is contained in:
15
README.md
Normal file
15
README.md
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
## Twitter scraper
|
||||||
|
Scrape a user's tweets :D
|
||||||
|
|
||||||
|
### Usage:
|
||||||
|
|
||||||
|
`tweets = TweetsScraper().get_tweets_anonymous("<user_id>")`
|
||||||
|
|
||||||
|
Returns a list of the user's tweets as viewed from a logged-out session. Only returns 100 tweets (not necessarily the most recent).
|
||||||
|
|
||||||
|
`tweets = TweetsScraper().get_tweets("<user_id>")`
|
||||||
|
|
||||||
|
Not implemented yet; will get tweets as a logged-in user.
|
||||||
|
|
||||||
|
### Tweet object
|
||||||
|
Contains the text of the tweet, along with the timestamp and some stats (number of likes, reposts, views, etc.)
|
||||||
44
scraper.py
44
scraper.py
@ -86,6 +86,46 @@ class TweetsScraper:
|
|||||||
entries = [i for i in res_json['data']['user']['result']['timeline_v2']['timeline']['instructions'] if i['type'] == "TimelineAddEntries"][0]['entries']
|
entries = [i for i in res_json['data']['user']['result']['timeline_v2']['timeline']['instructions'] if i['type'] == "TimelineAddEntries"][0]['entries']
|
||||||
return [Tweet(entry) for entry in entries if "tweet" in entry['entryId']]
|
return [Tweet(entry) for entry in entries if "tweet" in entry['entryId']]
|
||||||
|
|
||||||
|
def get_tweets(self, user, count=100):
    """Fetch a user's tweets (stub: not implemented yet).

    The method currently returns ``None`` immediately, so none of the
    request code below the first ``return`` is ever executed. That code is
    an unfinished draft kept as the starting point for the real
    implementation.

    Args:
        user: Value sent as ``userId`` in the request variables.
        count: Number of tweets wanted. The draft requests at most 100
            per call (``min(count, 100)``).

    Returns:
        ``None`` while the method is a stub. The draft below would return
        a list of ``Tweet`` objects.
    """
    # Stub: bail out before doing any network I/O.
    return

    # ------------------------------------------------------------------
    # Unreachable draft. Nothing below runs until the `return` above is
    # removed.
    # ------------------------------------------------------------------
    # self._get_guest_token()
    # user_id = self._get_user_by_screen_name(user)

    tweets = []
    variables = {
        "userId": user,
        "count": min(count, 100),
        "includePromotedContent": True,
        "withQuickPromoteEligibilityTweetFields": True,
        "withVoice": True,
        "withV2Timeline": True
    }
    last_len = 0

    while len(tweets) < count:
        res = requests.get(
            self._GET_TWEETS_URL,
            params={
                "variables": json.dumps(variables, separators=(',', ':')),
                "features": self._FEATURES_USER_TWEETS,
                "fieldToggles": self._FIELD_TOGGLES_USER_TWEETS,
            },
            headers=self._HEADERS,
        )

        # Try the body as zstd-compressed JSON first; if decompression or
        # parsing fails, fall back to parsing the response text. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        try:
            res_json = json.loads(zstd.decompress(res.content))
        except Exception:
            res_json = json.loads(res.text)

        entries = [i for i in res_json['data']['user']['result']['timeline_v2']['timeline']['instructions'] if i['type'] == "TimelineAddEntries"][0]['entries']
        tweets.extend([Tweet(entry) for entry in entries if "tweet" in entry['entryId']])
        # Pagination is disabled in the draft: the cursor is never advanced.
        # variables['cursor'] = [entry for entry in entries if "cursor-bottom" in entry['entryId']][0]['content']['value']

        # With no cursor, the draft stops after a single page. The progress
        # check after this break is therefore unreachable as well.
        break

        if len(tweets) == last_len:
            break

        print(f"Got {len(tweets)} tweets")
        last_len = len(tweets)

    return tweets
|
||||||
|
|
||||||
class Tweet():
|
class Tweet():
|
||||||
def __init__(self, tweet_object):
|
def __init__(self, tweet_object):
|
||||||
tweet = tweet_object['content']['itemContent']['tweet_results']['result']
|
tweet = tweet_object['content']['itemContent']['tweet_results']['result']
|
||||||
@ -109,7 +149,7 @@ class Tweet():
|
|||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
tweets = TweetsScraper().get_tweets_anonymous("1279948441968246785") # pobnellion
|
user_tweets = TweetsScraper().get_tweets_anonymous("1279948441968246785") # pobnellion
|
||||||
|
|
||||||
for t in tweets:
|
for t in user_tweets:
|
||||||
print(t)
|
print(t)
|
||||||
Reference in New Issue
Block a user