"""Scrape tweets from x.com through its web GraphQL endpoints.

The module defines light wrappers around the JSON objects returned by the
endpoints (``Tweet``, ``Conversation``, ``User``) and ``TweetsScraper``, which
performs the HTTP requests through a ``requests.Session``.
"""

import json
import os
import re
from datetime import datetime
from typing import Optional, Union

import dotenv
import pytz
import requests
import zstd


class Tweet:
    """A single tweet built from a timeline item object."""

    def __init__(self, tweet_object):
        """Extract the fields of interest from *tweet_object*.

        Args:
            tweet_object: Mapping that holds the tweet under
                ``['itemContent']['tweet_results']['result']``.

        Raises:
            KeyError: If any of the expected keys is missing.
        """
        tweet = tweet_object['itemContent']['tweet_results']['result']
        legacy = tweet['legacy']

        self.id = tweet['rest_id']
        # 'count' is not always present in the views object; fall back to 0.
        self.views = tweet['views'].get('count', 0)
        self.text = legacy['full_text']
        self.likes = legacy['favorite_count']
        self.replies = legacy['reply_count']
        self.retweets = legacy['retweet_count']
        self.quotes = legacy['quote_count']
        # The timestamp carries its own UTC offset (%z); normalise it to UTC.
        self.date = datetime.strptime(
            legacy['created_at'], "%a %b %d %H:%M:%S %z %Y"
        ).astimezone(pytz.utc)
        self.is_retweet = legacy['retweeted']
        self.is_quote = legacy['is_quote_status']
        self.user = User(tweet['core']['user_results'])

    def __repr__(self):
        """Return the counters followed by the tweet text."""
        return (
            f"L:{self.likes} \tRT:{self.retweets} \tQ:{self.quotes} "
            f"\tR:{self.replies} \tV:{self.views} \t{self.text}"
        )

    def __str__(self):
        """Return the counters, the ISO-formatted date and the tweet text."""
        return (
            f"L:{self.likes} \tRT:{self.retweets} \tQ:{self.quotes} "
            f"\tR:{self.replies} \tV:{self.views}\t {self.date.isoformat()}"
            f"\t{self.text}"
        )

    def time_since_post(self):
        """Return the ``timedelta`` between now (UTC) and the tweet's date."""
        return datetime.now(pytz.utc) - self.date


class Conversation:
    """An ordered group of tweets taken from one conversation entry."""

    def __init__(self, converation_object):
        """Build one ``Tweet`` per item of the conversation entry.

        Args:
            converation_object: Timeline entry whose ``['content']['items']``
                is a list of objects that each hold a tweet under ``['item']``.
                (The parameter name keeps its original spelling so existing
                keyword callers keep working.)

        Raises:
            KeyError: If the expected keys are missing.
        """
        self.items = [
            Tweet(item['item'])
            for item in converation_object['content']['items']
        ]

    def __repr__(self):
        """Return a one-line summary with the number of tweets."""
        return f"Conversation - {len(self.items)} tweets"

    def __str__(self):
        """Return the summary followed by one indented line per tweet."""
        # Joined outside an f-string replacement field: nesting the same quote
        # type and a backslash inside one is only valid from Python 3.12.
        tweet_lines = "".join(f"\n\t{tweet}" for tweet in self.items)
        return repr(self) + tweet_lines


class User:
    """Author information built from a ``user_results`` object."""

    def __init__(self, user_object):
        """Extract the profile fields from *user_object*.

        Args:
            user_object: Mapping that holds the user under ``['result']``.

        Raises:
            KeyError: If any of the expected keys is missing.
        """
        result = user_object['result']
        legacy = result['legacy']

        self.id = result['rest_id']
        self.handle = legacy['screen_name']
        self.display_name = legacy['name']
        self.description = legacy['description']
        # Stored exactly as received; it is not parsed into a datetime.
        self.join_date = legacy['created_at']
        self.location = legacy['location']
        self.tweets_count = legacy['statuses_count']
        self.blue_verified = result['is_blue_verified']
        self.follower_count = legacy['normal_followers_count']

    def __str__(self):
        """Return ``"<display name> - @<handle>"``."""
        return f"{self.display_name} - @{self.handle}"

    def __repr__(self):
        """Return the ``__str__`` form followed by the user id."""
        return f"{self.display_name} - @{self.handle} (Id {self.id})"


class TweetsScraper:
    """Client for the x.com GraphQL endpoints used by the web front end.

    All API calls go through one ``requests.Session`` that carries the common
    headers and, when credentials are supplied, the login cookies.
    """

    _GET_ID_BY_SCREEN_NAME_URL = "https://x.com/i/api/graphql/32pL5BWe9WKeSK1MoPvFQQ/UserByScreenName"
    _GET_TWEETS_AND_REPLIES_URL = 'https://x.com/i/api/graphql/zMHMBPv4hZcCNwWLf-XYuA/UserTweetsAndReplies'
    # get_tweets_anonymous referenced this attribute but it was never defined.
    # NOTE(review): assumed to be the same UserTweets endpoint get_tweets
    # uses - confirm.
    _GET_TWEETS_URL = 'https://x.com/i/api/graphql/MpOINUGH_YVb2BKjYZOPaQ/UserTweets'

    # public non-logged-in access token (same for everyone, doesn't expire)
    _PUBLIC_AUTHORIZATION_TOKEN = 'AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA'

    _COMMON_HEADERS = {
        "Accept": "*/*",
        "Accept-Encoding": "gzip, deflate, br, zstd",
        "Accept-Language": "en-US,en;q=0.5",
        "authorization": f"Bearer {_PUBLIC_AUTHORIZATION_TOKEN}",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "content-type": "application/json",
        "Pragma": "no-cache",
        "Sec-GPC": "1",
        "TE": "trailers",
        "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0",
    }

    # The query-string payloads below are sent verbatim; do not reformat them.
    _FEATURES_USER_TWEETS = '{"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"responsive_web_graphql_exclude_directive_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":false,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"rweb_video_timestamps_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":false,"responsive_web_enhance_cards_enabled":false}'
    _FEATURES_USER_TWEETS_AND_REPLIES = '{"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"responsive_web_graphql_exclude_directive_enabled":true,"verified_phone_label_enabled":false,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_timeline_navigation_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"premium_content_api_read_enabled":false,"communities_web_enable_tweet_community_results_fetch":true,"c9s_tweet_anatomy_moderator_badge_enabled":true,"responsive_web_grok_analyze_button_fetch_trends_enabled":false,"responsive_web_grok_analyze_post_followups_enabled":true,"responsive_web_jetfuel_frame":false,"responsive_web_grok_share_attachment_enabled":true,"articles_preview_enabled":true,"responsive_web_edit_tweet_api_enabled":true,"graphql_is_translatable_rweb_tweet_is_translatable_enabled":true,"view_counts_everywhere_api_enabled":true,"longform_notetweets_consumption_enabled":true,"responsive_web_twitter_article_tweet_consumption_enabled":true,"tweet_awards_web_tipping_enabled":false,"responsive_web_grok_analysis_button_from_backend":false,"creator_subscriptions_quote_tweet_preview_enabled":false,"freedom_of_speech_not_reach_fetch_enabled":true,"standardized_nudges_misinfo":true,"tweet_with_visibility_results_prefer_gql_limited_actions_policy_enabled":true,"rweb_video_timestamps_enabled":true,"longform_notetweets_rich_text_read_enabled":true,"longform_notetweets_inline_media_enabled":true,"responsive_web_grok_image_annotation_enabled":true,"responsive_web_enhance_cards_enabled":false}'
    _FIELD_TOGGLES_USER_TWEETS = '{"withArticlePlainText":false}'
    _FEATURES_SCREEN_NAME = '{"hidden_profile_subscriptions_enabled":true,"profile_label_improvements_pcf_label_in_post_enabled":true,"rweb_tipjar_consumption_enabled":true,"responsive_web_graphql_exclude_directive_enabled":true,"verified_phone_label_enabled":false,"subscriptions_verification_info_is_identity_verified_enabled":true,"subscriptions_verification_info_verified_since_enabled":true,"highlights_tweets_tab_ui_enabled":true,"responsive_web_twitter_article_notes_tab_enabled":true,"subscriptions_feature_can_gift_premium":true,"creator_subscriptions_tweet_preview_api_enabled":true,"responsive_web_graphql_skip_user_profile_image_extensions_enabled":false,"responsive_web_graphql_timeline_navigation_enabled":true}'
    _FIELD_TOGGLES_SCREEN_NAME = '{"withAuxiliaryUserLabels":false}'

    def __init__(self, auth_token: Optional[str] = None, csrf_token: Optional[str] = None):
        """Create the HTTP session and optionally attach login credentials.

        Args:
            auth_token: Value for the ``auth_token`` cookie.
            csrf_token: Value for the ``ct0`` cookie and the ``x-csrf-token``
                header.

        Credentials are applied only when BOTH values are given; otherwise the
        session carries just the common headers.
        """
        self._session = requests.Session()
        self._session.headers.update(self._COMMON_HEADERS)

        if auth_token is not None and csrf_token is not None:
            self._session.cookies.set("auth_token", auth_token)
            self._session.cookies.set("ct0", csrf_token)
            self._session.headers.update({"x-csrf-token": csrf_token})

    @staticmethod
    def _parse_json(res):
        """Decode the JSON body of *res*, trying zstd decompression first.

        Falls back to ``res.text`` when the raw content cannot be
        zstd-decompressed or the decompressed bytes are not valid JSON.

        Raises:
            json.JSONDecodeError: If the fallback text is not valid JSON.
        """
        try:
            return json.loads(zstd.decompress(res.content))
        except Exception:
            # Deliberately broad best-effort fallback, but no longer a bare
            # ``except:`` so KeyboardInterrupt / SystemExit still propagate.
            return json.loads(res.text)

    @staticmethod
    def _timeline_entries(res_json) -> list:
        """Return the entries of the first ``TimelineAddEntries`` instruction.

        Returns an empty list when the timeline has no such instruction.

        Raises:
            KeyError: If the response lacks the expected timeline structure.
        """
        timeline = res_json['data']['user']['result']['timeline_v2']['timeline']
        for instruction in timeline['instructions']:
            if instruction['type'] == "TimelineAddEntries":
                return instruction['entries']
        return []

    def _get_guest_token(self):
        """Fetch a guest token and store it as the ``x-guest-token`` header.

        Does nothing when the session already has that header.

        Raises:
            IndexError: If the response body contains no ``gt=`` marker.
        """
        if "x-guest-token" in self._session.headers:
            return

        # Different headers necessary so we dont get a 400 response; for that
        # reason the request is made outside the session.
        headers = {
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Encoding": "gzip, deflate, br, zstd",
            "Accept-Language": "en-US,en;q=0.5",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Pragma": "no-cache",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
            "Sec-GPC": "1",
            "Upgrade-Insecure-Requests": "1",
            "User-Agent": "Mozilla/5.0 (X11; Linux x86_64; rv:134.0) Gecko/20100101 Firefox/134.0",
        }
        res = requests.get("https://x.com/?mx=2", headers=headers)

        # The token is the text between "gt=" and the following ";".
        guest_token = res.text.split("gt=")[1].split(";")[0]
        self._session.headers.update({"x-guest-token": guest_token})

    def get_tweets_anonymous(self, user_id: str) -> list[Tweet]:
        """Fetch one page of a user's tweets using a guest token.

        Args:
            user_id: Id of the user, as returned by ``get_id_from_handle``.

        Returns:
            One ``Tweet`` per timeline entry whose ``entryId`` contains
            ``"tweet"``; empty when the response has no entries.
        """
        self._get_guest_token()

        variables = {
            "userId": user_id,
            "count": 100,
            "includePromotedContent": True,
            "withQuickPromoteEligibilityTweetFields": True,
            "withVoice": True,
            "withV2Timeline": True,
        }
        res = self._session.get(
            self._GET_TWEETS_URL,
            params={
                "variables": json.dumps(variables, separators=(',', ':')),
                "features": self._FEATURES_USER_TWEETS,
                "fieldToggles": self._FIELD_TOGGLES_USER_TWEETS,
            },
        )
        entries = self._timeline_entries(self._parse_json(res))

        # Tweet reads ['itemContent'], which sits under the entry's 'content'
        # (same access as the paginated path in _get_tweets_internal).
        return [
            Tweet(entry['content'])
            for entry in entries
            if "tweet" in entry['entryId']
        ]

    def _get_tweets_internal(
        self, url: str, features: str, user_id: str, count: int = 100
    ) -> list[Union[Tweet, Conversation]]:
        """Page through a user timeline until *count* items are collected.

        Args:
            url: GraphQL endpoint to query.
            features: JSON string sent as the ``features`` query parameter.
            user_id: Id of the user whose timeline is read.
            count: Number of items to collect before stopping. Whole pages are
                appended, so the result may hold more than *count* items.

        Returns:
            ``Tweet`` objects for plain tweet entries and ``Conversation``
            objects for ``profile-conversation`` entries, in timeline order.
        """
        tweets = []
        variables = {
            "userId": user_id,
            "count": min(count, 100),
            "includePromotedContent": True,
            "withQuickPromoteEligibilityTweetFields": True,
            "withVoice": True,
            "withV2Timeline": True,
        }

        while len(tweets) < count:
            res = self._session.get(
                url,
                params={
                    "variables": json.dumps(variables, separators=(',', ':')),
                    "features": features,
                    "fieldToggles": self._FIELD_TOGGLES_USER_TWEETS,
                },
            )
            entries = self._timeline_entries(self._parse_json(res))

            collected_before = len(tweets)
            for entry in entries:
                entry_id = entry['entryId']
                if "tweet" in entry_id:
                    tweets.append(Tweet(entry['content']))
                elif "profile-conversation" in entry_id:
                    tweets.append(Conversation(entry))

            # A page that added nothing means the timeline is exhausted.
            if len(tweets) == collected_before:
                break
            print(f"Got {len(tweets)} tweets")

            # get cursor for next page; stop cleanly when there is none
            next_cursor = next(
                (
                    entry['content']['value']
                    for entry in entries
                    if "cursor-bottom" in entry['entryId']
                ),
                None,
            )
            if next_cursor is None:
                break
            variables['cursor'] = next_cursor

        return tweets

    def get_tweets_and_replies(
        self, user_id: str, count: int = 100
    ) -> list[Union[Tweet, Conversation]]:
        """Fetch tweets and replies from the ``UserTweetsAndReplies`` endpoint.

        See ``_get_tweets_internal`` for the meaning of *count* and the shape
        of the returned list.
        """
        return self._get_tweets_internal(
            self._GET_TWEETS_AND_REPLIES_URL,
            self._FEATURES_USER_TWEETS_AND_REPLIES,
            user_id,
            count,
        )

    def get_tweets(
        self, user_id: str, count: int = 100
    ) -> list[Union[Tweet, Conversation]]:
        """Fetch tweets from the ``UserTweets`` endpoint.

        See ``_get_tweets_internal`` for the meaning of *count* and the shape
        of the returned list.
        """
        return self._get_tweets_internal(
            self._GET_TWEETS_URL,
            self._FEATURES_USER_TWEETS,
            user_id,
            count,
        )

    def get_id_from_handle(self, handle: str) -> str:
        """Resolve a screen name to the user's ``rest_id``.

        Args:
            handle: Screen name; one leading ``@`` is stripped if present.

        Raises:
            KeyError: If the response does not contain a user result.
        """
        if handle.startswith('@'):
            handle = handle[1:]

        variables = {"screen_name": handle}
        res = self._session.get(
            self._GET_ID_BY_SCREEN_NAME_URL,
            params={
                "variables": json.dumps(variables, separators=(',', ':')),
                "features": self._FEATURES_SCREEN_NAME,
                "fieldToggles": self._FIELD_TOGGLES_SCREEN_NAME,
            },
        )
        res_json = self._parse_json(res)
        return res_json['data']['user']['result']['rest_id']


if __name__ == "__main__":
    # Credentials are read from the environment after loading a .env file.
    dotenv.load_dotenv()
    auth_token = os.environ["AUTH_TOKEN"]
    csrf_token = os.environ["CSRF_TOKEN"]

    scraper = TweetsScraper(auth_token, csrf_token)
    user_id = scraper.get_id_from_handle("@pobnellion")
    user_tweets = scraper.get_tweets_and_replies(user_id, 100)

    for t in user_tweets:
        print(t)