From cc3498af87d7df711ee42490cbac01d5831ac4ae Mon Sep 17 00:00:00 2001 From: minamotorin <76122224+minamotorin@users.noreply.github.com> Date: Sun, 2 Jan 2022 15:00:44 +0000 Subject: [PATCH 1/2] Fix RefreshTokenException('Could not find the Guest token in HTML') ABOUT THE PROBLEM This problem has recently begun to occur in some environments. It doesn't happen every time, so if you are lucky, you don't get the error. The cause is literally that twint could not find the Guest token in the HTML. Recently, the token is sometimes not included in the HTML at all. #!/usr/bin/env python3 # This program is WTFPL. import requests res = requests.get('https://twitter.com') print(res.text.split('\n')[-1]) twint requires the output of the above code to contain the Guest token ahead of the closing })();. However, sometimes the output is only })(); and the Guest token is missing. ABOUT THE SOLUTION In this patch, twint gets the Guest token from https://api.twitter.com/1.1/guest/activate.json if it could not find one in the HTML. The author referred to the code of gallery-dl: https://github.com/mikf/gallery-dl/blob/47eae4c393f09937a5dbcc2cb978702fb173e747/gallery_dl/extractor/twitter.py#L780-L783 Author's note: > I don't understand session of requests, so the code may be not good. > I hope someone rewrite the patch better and create a pull request. This commit was adopted from: https://github.com/twintproject/twint/issues/1320#issuecomment-1003094346 Closes https://github.com/twintproject/twint/issues/1320. 
--- twint/token.py | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/twint/token.py b/twint/token.py index ae66a24a..2eedcee4 100644 --- a/twint/token.py +++ b/twint/token.py @@ -65,5 +65,30 @@ def refresh(self): logme.debug('Found guest token in HTML') self.config.Guest_token = str(match.group(1)) else: - self.config.Guest_token = None - raise RefreshTokenException('Could not find the Guest token in HTML') + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:78.0) Gecko/20100101 Firefox/78.0', + 'authority': 'api.twitter.com', + 'content-length': '0', + 'authorization': self.config.Bearer_token, + 'x-twitter-client-language': 'en', + 'x-csrf-token': res.cookies.get("ct0"), + 'x-twitter-active-user': 'yes', + 'content-type': 'application/x-www-form-urlencoded', + 'accept': '*/*', + 'sec-gpc': '1', + 'origin': 'https://twitter.com', + 'sec-fetch-site': 'same-site', + 'sec-fetch-mode': 'cors', + 'sec-fetch-dest': 'empty', + 'referer': 'https://twitter.com/', + 'accept-language': 'en-US', + } + self._session.headers.update(headers) + req = self._session.prepare_request(requests.Request('POST', 'https://api.twitter.com/1.1/guest/activate.json')) + res = self._session.send(req, allow_redirects=True, timeout=self._timeout) + match = re.search(r'{"guest_token":"(\d+)"}', res.text) + if match: + self.config.Guest_token = str(match.group(1)) + else: + self.config.Guest_token = None + raise RefreshTokenException('Could not find the Guest token in HTML') From 62a1b7ccbe689a3e4827e31f822fb3f901477ac4 Mon Sep 17 00:00:00 2001 From: minamotorin <76122224+minamotorin@users.noreply.github.com> Date: Sun, 2 Jan 2022 18:46:28 +0100 Subject: [PATCH 2/2] Update log and error messages via https://github.com/twintproject/twint/issues/1320#issuecomment-1003744905 --- twint/token.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/twint/token.py b/twint/token.py index 2eedcee4..5211e44a 100644 
--- a/twint/token.py +++ b/twint/token.py @@ -9,11 +9,11 @@ class TokenExpiryException(Exception): def __init__(self, msg): super().__init__(msg) - + class RefreshTokenException(Exception): def __init__(self, msg): super().__init__(msg) - + class Token: def __init__(self, config): @@ -88,7 +88,8 @@ def refresh(self): res = self._session.send(req, allow_redirects=True, timeout=self._timeout) match = re.search(r'{"guest_token":"(\d+)"}', res.text) if match: + logme.debug('Found guest token in JSON') self.config.Guest_token = str(match.group(1)) else: self.config.Guest_token = None - raise RefreshTokenException('Could not find the Guest token in HTML') + raise RefreshTokenException('Could not find the Guest token in JSON')