Add actual scraping
parent f28d600024
commit 28b11548bf
@@ -43,5 +43,8 @@ tasks:
cd twitter_offline_engine/scraper
golangci-lint run

cd ../cmd
golangci-lint run

duration=$SECONDS
echo "Task completed in $(($duration / 60))m$(($duration % 60))s."
cmd/fetch_tweet_to_stdout.go (new file, 57 lines)
@@ -0,0 +1,57 @@
package main

import (
    "os"
    "fmt"
    "offline_twitter/scraper"
    // "time"
    "log"
    "strings"
)

const INCLUDE_REPLIES = true

// input: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
func parse_tweet(url string) (string, error) {
    parts := strings.Split(url, "/")
    if len(parts) != 6 {
        return "", fmt.Errorf("Tweet format isn't right (%d)", len(parts))
    }
    if parts[0] != "https:" || parts[1] != "" || parts[2] != "twitter.com" || parts[4] != "status" {
        return "", fmt.Errorf("Tweet format isn't right")
    }
    return parts[5], nil
}

func main() {
    if len(os.Args) < 2 {
        log.Fatal("Must provide tweet! Exiting...")
    }

    tweet_id, err := parse_tweet(os.Args[1])
    if err != nil {
        log.Fatal(err.Error())
    }

    if INCLUDE_REPLIES {
        tweets, retweets, users, err := scraper.GetTweetFull(tweet_id)
        if err != nil {
            log.Fatal(err.Error())
        }
        for _, t := range tweets {
            fmt.Printf("%v\n", t)
        }
        for _, t := range retweets {
            fmt.Printf("%v\n", t)
        }
        for _, u := range users {
            fmt.Printf("%v\n", u)
        }
    } else {
        tweet, err := scraper.GetTweet(tweet_id)
        if err != nil {
            log.Fatal(err.Error())
        }
        fmt.Printf("%v\n", tweet)
    }
}
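As a quick illustration of the URL handling in parse_tweet above (a standalone sketch, not part of the commit; the expected values follow directly from how strings.Split treats the example URL in the comment):

package main

import (
    "fmt"
    "strings"
)

func main() {
    // Splitting the example URL on "/" yields exactly 6 parts:
    // ["https:", "", "twitter.com", "michaelmalice", "status", "1395882872729477131"]
    url := "https://twitter.com/michaelmalice/status/1395882872729477131"
    parts := strings.Split(url, "/")
    fmt.Println(len(parts)) // 6
    fmt.Println(parts[5])   // 1395882872729477131, the tweet ID parse_tweet returns
}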
cmd/fetch_tweet_to_stdout/main.go (new file, 57 lines)
@@ -0,0 +1,57 @@
package main

import (
    "os"
    "fmt"
    "offline_twitter/scraper"
    // "time"
    "log"
    "strings"
)

const INCLUDE_REPLIES = true

// input: e.g., "https://twitter.com/michaelmalice/status/1395882872729477131"
func parse_tweet(url string) (string, error) {
    parts := strings.Split(url, "/")
    if len(parts) != 6 {
        return "", fmt.Errorf("Tweet format isn't right (%d)", len(parts))
    }
    if parts[0] != "https:" || parts[1] != "" || parts[2] != "twitter.com" || parts[4] != "status" {
        return "", fmt.Errorf("Tweet format isn't right")
    }
    return parts[5], nil
}

func main() {
    if len(os.Args) < 2 {
        log.Fatal("Must provide tweet! Exiting...")
    }

    tweet_id, err := parse_tweet(os.Args[1])
    if err != nil {
        log.Fatal(err.Error())
    }

    if INCLUDE_REPLIES {
        tweets, retweets, users, err := scraper.GetTweetFull(tweet_id)
        if err != nil {
            log.Fatal(err.Error())
        }
        for _, t := range tweets {
            fmt.Printf("%v\n", t)
        }
        for _, t := range retweets {
            fmt.Printf("%v\n", t)
        }
        for _, u := range users {
            fmt.Printf("%v\n", u)
        }
    } else {
        tweet, err := scraper.GetTweet(tweet_id)
        if err != nil {
            log.Fatal(err.Error())
        }
        fmt.Printf("%v\n", tweet)
    }
}
cmd/fetch_user_feed_to_stdout.go (new file, 96 lines)
@@ -0,0 +1,96 @@
package main

import (
    "os"
    "fmt"
    "offline_twitter/scraper"
    "log"
    "sort"
)

func main() {
    if len(os.Args) < 2 {
        log.Fatal("Must provide a user handle! Exiting...")
    }
    handle := scraper.UserHandle(os.Args[1])

    user, err := scraper.GetUser(handle)
    if err != nil {
        log.Fatal("Error getting user profile: " + err.Error())
    }

    tweets, retweets, users, err := scraper.GetFeedFull(user.ID, 1)
    if err != nil {
        log.Fatal("Error getting user feed: " + err.Error())
    }

    display_feed(user, tweets, retweets, users)

    fmt.Printf("Got a total of %d tweets, %d retweets, from %d users\n", len(tweets), len(retweets), len(users))
}

func display_feed(user scraper.User, tweets []scraper.Tweet, retweets []scraper.Retweet, users []scraper.User) {
    sort.Slice(tweets, func(i, j int) bool { return !tweets[i].PostedAt.Before(tweets[j].PostedAt) })
    tweet_map := make(map[scraper.TweetID]scraper.Tweet)
    for _, t := range tweets {
        tweet_map[t.ID] = t
    }

    sort.Slice(retweets, func(i, j int) bool { return !retweets[i].RetweetedAt.Before(retweets[j].RetweetedAt) })
    users_dict := make(map[scraper.UserID]scraper.User)
    for _, u := range users {
        users_dict[u.ID] = u
    }

    i := 0
    j := 0
    for {
        if i < len(tweets) && j < len(retweets) {
            if !tweets[i].PostedAt.Before(retweets[j].RetweetedAt) {
                tweet := tweets[i]
                if tweet.User != user.ID {
                    i += 1
                    continue
                }

                user, ok := users_dict[tweet.User]
                if !ok {
                    log.Fatalf("User not found: %q", tweet.User)
                }

                print_tweet(tweets[i], user)
                i += 1
            } else {
                retweet := retweets[j]
                if retweet.RetweetedBy != user.ID {
                    j += 1
                    continue
                }
                tweet, ok := tweet_map[retweet.TweetID]
                if !ok {
                    log.Fatalf("Tweet not found: %q", retweet.TweetID)
                }
                original_poster, ok := users_dict[tweet.User]
                if !ok {
                    log.Fatalf("User not found: %q", tweet.User)
                }
                retweeter, ok := users_dict[retweet.RetweetedBy]
                if !ok {
                    log.Fatalf("User not found: %q", retweet.RetweetedBy)
                }
                print_retweet(retweet, tweet, original_poster, retweeter)
                j += 1
            }
        } else {
            break
        }
    }
}

func print_tweet(tweet scraper.Tweet, user scraper.User) {
    fmt.Printf("%s => %s\n Replies: %d Retweets: %d Likes: %d\n", user.DisplayName, tweet.Text, tweet.NumReplies, tweet.NumRetweets, tweet.NumLikes)
}

func print_retweet(retweet scraper.Retweet, original_tweet scraper.Tweet, original_poster scraper.User, retweeter scraper.User) {
    fmt.Printf("%s [retweet] %s => %s\n Replies: %d Retweets: %d Likes: %d\n", retweeter.DisplayName, original_poster.DisplayName, original_tweet.Text, original_tweet.NumReplies, original_tweet.NumRetweets, original_tweet.NumLikes)
}
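Note: the merge loop above exits as soon as either slice runs out, so any remaining tweets or retweets in the longer slice are never printed. The cmd/fetch_user_feed_to_stdout/main.go variant below addresses this by adding two drain loops after the merge.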
cmd/fetch_user_feed_to_stdout/main.go (new file, 128 lines)
@@ -0,0 +1,128 @@
package main

import (
    "os"
    "fmt"
    "offline_twitter/scraper"
    "log"
    "sort"
)

func main() {
    if len(os.Args) < 2 {
        log.Fatal("Must provide a user handle! Exiting...")
    }
    handle := scraper.UserHandle(os.Args[1])

    user, err := scraper.GetUser(handle)
    if err != nil {
        log.Fatal("Error getting user profile: " + err.Error())
    }

    tweets, retweets, users, err := scraper.GetFeedFull(user.ID, 1)
    if err != nil {
        log.Fatal("Error getting user feed: " + err.Error())
    }

    display_feed(user, tweets, retweets, users)

    fmt.Printf("Got a total of %d tweets, %d retweets, from %d users\n", len(tweets), len(retweets), len(users))
}

func display_feed(user scraper.User, tweets []scraper.Tweet, retweets []scraper.Retweet, users []scraper.User) {
    sort.Slice(tweets, func(i, j int) bool { return !tweets[i].PostedAt.Before(tweets[j].PostedAt) })
    tweet_map := make(map[scraper.TweetID]scraper.Tweet)
    for _, t := range tweets {
        tweet_map[t.ID] = t
    }

    sort.Slice(retweets, func(i, j int) bool { return !retweets[i].RetweetedAt.Before(retweets[j].RetweetedAt) })
    users_dict := make(map[scraper.UserID]scraper.User)
    for _, u := range users {
        users_dict[u.ID] = u
    }

    i := 0
    j := 0
    for i < len(tweets) && j < len(retweets) {
        if !tweets[i].PostedAt.Before(retweets[j].RetweetedAt) {
            tweet := tweets[i]
            if tweet.User != user.ID {
                i += 1
                continue
            }

            user, ok := users_dict[tweet.User]
            if !ok {
                log.Fatalf("User not found: %q", tweet.User)
            }

            print_tweet(tweets[i], user)
            i += 1
        } else {
            retweet := retweets[j]
            if retweet.RetweetedBy != user.ID {
                j += 1
                continue
            }
            tweet, ok := tweet_map[retweet.TweetID]
            if !ok {
                log.Fatalf("Tweet not found: %q", retweet.TweetID)
            }
            original_poster, ok := users_dict[tweet.User]
            if !ok {
                log.Fatalf("User not found: %q", tweet.User)
            }
            retweeter, ok := users_dict[retweet.RetweetedBy]
            if !ok {
                log.Fatalf("User not found: %q", retweet.RetweetedBy)
            }
            print_retweet(retweet, tweet, original_poster, retweeter)
            j += 1
        }
    }
    for i < len(tweets) {
        tweet := tweets[i]
        if tweet.User != user.ID {
            i += 1
            continue
        }

        user, ok := users_dict[tweet.User]
        if !ok {
            log.Fatalf("User not found: %q", tweet.User)
        }

        print_tweet(tweets[i], user)
        i += 1
    }
    for j < len(retweets) {
        retweet := retweets[j]
        if retweet.RetweetedBy != user.ID {
            j += 1
            continue
        }
        tweet, ok := tweet_map[retweet.TweetID]
        if !ok {
            log.Fatalf("Tweet not found: %q", retweet.TweetID)
        }
        original_poster, ok := users_dict[tweet.User]
        if !ok {
            log.Fatalf("User not found: %q", tweet.User)
        }
        retweeter, ok := users_dict[retweet.RetweetedBy]
        if !ok {
            log.Fatalf("User not found: %q", retweet.RetweetedBy)
        }
        print_retweet(retweet, tweet, original_poster, retweeter)
        j += 1
    }
}

func print_tweet(tweet scraper.Tweet, user scraper.User) {
    fmt.Printf("%s => %s\n Replies: %d Retweets: %d Likes: %d\n", user.DisplayName, tweet.Text, tweet.NumReplies, tweet.NumRetweets, tweet.NumLikes)
}

func print_retweet(retweet scraper.Retweet, original_tweet scraper.Tweet, original_poster scraper.User, retweeter scraper.User) {
    fmt.Printf("%s [retweet] %s => %s\n Replies: %d Retweets: %d Likes: %d\n", retweeter.DisplayName, original_poster.DisplayName, original_tweet.Text, original_tweet.NumReplies, original_tweet.NumRetweets, original_tweet.NumLikes)
}
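Stripped of the map lookups and the user-ID filtering, display_feed above is a two-pointer merge of two newest-first lists followed by drain loops. A minimal sketch of that shape (illustrative only; the real code compares tweets[i].PostedAt against retweets[j].RetweetedAt, and the item type is not int):

package main

import "fmt"

// merge emits items from two descending-sorted lists in combined
// descending order, then drains whichever list still has leftovers.
func merge(a, b []int) {
    i, j := 0, 0
    for i < len(a) && j < len(b) {
        if a[i] >= b[j] {
            fmt.Println("a:", a[i])
            i += 1
        } else {
            fmt.Println("b:", b[j])
            j += 1
        }
    }
    for ; i < len(a); i++ {
        fmt.Println("a:", a[i])
    }
    for ; j < len(b); j++ {
        fmt.Println("b:", b[j])
    }
}

func main() {
    merge([]int{9, 5, 2}, []int{8, 3}) // prints a:9 b:8 a:5 b:3 a:2
}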
cmd/fetch_user_profile_to_stdout.go (new file, 25 lines)
@@ -0,0 +1,25 @@
package main

import (
    "os"
    "fmt"
    "offline_twitter/scraper"
    "log"
)

const INCLUDE_REPLIES = true

func main() {
    if len(os.Args) < 2 {
        log.Fatal("Must provide a user handle!")
    }

    // os.Args[1] is a plain string; GetUser expects a scraper.UserHandle
    user_handle := scraper.UserHandle(os.Args[1])

    user, err := scraper.GetUser(user_handle)
    if err != nil {
        log.Fatal(err.Error())
    }

    fmt.Printf("%v\n", user)
}
cmd/fetch_user_profile_to_stdout/main.go (new file, 25 lines)
@@ -0,0 +1,25 @@
package main

import (
    "os"
    "fmt"
    "offline_twitter/scraper"
    "log"
)

// const INCLUDE_REPLIES = true

func main() {
    if len(os.Args) < 2 {
        log.Fatal("Must provide a user handle!")
    }

    user_handle := scraper.UserHandle(os.Args[1])

    user, err := scraper.GetUser(user_handle)
    if err != nil {
        log.Fatal(err.Error())
    }

    fmt.Printf("%v\n", user)
}
scraper/api_request_utils.go (new file, 220 lines)
@@ -0,0 +1,220 @@
package scraper

import (
    "encoding/json"
    "fmt"
    "io/ioutil"
    "net/http"
    "time"
)

const API_CONVERSATION_BASE_PATH = "https://twitter.com/i/api/2/timeline/conversation/"
const API_USER_TIMELINE_BASE_PATH = "https://api.twitter.com/2/timeline/profile/"

type API struct{}

func (api API) GetFeedFor(user_id UserID, cursor string) (TweetResponse, error) {
    client := &http.Client{Timeout: 10 * time.Second}
    req, err := http.NewRequest("GET", API_USER_TIMELINE_BASE_PATH + string(user_id) + ".json", nil)
    if err != nil {
        return TweetResponse{}, err
    }

    err = ApiRequestAddTokens(req)
    if err != nil {
        return TweetResponse{}, err
    }

    ApiRequestAddAllParams(req)

    if cursor != "" {
        UpdateQueryCursor(req, cursor, false)
    }

    resp, err := client.Do(req)
    if err != nil {
        return TweetResponse{}, err
    }
    defer resp.Body.Close()

    if resp.StatusCode != http.StatusOK {
        content, _ := ioutil.ReadAll(resp.Body)
        return TweetResponse{}, fmt.Errorf("HTTP %s: %s", resp.Status, content)
    }

    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        return TweetResponse{}, err
    }

    var response TweetResponse
    err = json.Unmarshal(body, &response)
    return response, err
}

// Resend the request to get more tweets if necessary
func (api API) GetMoreTweets(user_id UserID, response *TweetResponse, max_tweets int) error {
    last_response := response
    for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < max_tweets {
        fresh_response, err := api.GetFeedFor(user_id, last_response.GetCursor())
        if err != nil {
            return err
        }

        last_response = &fresh_response

        // Copy over the tweets and the users
        for id, tweet := range last_response.GlobalObjects.Tweets {
            response.GlobalObjects.Tweets[id] = tweet
        }
        for id, user := range last_response.GlobalObjects.Users {
            response.GlobalObjects.Users[id] = user
        }
    }
    return nil
}

func (api API) GetTweet(id string, cursor string) (TweetResponse, error) {
    client := &http.Client{Timeout: 10 * time.Second}
    req, err := http.NewRequest("GET", API_CONVERSATION_BASE_PATH + id + ".json", nil)
    if err != nil {
        return TweetResponse{}, err
    }

    err = ApiRequestAddTokens(req)
    if err != nil {
        return TweetResponse{}, err
    }

    ApiRequestAddAllParams(req)
    if cursor != "" {
        UpdateQueryCursor(req, cursor, true)
    }

    resp, err := client.Do(req)
    if err != nil {
        return TweetResponse{}, err
    }
    defer resp.Body.Close()

    if !(resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusForbidden) {
        content, _ := ioutil.ReadAll(resp.Body)
        return TweetResponse{}, fmt.Errorf("HTTP %d %s: %s", resp.StatusCode, resp.Status, content)
    }

    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        return TweetResponse{}, err
    }

    var response TweetResponse
    err = json.Unmarshal(body, &response)
    return response, err
}

// Resend the request to get more replies if necessary
func (api API) GetMoreReplies(tweet_id string, response *TweetResponse, max_replies int) error {
    last_response := response
    for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < max_replies {
        fresh_response, err := api.GetTweet(tweet_id, last_response.GetCursor())
        if err != nil {
            return err
        }

        last_response = &fresh_response

        // Copy over the tweets and the users
        for id, tweet := range last_response.GlobalObjects.Tweets {
            response.GlobalObjects.Tweets[id] = tweet
        }
        for id, user := range last_response.GlobalObjects.Users {
            response.GlobalObjects.Users[id] = user
        }
    }
    return nil
}

func UpdateQueryCursor(req *http.Request, new_cursor string, is_tweet bool) {
    query := req.URL.Query()
    query.Add("cursor", new_cursor)
    if is_tweet {
        query.Add("referrer", "tweet")
    }
    req.URL.RawQuery = query.Encode()
}

func (api API) GetUser(handle UserHandle) (APIUser, error) {
    client := &http.Client{Timeout: 10 * time.Second}
    // The "variables" query param is the URL-encoded JSON {"screen_name":"<handle>","withHighlightedLabel":true}
    req, err := http.NewRequest("GET", "https://api.twitter.com/graphql/4S2ihIKfF3xhp-ENxvUAfQ/UserByScreenName?variables=%7B%22screen_name%22%3A%22" + string(handle) + "%22%2C%22withHighlightedLabel%22%3Atrue%7D", nil)
    if err != nil {
        return APIUser{}, err
    }
    err = ApiRequestAddTokens(req)
    if err != nil {
        return APIUser{}, err
    }

    resp, err := client.Do(req)
    if err != nil {
        return APIUser{}, err
    }
    defer resp.Body.Close()

    if !(resp.StatusCode == http.StatusOK || resp.StatusCode == http.StatusForbidden) {
        content, _ := ioutil.ReadAll(resp.Body)
        return APIUser{}, fmt.Errorf("response status %s: %s", resp.Status, content)
    }

    body, err := ioutil.ReadAll(resp.Body)
    if err != nil {
        return APIUser{}, err
    }

    var response UserResponse
    err = json.Unmarshal(body, &response)
    return response.ConvertToAPIUser(), err
}

// Add Bearer token and guest token
func ApiRequestAddTokens(req *http.Request) error {
    req.Header.Set("Authorization", "Bearer " + BEARER_TOKEN)

    guestToken, err := GetGuestToken()
    if err != nil {
        return err
    }
    req.Header.Set("X-Guest-Token", guestToken)
    return nil
}

// Add the query params to get all data
func ApiRequestAddAllParams(req *http.Request) {
    query := req.URL.Query()
    query.Add("include_profile_interstitial_type", "1")
    query.Add("include_blocking", "1")
    query.Add("include_blocked_by", "1")
    query.Add("include_followed_by", "1")
    query.Add("include_want_retweets", "1")
    query.Add("include_mute_edge", "1")
    query.Add("include_can_dm", "1")
    query.Add("include_can_media_tag", "1")
    query.Add("skip_status", "1")
    query.Add("cards_platform", "Web-12")
    query.Add("include_cards", "1")
    query.Add("include_ext_alt_text", "true")
    query.Add("include_quote_count", "true")
    query.Add("include_reply_count", "1")
    query.Add("tweet_mode", "extended")
    query.Add("include_entities", "true")
    query.Add("include_user_entities", "true")
    query.Add("include_ext_media_color", "true")
    query.Add("include_ext_media_availability", "true")
    query.Add("send_error_codes", "true")
    query.Add("simple_quoted_tweet", "true")
    query.Add("include_tweet_replies", "true")
    query.Add("ext", "mediaStats,highlightedLabel")
    query.Add("count", "20")
    req.URL.RawQuery = query.Encode()
}
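To make the cursor-based control flow concrete, here is a hedged sketch of a caller paging the timeline endpoint by hand, the same way GetMoreTweets does internally (the user ID value is hypothetical; TweetResponse.GetCursor and the GlobalObjects fields are defined elsewhere in the scraper package):

package main

import (
    "fmt"
    "log"

    "offline_twitter/scraper"
)

func main() {
    api := scraper.API{}
    uid := scraper.UserID("12345") // hypothetical numeric user ID

    // First page: empty cursor.
    page, err := api.GetFeedFor(uid, "")
    if err != nil {
        log.Fatal(err)
    }

    // Next page: pass the cursor from the previous response.
    if cursor := page.GetCursor(); cursor != "" {
        next, err := api.GetFeedFor(uid, cursor)
        if err != nil {
            log.Fatal(err)
        }
        fmt.Printf("second page: %d tweets\n", len(next.GlobalObjects.Tweets))
    }
}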
scraper/tweet.go
@@ -5,6 +5,7 @@ import (
    "fmt"
)

const DEFAULT_MAX_REPLIES_EAGER_LOAD = 50

type TweetID string
@@ -75,6 +76,43 @@ func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
}

// Return a single tweet, nothing else
func GetTweet(id string) (Tweet, error) {
    api := API{}
    tweet_response, err := api.GetTweet(id, "")
    if err != nil {
        return Tweet{}, err
    }

    single_tweet, ok := tweet_response.GlobalObjects.Tweets[id]

    if !ok {
        return Tweet{}, fmt.Errorf("Didn't get the tweet!\n%v", tweet_response)
    }

    return ParseSingleTweet(single_tweet)
}

// Return a list of tweets, including the original and the rest of its thread,
// along with a list of associated users
func GetTweetFull(id string) (tweets []Tweet, retweets []Retweet, users []User, err error) {
    api := API{}
    tweet_response, err := api.GetTweet(id, "")
    if err != nil {
        return
    }
    if len(tweet_response.GlobalObjects.Tweets) < DEFAULT_MAX_REPLIES_EAGER_LOAD &&
            tweet_response.GetCursor() != "" {
        err = api.GetMoreReplies(id, &tweet_response, DEFAULT_MAX_REPLIES_EAGER_LOAD)
        if err != nil {
            return
        }
    }

    return ParseTweetResponse(tweet_response)
}

func ParseTweetResponse(resp TweetResponse) (tweets []Tweet, retweets []Retweet, users []User, err error) {
    var new_tweet Tweet
    var new_retweet Retweet
scraper/user.go
@@ -8,6 +8,14 @@ import (
type UserID string
type UserHandle string

func UIDArrayToStrArray(uids []UserID) []string {
    ret := []string{}
    for _, uid := range uids {
        ret = append(ret, string(uid))
    }
    return ret
}

type User struct {
    ID          UserID
    DisplayName string
@@ -54,3 +62,13 @@ func ParseSingleUser(apiUser APIUser) (ret User, err error) {
    }
    return
}

// Calls API#GetUser and returns the parsed result
func GetUser(handle UserHandle) (User, error) {
    api := API{}
    apiUser, err := api.GetUser(handle)
    if err != nil {
        return User{}, err
    }
    return ParseSingleUser(apiUser)
}
scraper/user_feed.go (new file, 22 lines)
@@ -0,0 +1,22 @@
package scraper

// Return a user's feed (their tweets and retweets),
// along with a list of associated users
func GetFeedFull(user_id UserID, max_tweets int) (tweets []Tweet, retweets []Retweet, users []User, err error) {
    api := API{}
    tweet_response, err := api.GetFeedFor(user_id, "")
    if err != nil {
        return
    }

    if len(tweet_response.GlobalObjects.Tweets) < max_tweets &&
            tweet_response.GetCursor() != "" {
        err = api.GetMoreTweets(user_id, &tweet_response, max_tweets)
        if err != nil {
            return
        }
    }

    return ParseTweetResponse(tweet_response)
}
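Putting the high-level helpers together, a minimal end-to-end sketch mirroring what cmd/fetch_user_feed_to_stdout does (the handle value is just an example):

package main

import (
    "fmt"
    "log"

    "offline_twitter/scraper"
)

func main() {
    // Resolve a handle to a User via the GraphQL endpoint.
    user, err := scraper.GetUser(scraper.UserHandle("somebody")) // example handle
    if err != nil {
        log.Fatal(err)
    }

    // Fetch the feed; GetFeedFull pages via GetMoreTweets until it has
    // max_tweets tweets or the API stops returning a cursor.
    tweets, retweets, users, err := scraper.GetFeedFull(user.ID, 50)
    if err != nil {
        log.Fatal(err)
    }
    fmt.Printf("%d tweets, %d retweets, %d users\n", len(tweets), len(retweets), len(users))
}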