Add search

This commit is contained in:
Alessio 2021-11-06 14:50:39 -07:00
parent 470dce1d27
commit 6d7d940df4
6 changed files with 131 additions and 4 deletions

View File

@ -181,6 +181,10 @@ test $(sqlite3 twitter.db "select is_stub from tweets where id = 145452142414465
test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1 test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1
# Test search
tw search "from:michaelmalice constitution"
test $(sqlite3 twitter.db "select count(*) from tweets where user_id = 44067298 and text like '%constitution%'") -gt "30" # Not sure exactly how many
# TODO: Maybe this file should be broken up into multiple test scripts # TODO: Maybe this file should be broken up into multiple test scripts
echo -e "\033[32mAll tests passed. Finished successfully.\033[0m" echo -e "\033[32mAll tests passed. Finished successfully.\033[0m"

View File

@ -46,6 +46,9 @@ This application downloads tweets from twitter and saves them in a SQLite databa
<TARGET> is the user handle. <TARGET> is the user handle.
Gets the most recent ~50 tweets. Gets the most recent ~50 tweets.
If "get_user_tweets_all" is used, gets up to ~3200 tweets (API limit). If "get_user_tweets_all" is used, gets up to ~3200 tweets (API limit).
search
<TARGET> is the search query. Should be wrapped in quotes if it has spaces.
` `

View File

@ -61,6 +61,8 @@ func main() {
fetch_user_feed(target, 999999999) fetch_user_feed(target, 999999999)
case "download_tweet_content": case "download_tweet_content":
download_tweet_content(target) download_tweet_content(target)
case "search":
search(target)
default: default:
die("Invalid operation: " + operation, true, 3) die("Invalid operation: " + operation, true, 3)
} }
@ -238,7 +240,7 @@ func download_tweet_content(tweet_identifier string) {
tweet, err := profile.GetTweetById(tweet_id) tweet, err := profile.GetTweetById(tweet_id)
if err != nil { if err != nil {
panic("Couldn't get tweet from database: " + err.Error()) panic(fmt.Sprintf("Couldn't get tweet (ID %d) from database: %s", tweet_id, err.Error()))
} }
err = profile.DownloadTweetContentFor(&tweet) err = profile.DownloadTweetContentFor(&tweet)
if err != nil { if err != nil {
@ -256,3 +258,36 @@ func download_user_content(handle scraper.UserHandle) {
panic("Error getting content: " + err.Error()) panic("Error getting content: " + err.Error())
} }
} }
// search scrapes the results for `query` and stores them through `profile`.
//
// It passes 1000 as the second argument to scraper.Search (presumably the maximum number
// of results to fetch -- confirm against the scraper package).  Every returned user is saved
// and has its content downloaded; then every returned tweet is saved and has its content
// downloaded.  Each failure is reported through die() with its own exit code.
//
// NOTE(review): the retweets returned by the scraper are only counted in the final message;
// nothing in this function saves them.  Confirm whether that is intended.
func search(query string) {
	tweets, retweets, users, err := scraper.Search(query, 1000)
	if err != nil {
		die("Error scraping search results: "+err.Error(), false, -100)
	}

	// Users are stored before the tweets
	for _, user := range users {
		if err = profile.SaveUser(user); err != nil {
			die("Error saving user: "+err.Error(), false, 4)
		}
		if err = profile.DownloadUserContentFor(&user); err != nil {
			die("Error getting user content: "+err.Error(), false, 10)
		}
	}

	for _, tweet := range tweets {
		if err = profile.SaveTweet(tweet); err != nil {
			die("Error saving tweet: "+err.Error(), false, 4)
		}
		if err = profile.DownloadTweetContentFor(&tweet); err != nil {
			die("Error getting tweet content: "+err.Error(), false, 11)
		}
	}

	fmt.Printf(
		"Saved %d tweets, %d retweets and %d users. Exiting successfully\n",
		len(tweets), len(retweets), len(users),
	)
}

View File

@ -26,3 +26,9 @@ curl \
-H "Authorization: Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" \ -H "Authorization: Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" \
-H "X-Guest-Token: 1396177150890348547" \ -H "X-Guest-Token: 1396177150890348547" \
https://twitter.com/i/api/2/timeline/conversation/1395881699142160387.json https://twitter.com/i/api/2/timeline/conversation/1395881699142160387.json
curl \
-H "Authorization: Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" \
-H "X-Guest-Token: 1449946080792104970" \
"https://twitter.com/i/api/2/search/adaptive.json?count=50&spelling_corrections=1&query_source=typed_query&pc=1&q=potatoes"

View File

@ -5,6 +5,7 @@ import (
"fmt" "fmt"
"io/ioutil" "io/ioutil"
"net/http" "net/http"
"net/url"
"time" "time"
) )
@ -217,6 +218,75 @@ func (api API) GetUser(handle UserHandle) (APIUser, error) {
return response.ConvertToAPIUser(), err return response.ConvertToAPIUser(), err
} }
// Search sends a single GET request to the "adaptive.json" search endpoint for `query`
// and decodes the JSON reply into a TweetResponse.
//
// The query is URL-escaped before being appended to the request URL.  If `cursor` is
// non-empty, it is applied to the request with UpdateQueryCursor.  The final request URL
// is printed to stdout before the request is sent.
//
// Returns an error if the request can't be built, tokens can't be added, the request fails,
// the status is anything other than 200 (the error then includes the URL, the status and the
// response body), the body can't be read, or the body is not valid JSON for a TweetResponse.
func (api API) Search(query string, cursor string) (TweetResponse, error) {
	search_url := "https://twitter.com/i/api/2/search/adaptive.json?count=50&spelling_corrections=1&query_source=typed_query&pc=1&q=" + url.QueryEscape(query)

	req, err := http.NewRequest("GET", search_url, nil)
	if err != nil {
		return TweetResponse{}, err
	}
	if err = ApiRequestAddTokens(req); err != nil {
		return TweetResponse{}, err
	}
	ApiRequestAddAllParams(req)
	if cursor != "" {
		UpdateQueryCursor(req, cursor, false)
	}
	fmt.Println(req.URL.String())

	client := &http.Client{Timeout: 10 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		return TweetResponse{}, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		// Best effort: a failure to read the error body is ignored
		content, _ := ioutil.ReadAll(resp.Body)
		return TweetResponse{}, fmt.Errorf("Error while searching for %q. HTTP %s: %s", req.URL, resp.Status, content)
	}

	raw, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		return TweetResponse{}, err
	}

	parsed := TweetResponse{}
	err = json.Unmarshal(raw, &parsed)
	return parsed, err
}
// GetMoreTweetsFromSearch repeatedly calls Search for `query`, following the cursor of the most
// recently fetched page, and merges each page's tweets and users into `response`.
//
// It stops and returns nil once the latest page has no cursor, or once `response` holds at
// least `max_results` tweets.  It returns END_OF_FEED if a fetched page has no tweets and
// the same cursor as the page before it.  Any error from Search is returned as-is.
// A progress line with the current tweet count is printed after each merged page.
//
// NOTE(review): if GlobalObjects.Tweets / GlobalObjects.Users are maps, this assumes they are
// already non-nil in `response` (assigning into a nil map panics) -- confirm against callers.
func (api API) GetMoreTweetsFromSearch(query string, response *TweetResponse, max_results int) error {
	latest := response
	for {
		cursor := latest.GetCursor()
		if cursor == "" || len(response.GlobalObjects.Tweets) >= max_results {
			break
		}

		page, err := api.Search(query, cursor)
		if err != nil {
			return err
		}

		// An empty page whose cursor did not advance means the end of the feed
		if page.GetCursor() == cursor && len(page.GlobalObjects.Tweets) == 0 {
			return END_OF_FEED
		}
		latest = &page

		// Fold the new page into the accumulated response
		for id, tweet := range page.GlobalObjects.Tweets {
			response.GlobalObjects.Tweets[id] = tweet
		}
		for id, user := range page.GlobalObjects.Users {
			response.GlobalObjects.Users[id] = user
		}
		fmt.Printf("Have %d tweets\n", len(response.GlobalObjects.Tweets))
	}
	return nil
}
// Add Bearer token and guest token // Add Bearer token and guest token
func ApiRequestAddTokens(req *http.Request) error { func ApiRequestAddTokens(req *http.Request) error {
req.Header.Set("Authorization", "Bearer " + BEARER_TOKEN) req.Header.Set("Authorization", "Bearer " + BEARER_TOKEN)

View File

@ -308,9 +308,18 @@ func (t *TweetResponse) HandleTombstones() []string {
func (t *TweetResponse) GetCursor() string { func (t *TweetResponse) GetCursor() string {
entries := t.Timeline.Instructions[0].AddEntries.Entries entries := t.Timeline.Instructions[0].AddEntries.Entries
last_entry := entries[len(entries) - 1] if len(entries) > 0 {
if strings.Contains(last_entry.EntryID, "cursor") { last_entry := entries[len(entries) - 1]
return last_entry.Content.Operation.Cursor.Value if strings.Contains(last_entry.EntryID, "cursor") {
return last_entry.Content.Operation.Cursor.Value
}
}
// Next, try the other format ("replaceEntry")
instructions := t.Timeline.Instructions
last_replace_entry := instructions[len(instructions) - 1].ReplaceEntry.Entry
if strings.Contains(last_replace_entry.EntryID, "cursor") {
return last_replace_entry.Content.Operation.Cursor.Value
} }
return "" return ""
} }