Add search
This commit is contained in:
parent
470dce1d27
commit
6d7d940df4
@ -181,6 +181,10 @@ test $(sqlite3 twitter.db "select is_stub from tweets where id = 145452142414465
|
||||
test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1
|
||||
|
||||
|
||||
# Test search
|
||||
tw search "from:michaelmalice constitution"
|
||||
test $(sqlite3 twitter.db "select count(*) from tweets where user_id = 44067298 and text like '%constitution%'") -gt "30" # Not sure exactly how many
|
||||
|
||||
# TODO: Maybe this file should be broken up into multiple test scripts
|
||||
|
||||
echo -e "\033[32mAll tests passed. Finished successfully.\033[0m"
|
||||
|
@ -46,6 +46,9 @@ This application downloads tweets from twitter and saves them in a SQLite databa
|
||||
<TARGET> is the user handle.
|
||||
Gets the most recent ~50 tweets.
|
||||
If "get_user_tweets_all" is used, gets up to ~3200 tweets (API limit).
|
||||
|
||||
search
|
||||
<TARGET> is the search query. Should be wrapped in quotes if it has spaces.
|
||||
`
|
||||
|
||||
|
||||
|
@ -61,6 +61,8 @@ func main() {
|
||||
fetch_user_feed(target, 999999999)
|
||||
case "download_tweet_content":
|
||||
download_tweet_content(target)
|
||||
case "search":
|
||||
search(target)
|
||||
default:
|
||||
die("Invalid operation: " + operation, true, 3)
|
||||
}
|
||||
@ -238,7 +240,7 @@ func download_tweet_content(tweet_identifier string) {
|
||||
|
||||
tweet, err := profile.GetTweetById(tweet_id)
|
||||
if err != nil {
|
||||
panic("Couldn't get tweet from database: " + err.Error())
|
||||
panic(fmt.Sprintf("Couldn't get tweet (ID %d) from database: %s", tweet_id, err.Error()))
|
||||
}
|
||||
err = profile.DownloadTweetContentFor(&tweet)
|
||||
if err != nil {
|
||||
@ -256,3 +258,36 @@ func download_user_content(handle scraper.UserHandle) {
|
||||
panic("Error getting content: " + err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
func search(query string) {
|
||||
tweets, retweets, users, err := scraper.Search(query, 1000);
|
||||
if err != nil {
|
||||
die("Error scraping search results: " + err.Error(), false, -100)
|
||||
}
|
||||
|
||||
for _, u := range users {
|
||||
err = profile.SaveUser(u)
|
||||
if err != nil {
|
||||
die("Error saving user: " + err.Error(), false, 4)
|
||||
}
|
||||
err = profile.DownloadUserContentFor(&u)
|
||||
if err != nil {
|
||||
die("Error getting user content: " + err.Error(), false, 10)
|
||||
}
|
||||
}
|
||||
|
||||
for _, t := range tweets {
|
||||
// fmt.Println(t)
|
||||
err = profile.SaveTweet(t)
|
||||
if err != nil {
|
||||
die("Error saving tweet: " + err.Error(), false, 4)
|
||||
}
|
||||
err = profile.DownloadTweetContentFor(&t)
|
||||
if err != nil {
|
||||
die("Error getting tweet content: " + err.Error(), false, 11)
|
||||
}
|
||||
}
|
||||
|
||||
fmt.Printf("Saved %d tweets, %d retweets and %d users. Exiting successfully\n", len(tweets), len(retweets), len(users))
|
||||
}
|
||||
|
@ -26,3 +26,9 @@ curl \
|
||||
-H "Authorization: Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" \
|
||||
-H "X-Guest-Token: 1396177150890348547" \
|
||||
https://twitter.com/i/api/2/timeline/conversation/1395881699142160387.json
|
||||
|
||||
|
||||
curl \
|
||||
-H "Authorization: Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" \
|
||||
-H "X-Guest-Token: 1449946080792104970" \
|
||||
"https://twitter.com/i/api/2/search/adaptive.json?count=50&spelling_corrections=1&query_source=typed_query&pc=1&q=potatoes"
|
||||
|
@ -5,6 +5,7 @@ import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"time"
|
||||
)
|
||||
|
||||
@ -217,6 +218,75 @@ func (api API) GetUser(handle UserHandle) (APIUser, error) {
|
||||
return response.ConvertToAPIUser(), err
|
||||
}
|
||||
|
||||
func (api API) Search(query string, cursor string) (TweetResponse, error) {
|
||||
client := &http.Client{Timeout: 10 * time.Second}
|
||||
req, err := http.NewRequest("GET", "https://twitter.com/i/api/2/search/adaptive.json?count=50&spelling_corrections=1&query_source=typed_query&pc=1&q=" + url.QueryEscape(query), nil)
|
||||
if err != nil {
|
||||
return TweetResponse{}, err
|
||||
}
|
||||
|
||||
err = ApiRequestAddTokens(req)
|
||||
if err != nil {
|
||||
return TweetResponse{}, err
|
||||
}
|
||||
|
||||
ApiRequestAddAllParams(req)
|
||||
if cursor != "" {
|
||||
UpdateQueryCursor(req, cursor, false)
|
||||
}
|
||||
|
||||
fmt.Println(req.URL.String())
|
||||
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
return TweetResponse{}, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
content, _ := ioutil.ReadAll(resp.Body)
|
||||
return TweetResponse{}, fmt.Errorf("Error while searching for %q. HTTP %s: %s", req.URL, resp.Status, content)
|
||||
}
|
||||
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return TweetResponse{}, err
|
||||
}
|
||||
// fmt.Println(string(body))
|
||||
|
||||
var response TweetResponse
|
||||
err = json.Unmarshal(body, &response)
|
||||
return response, err
|
||||
}
|
||||
|
||||
func (api API) GetMoreTweetsFromSearch(query string, response *TweetResponse, max_results int) error {
|
||||
last_response := response
|
||||
for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < max_results {
|
||||
fresh_response, err := api.Search(query, last_response.GetCursor())
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if fresh_response.GetCursor() == last_response.GetCursor() && len(fresh_response.GlobalObjects.Tweets) == 0 {
|
||||
// Empty response, cursor same as previous: end of feed has been reached
|
||||
return END_OF_FEED
|
||||
}
|
||||
|
||||
last_response = &fresh_response
|
||||
|
||||
// Copy the results over
|
||||
for id, tweet := range last_response.GlobalObjects.Tweets {
|
||||
response.GlobalObjects.Tweets[id] = tweet
|
||||
}
|
||||
for id, user := range last_response.GlobalObjects.Users {
|
||||
response.GlobalObjects.Users[id] = user
|
||||
}
|
||||
fmt.Printf("Have %d tweets\n", len(response.GlobalObjects.Tweets))
|
||||
// fmt.Printf("Cursor: %s\n", last_response.GetCursor())
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
// Add Bearer token and guest token
|
||||
func ApiRequestAddTokens(req *http.Request) error {
|
||||
req.Header.Set("Authorization", "Bearer " + BEARER_TOKEN)
|
||||
|
@ -308,9 +308,18 @@ func (t *TweetResponse) HandleTombstones() []string {
|
||||
|
||||
func (t *TweetResponse) GetCursor() string {
|
||||
entries := t.Timeline.Instructions[0].AddEntries.Entries
|
||||
last_entry := entries[len(entries) - 1]
|
||||
if strings.Contains(last_entry.EntryID, "cursor") {
|
||||
return last_entry.Content.Operation.Cursor.Value
|
||||
if len(entries) > 0 {
|
||||
last_entry := entries[len(entries) - 1]
|
||||
if strings.Contains(last_entry.EntryID, "cursor") {
|
||||
return last_entry.Content.Operation.Cursor.Value
|
||||
}
|
||||
}
|
||||
|
||||
// Next, try the other format ("replaceEntry")
|
||||
instructions := t.Timeline.Instructions
|
||||
last_replace_entry := instructions[len(instructions) - 1].ReplaceEntry.Entry
|
||||
if strings.Contains(last_replace_entry.EntryID, "cursor") {
|
||||
return last_replace_entry.Content.Operation.Cursor.Value
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user