Add search
This commit is contained in:
parent
470dce1d27
commit
6d7d940df4
@ -181,6 +181,10 @@ test $(sqlite3 twitter.db "select is_stub from tweets where id = 145452142414465
|
|||||||
test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1
|
test $(sqlite3 twitter.db "select is_stub from tweets where id = 1454522147750260742") = 1
|
||||||
|
|
||||||
|
|
||||||
|
# Test search
|
||||||
|
tw search "from:michaelmalice constitution"
|
||||||
|
test $(sqlite3 twitter.db "select count(*) from tweets where user_id = 44067298 and text like '%constitution%'") -gt "30" # Not sure exactly how many
|
||||||
|
|
||||||
# TODO: Maybe this file should be broken up into multiple test scripts
|
# TODO: Maybe this file should be broken up into multiple test scripts
|
||||||
|
|
||||||
echo -e "\033[32mAll tests passed. Finished successfully.\033[0m"
|
echo -e "\033[32mAll tests passed. Finished successfully.\033[0m"
|
||||||
|
@ -46,6 +46,9 @@ This application downloads tweets from twitter and saves them in a SQLite databa
|
|||||||
<TARGET> is the user handle.
|
<TARGET> is the user handle.
|
||||||
Gets the most recent ~50 tweets.
|
Gets the most recent ~50 tweets.
|
||||||
If "get_user_tweets_all" is used, gets up to ~3200 tweets (API limit).
|
If "get_user_tweets_all" is used, gets up to ~3200 tweets (API limit).
|
||||||
|
|
||||||
|
search
|
||||||
|
<TARGET> is the search query. Should be wrapped in quotes if it has spaces.
|
||||||
`
|
`
|
||||||
|
|
||||||
|
|
||||||
|
@ -61,6 +61,8 @@ func main() {
|
|||||||
fetch_user_feed(target, 999999999)
|
fetch_user_feed(target, 999999999)
|
||||||
case "download_tweet_content":
|
case "download_tweet_content":
|
||||||
download_tweet_content(target)
|
download_tweet_content(target)
|
||||||
|
case "search":
|
||||||
|
search(target)
|
||||||
default:
|
default:
|
||||||
die("Invalid operation: " + operation, true, 3)
|
die("Invalid operation: " + operation, true, 3)
|
||||||
}
|
}
|
||||||
@ -238,7 +240,7 @@ func download_tweet_content(tweet_identifier string) {
|
|||||||
|
|
||||||
tweet, err := profile.GetTweetById(tweet_id)
|
tweet, err := profile.GetTweetById(tweet_id)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic("Couldn't get tweet from database: " + err.Error())
|
panic(fmt.Sprintf("Couldn't get tweet (ID %d) from database: %s", tweet_id, err.Error()))
|
||||||
}
|
}
|
||||||
err = profile.DownloadTweetContentFor(&tweet)
|
err = profile.DownloadTweetContentFor(&tweet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -256,3 +258,36 @@ func download_user_content(handle scraper.UserHandle) {
|
|||||||
panic("Error getting content: " + err.Error())
|
panic("Error getting content: " + err.Error())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
func search(query string) {
|
||||||
|
tweets, retweets, users, err := scraper.Search(query, 1000);
|
||||||
|
if err != nil {
|
||||||
|
die("Error scraping search results: " + err.Error(), false, -100)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, u := range users {
|
||||||
|
err = profile.SaveUser(u)
|
||||||
|
if err != nil {
|
||||||
|
die("Error saving user: " + err.Error(), false, 4)
|
||||||
|
}
|
||||||
|
err = profile.DownloadUserContentFor(&u)
|
||||||
|
if err != nil {
|
||||||
|
die("Error getting user content: " + err.Error(), false, 10)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, t := range tweets {
|
||||||
|
// fmt.Println(t)
|
||||||
|
err = profile.SaveTweet(t)
|
||||||
|
if err != nil {
|
||||||
|
die("Error saving tweet: " + err.Error(), false, 4)
|
||||||
|
}
|
||||||
|
err = profile.DownloadTweetContentFor(&t)
|
||||||
|
if err != nil {
|
||||||
|
die("Error getting tweet content: " + err.Error(), false, 11)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Printf("Saved %d tweets, %d retweets and %d users. Exiting successfully\n", len(tweets), len(retweets), len(users))
|
||||||
|
}
|
||||||
|
@ -26,3 +26,9 @@ curl \
|
|||||||
-H "Authorization: Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" \
|
-H "Authorization: Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" \
|
||||||
-H "X-Guest-Token: 1396177150890348547" \
|
-H "X-Guest-Token: 1396177150890348547" \
|
||||||
https://twitter.com/i/api/2/timeline/conversation/1395881699142160387.json
|
https://twitter.com/i/api/2/timeline/conversation/1395881699142160387.json
|
||||||
|
|
||||||
|
|
||||||
|
curl \
|
||||||
|
-H "Authorization: Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA" \
|
||||||
|
-H "X-Guest-Token: 1449946080792104970" \
|
||||||
|
"https://twitter.com/i/api/2/search/adaptive.json?count=50&spelling_corrections=1&query_source=typed_query&pc=1&q=potatoes"
|
||||||
|
@ -5,6 +5,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"io/ioutil"
|
"io/ioutil"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"net/url"
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -217,6 +218,75 @@ func (api API) GetUser(handle UserHandle) (APIUser, error) {
|
|||||||
return response.ConvertToAPIUser(), err
|
return response.ConvertToAPIUser(), err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (api API) Search(query string, cursor string) (TweetResponse, error) {
|
||||||
|
client := &http.Client{Timeout: 10 * time.Second}
|
||||||
|
req, err := http.NewRequest("GET", "https://twitter.com/i/api/2/search/adaptive.json?count=50&spelling_corrections=1&query_source=typed_query&pc=1&q=" + url.QueryEscape(query), nil)
|
||||||
|
if err != nil {
|
||||||
|
return TweetResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
err = ApiRequestAddTokens(req)
|
||||||
|
if err != nil {
|
||||||
|
return TweetResponse{}, err
|
||||||
|
}
|
||||||
|
|
||||||
|
ApiRequestAddAllParams(req)
|
||||||
|
if cursor != "" {
|
||||||
|
UpdateQueryCursor(req, cursor, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
fmt.Println(req.URL.String())
|
||||||
|
|
||||||
|
resp, err := client.Do(req)
|
||||||
|
if err != nil {
|
||||||
|
return TweetResponse{}, err
|
||||||
|
}
|
||||||
|
defer resp.Body.Close()
|
||||||
|
|
||||||
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
content, _ := ioutil.ReadAll(resp.Body)
|
||||||
|
return TweetResponse{}, fmt.Errorf("Error while searching for %q. HTTP %s: %s", req.URL, resp.Status, content)
|
||||||
|
}
|
||||||
|
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
if err != nil {
|
||||||
|
return TweetResponse{}, err
|
||||||
|
}
|
||||||
|
// fmt.Println(string(body))
|
||||||
|
|
||||||
|
var response TweetResponse
|
||||||
|
err = json.Unmarshal(body, &response)
|
||||||
|
return response, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func (api API) GetMoreTweetsFromSearch(query string, response *TweetResponse, max_results int) error {
|
||||||
|
last_response := response
|
||||||
|
for last_response.GetCursor() != "" && len(response.GlobalObjects.Tweets) < max_results {
|
||||||
|
fresh_response, err := api.Search(query, last_response.GetCursor())
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if fresh_response.GetCursor() == last_response.GetCursor() && len(fresh_response.GlobalObjects.Tweets) == 0 {
|
||||||
|
// Empty response, cursor same as previous: end of feed has been reached
|
||||||
|
return END_OF_FEED
|
||||||
|
}
|
||||||
|
|
||||||
|
last_response = &fresh_response
|
||||||
|
|
||||||
|
// Copy the results over
|
||||||
|
for id, tweet := range last_response.GlobalObjects.Tweets {
|
||||||
|
response.GlobalObjects.Tweets[id] = tweet
|
||||||
|
}
|
||||||
|
for id, user := range last_response.GlobalObjects.Users {
|
||||||
|
response.GlobalObjects.Users[id] = user
|
||||||
|
}
|
||||||
|
fmt.Printf("Have %d tweets\n", len(response.GlobalObjects.Tweets))
|
||||||
|
// fmt.Printf("Cursor: %s\n", last_response.GetCursor())
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// Add Bearer token and guest token
|
// Add Bearer token and guest token
|
||||||
func ApiRequestAddTokens(req *http.Request) error {
|
func ApiRequestAddTokens(req *http.Request) error {
|
||||||
req.Header.Set("Authorization", "Bearer " + BEARER_TOKEN)
|
req.Header.Set("Authorization", "Bearer " + BEARER_TOKEN)
|
||||||
|
@ -308,9 +308,18 @@ func (t *TweetResponse) HandleTombstones() []string {
|
|||||||
|
|
||||||
func (t *TweetResponse) GetCursor() string {
|
func (t *TweetResponse) GetCursor() string {
|
||||||
entries := t.Timeline.Instructions[0].AddEntries.Entries
|
entries := t.Timeline.Instructions[0].AddEntries.Entries
|
||||||
last_entry := entries[len(entries) - 1]
|
if len(entries) > 0 {
|
||||||
if strings.Contains(last_entry.EntryID, "cursor") {
|
last_entry := entries[len(entries) - 1]
|
||||||
return last_entry.Content.Operation.Cursor.Value
|
if strings.Contains(last_entry.EntryID, "cursor") {
|
||||||
|
return last_entry.Content.Operation.Cursor.Value
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Next, try the other format ("replaceEntry")
|
||||||
|
instructions := t.Timeline.Instructions
|
||||||
|
last_replace_entry := instructions[len(instructions) - 1].ReplaceEntry.Entry
|
||||||
|
if strings.Contains(last_replace_entry.EntryID, "cursor") {
|
||||||
|
return last_replace_entry.Content.Operation.Cursor.Value
|
||||||
}
|
}
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user