REFACTOR: move as much API code out of the types files and into api_types_...
files as possible
- also remove a few useless functions
This commit is contained in:
parent
1f44fb0961
commit
850662c3cb
@ -4,6 +4,10 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"html"
|
"html"
|
||||||
|
"log"
|
||||||
|
"net/url"
|
||||||
|
"path"
|
||||||
|
"regexp"
|
||||||
"sort"
|
"sort"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@ -21,6 +25,19 @@ type APIMedia struct {
|
|||||||
} `json:"original_info"`
|
} `json:"original_info"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ParseAPIMedia(apiMedia APIMedia) Image {
|
||||||
|
local_filename := get_prefixed_path(path.Base(apiMedia.MediaURLHttps))
|
||||||
|
|
||||||
|
return Image{
|
||||||
|
ID: ImageID(apiMedia.ID),
|
||||||
|
RemoteURL: apiMedia.MediaURLHttps,
|
||||||
|
Width: apiMedia.OriginalInfo.Width,
|
||||||
|
Height: apiMedia.OriginalInfo.Height,
|
||||||
|
LocalFilename: local_filename,
|
||||||
|
IsDownloaded: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
type SortableVariants []struct {
|
type SortableVariants []struct {
|
||||||
Bitrate int `json:"bitrate,omitempty"`
|
Bitrate int `json:"bitrate,omitempty"`
|
||||||
URL string `json:"url"`
|
URL string `json:"url"`
|
||||||
@ -137,6 +154,164 @@ type APICard struct {
|
|||||||
} `json:"binding_values"`
|
} `json:"binding_values"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ParseAPIPoll(apiCard APICard) Poll {
|
||||||
|
card_url, err := url.Parse(apiCard.ShortenedUrl)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
id := int_or_panic(card_url.Hostname())
|
||||||
|
|
||||||
|
ret := Poll{}
|
||||||
|
ret.ID = PollID(id)
|
||||||
|
ret.NumChoices = parse_num_choices(apiCard.Name)
|
||||||
|
ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
|
||||||
|
ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
|
||||||
|
ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)
|
||||||
|
ret.Choice2 = apiCard.BindingValues.Choice2.StringValue
|
||||||
|
ret.Choice2_Votes = int_or_panic(apiCard.BindingValues.Choice2_Count.StringValue)
|
||||||
|
|
||||||
|
if ret.NumChoices > 2 {
|
||||||
|
ret.Choice3 = apiCard.BindingValues.Choice3.StringValue
|
||||||
|
ret.Choice3_Votes = int_or_panic(apiCard.BindingValues.Choice3_Count.StringValue)
|
||||||
|
}
|
||||||
|
if ret.NumChoices > 3 {
|
||||||
|
ret.Choice4 = apiCard.BindingValues.Choice4.StringValue
|
||||||
|
ret.Choice4_Votes = int_or_panic(apiCard.BindingValues.Choice4_Count.StringValue)
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
func parse_num_choices(card_name string) int {
|
||||||
|
if strings.Index(card_name, "poll") != 0 || strings.Index(card_name, "choice") != 5 {
|
||||||
|
panic("Not valid card name: " + card_name)
|
||||||
|
}
|
||||||
|
|
||||||
|
return int_or_panic(card_name[4:5])
|
||||||
|
}
|
||||||
|
|
||||||
|
func ParseAPIVideo(apiVideo APIExtendedMedia) Video {
|
||||||
|
variants := apiVideo.VideoInfo.Variants
|
||||||
|
sort.Sort(variants)
|
||||||
|
video_remote_url := variants[0].URL
|
||||||
|
|
||||||
|
var view_count int
|
||||||
|
|
||||||
|
r := apiVideo.Ext.MediaStats.R
|
||||||
|
|
||||||
|
switch r.(type) {
|
||||||
|
case string:
|
||||||
|
view_count = 0
|
||||||
|
case map[string]interface{}:
|
||||||
|
OK_entry, ok := r.(map[string]interface{})["ok"]
|
||||||
|
if !ok {
|
||||||
|
panic("No 'ok' value found in the R!")
|
||||||
|
}
|
||||||
|
view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"]
|
||||||
|
view_count = int_or_panic(view_count_str.(string))
|
||||||
|
if !ok {
|
||||||
|
panic("No 'viewCount' value found in the OK!")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
video_parsed_url, err := url.Parse(video_remote_url)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
local_filename := get_prefixed_path(path.Base(video_parsed_url.Path))
|
||||||
|
|
||||||
|
return Video{
|
||||||
|
ID: VideoID(apiVideo.ID),
|
||||||
|
Width: apiVideo.OriginalInfo.Width,
|
||||||
|
Height: apiVideo.OriginalInfo.Height,
|
||||||
|
RemoteURL: video_remote_url,
|
||||||
|
LocalFilename: local_filename,
|
||||||
|
|
||||||
|
ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
|
||||||
|
ThumbnailLocalPath: get_prefixed_path(path.Base(apiVideo.MediaURLHttps)),
|
||||||
|
Duration: apiVideo.VideoInfo.Duration,
|
||||||
|
ViewCount: view_count,
|
||||||
|
|
||||||
|
IsDownloaded: false,
|
||||||
|
IsBlockedByDMCA: false,
|
||||||
|
IsGeoblocked: apiVideo.ExtMediaAvailability.Reason == "Geoblocked",
|
||||||
|
IsGif: apiVideo.Type == "animated_gif",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func ParseAPIUrlCard(apiCard APICard) Url {
|
||||||
|
values := apiCard.BindingValues
|
||||||
|
ret := Url{}
|
||||||
|
ret.HasCard = true
|
||||||
|
|
||||||
|
ret.Domain = values.Domain.Value
|
||||||
|
ret.Title = values.Title.Value
|
||||||
|
ret.Description = values.Description.Value
|
||||||
|
ret.IsContentDownloaded = false
|
||||||
|
ret.CreatorID = UserID(values.Creator.UserValue.Value)
|
||||||
|
ret.SiteID = UserID(values.Site.UserValue.Value)
|
||||||
|
|
||||||
|
var thumbnail_url string
|
||||||
|
|
||||||
|
if apiCard.Name == "summary_large_image" || apiCard.Name == "summary" {
|
||||||
|
thumbnail_url = values.Thumbnail.ImageValue.Url
|
||||||
|
} else if apiCard.Name == "player" {
|
||||||
|
thumbnail_url = values.PlayerImage.ImageValue.Url
|
||||||
|
} else if apiCard.Name == "unified_card" {
|
||||||
|
// TODO: Grok chat previews
|
||||||
|
log.Print("Grok chat card, not implemented yet-- skipping")
|
||||||
|
} else {
|
||||||
|
panic("Unknown card type: " + apiCard.Name)
|
||||||
|
}
|
||||||
|
|
||||||
|
if thumbnail_url != "" {
|
||||||
|
ret.HasThumbnail = true
|
||||||
|
ret.ThumbnailRemoteUrl = thumbnail_url
|
||||||
|
ret.ThumbnailLocalPath = get_thumbnail_local_path(thumbnail_url)
|
||||||
|
ret.ThumbnailWidth = values.Thumbnail.ImageValue.Width
|
||||||
|
ret.ThumbnailHeight = values.Thumbnail.ImageValue.Height
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret
|
||||||
|
}
|
||||||
|
|
||||||
|
// Matches the first two characters of a filename, which must each be a word character
// (letter, digit or underscore) or a hyphen.  Compiled once at package scope rather than on
// every call to get_prefixed_path.
var prefixed_path_regex = regexp.MustCompile(`^[\w-]{2}`)

// get_prefixed_path places a filename under a directory named after its first two
// characters, e.g. "abcdef.jpg" => "ab/abcdef.jpg".
//
// Panics if the filename doesn't start with two characters that are word characters or
// hyphens (including when it is shorter than two characters).
func get_prefixed_path(p string) string {
	local_prefix := prefixed_path_regex.FindString(p)
	if len(local_prefix) != 2 {
		panic(fmt.Sprintf("Unable to extract a 2-letter prefix for filename %s", p))
	}
	return path.Join(local_prefix, p)
}
|
||||||
|
|
||||||
|
func get_thumbnail_local_path(remote_url string) string {
|
||||||
|
u, err := url.Parse(remote_url)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
if u.RawQuery == "" {
|
||||||
|
return path.Base(u.Path)
|
||||||
|
}
|
||||||
|
query_params, err := url.ParseQuery(u.RawQuery)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return get_prefixed_path(
|
||||||
|
fmt.Sprintf("%s_%s.%s", path.Base(u.Path), query_params["name"][0], query_params["format"][0]),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
type APITweet struct {
|
type APITweet struct {
|
||||||
ID int64 `json:"id_str,string"`
|
ID int64 `json:"id_str,string"`
|
||||||
ConversationID int64 `json:"conversation_id_str,string"`
|
ConversationID int64 `json:"conversation_id_str,string"`
|
||||||
@ -184,6 +359,171 @@ type APITweet struct {
|
|||||||
IsExpandable bool
|
IsExpandable bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t APITweet) ToTweetTrove() (TweetTrove, error) {
|
||||||
|
ret := NewTweetTrove()
|
||||||
|
if t.RetweetedStatusIDStr == "" {
|
||||||
|
// Parse as a Tweet
|
||||||
|
new_tweet, err := ParseSingleTweet(t)
|
||||||
|
if err != nil {
|
||||||
|
return ret, err
|
||||||
|
}
|
||||||
|
ret.Tweets[new_tweet.ID] = new_tweet
|
||||||
|
for _, space := range new_tweet.Spaces {
|
||||||
|
ret.Spaces[space.ID] = space
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Parse as a Retweet
|
||||||
|
new_retweet := Retweet{}
|
||||||
|
var err error
|
||||||
|
|
||||||
|
t.NormalizeContent()
|
||||||
|
|
||||||
|
new_retweet.RetweetID = TweetID(t.ID)
|
||||||
|
new_retweet.TweetID = TweetID(t.RetweetedStatusID)
|
||||||
|
new_retweet.RetweetedByID = UserID(t.UserID)
|
||||||
|
new_retweet.RetweetedAt, err = TimestampFromString(t.CreatedAt)
|
||||||
|
if err != nil {
|
||||||
|
return ret, err
|
||||||
|
}
|
||||||
|
ret.Retweets[new_retweet.RetweetID] = new_retweet
|
||||||
|
}
|
||||||
|
return ret, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Turn an APITweet, as returned from the scraper, into a properly structured Tweet object
|
||||||
|
func ParseSingleTweet(t APITweet) (ret Tweet, err error) {
|
||||||
|
t.NormalizeContent()
|
||||||
|
|
||||||
|
ret.ID = TweetID(t.ID)
|
||||||
|
ret.UserID = UserID(t.UserID)
|
||||||
|
ret.UserHandle = UserHandle(t.UserHandle)
|
||||||
|
ret.Text = t.FullText
|
||||||
|
ret.IsExpandable = t.IsExpandable
|
||||||
|
|
||||||
|
// Process "posted-at" date and time
|
||||||
|
if t.TombstoneText == "" { // Skip time parsing for tombstones
|
||||||
|
ret.PostedAt, err = TimestampFromString(t.CreatedAt)
|
||||||
|
if err != nil {
|
||||||
|
if ret.ID == 0 {
|
||||||
|
return Tweet{}, fmt.Errorf("unable to parse tweet: %w", ERR_NO_TWEET)
|
||||||
|
}
|
||||||
|
return Tweet{}, fmt.Errorf("Error parsing time on tweet ID %d:\n %w", ret.ID, err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ret.NumLikes = t.FavoriteCount
|
||||||
|
ret.NumRetweets = t.RetweetCount
|
||||||
|
ret.NumReplies = t.ReplyCount
|
||||||
|
ret.NumQuoteTweets = t.QuoteCount
|
||||||
|
ret.InReplyToID = TweetID(t.InReplyToStatusID)
|
||||||
|
ret.QuotedTweetID = TweetID(t.QuotedStatusID)
|
||||||
|
|
||||||
|
// Process URLs and link previews
|
||||||
|
for _, url := range t.Entities.URLs {
|
||||||
|
var url_object Url
|
||||||
|
if t.Card.ShortenedUrl == url.ShortenedUrl {
|
||||||
|
if t.Card.Name == "3691233323:audiospace" {
|
||||||
|
// This "url" is just a link to a Space. Don't process it as a Url
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
url_object = ParseAPIUrlCard(t.Card)
|
||||||
|
}
|
||||||
|
url_object.Text = url.ExpandedURL
|
||||||
|
url_object.ShortText = url.ShortenedUrl
|
||||||
|
url_object.TweetID = ret.ID
|
||||||
|
|
||||||
|
// Skip it if it's just the quoted tweet
|
||||||
|
_, id, is_ok := TryParseTweetUrl(url.ExpandedURL)
|
||||||
|
if is_ok && id == ret.QuotedTweetID {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
ret.Urls = append(ret.Urls, url_object)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process images
|
||||||
|
for _, media := range t.Entities.Media {
|
||||||
|
if media.Type != "photo" {
|
||||||
|
// Videos now have an entry in "Entities.Media" but they can be ignored; the useful bit is in ExtendedEntities
|
||||||
|
// So skip ones that aren't "photo"
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
new_image := ParseAPIMedia(media)
|
||||||
|
new_image.TweetID = ret.ID
|
||||||
|
ret.Images = append(ret.Images, new_image)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process hashtags
|
||||||
|
for _, hashtag := range t.Entities.Hashtags {
|
||||||
|
ret.Hashtags = append(ret.Hashtags, hashtag.Text)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process `@` mentions and reply-mentions
|
||||||
|
for _, mention := range t.Entities.Mentions {
|
||||||
|
ret.Mentions = append(ret.Mentions, mention.UserName)
|
||||||
|
}
|
||||||
|
for _, mention := range strings.Split(t.Entities.ReplyMentions, " ") {
|
||||||
|
if mention != "" {
|
||||||
|
if mention[0] != '@' {
|
||||||
|
panic(fmt.Errorf("Unknown ReplyMention value %q:\n %w", t.Entities.ReplyMentions, EXTERNAL_API_ERROR))
|
||||||
|
}
|
||||||
|
ret.ReplyMentions = append(ret.ReplyMentions, mention[1:])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process videos
|
||||||
|
for _, entity := range t.ExtendedEntities.Media {
|
||||||
|
if entity.Type != "video" && entity.Type != "animated_gif" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
new_video := ParseAPIVideo(entity)
|
||||||
|
new_video.TweetID = ret.ID
|
||||||
|
ret.Videos = append(ret.Videos, new_video)
|
||||||
|
|
||||||
|
// Remove the thumbnail from the Images list
|
||||||
|
updated_imgs := []Image{}
|
||||||
|
for _, img := range ret.Images {
|
||||||
|
if VideoID(img.ID) != new_video.ID {
|
||||||
|
updated_imgs = append(updated_imgs, img)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ret.Images = updated_imgs
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process polls
|
||||||
|
if strings.Index(t.Card.Name, "poll") == 0 {
|
||||||
|
poll := ParseAPIPoll(t.Card)
|
||||||
|
poll.TweetID = ret.ID
|
||||||
|
ret.Polls = []Poll{poll}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process spaces
|
||||||
|
if t.Card.Name == "3691233323:audiospace" {
|
||||||
|
space := Space{}
|
||||||
|
space.ID = SpaceID(t.Card.BindingValues.ID.StringValue)
|
||||||
|
space.ShortUrl = t.Card.ShortenedUrl
|
||||||
|
|
||||||
|
// Indicate that this Space needs its details fetched still
|
||||||
|
space.IsDetailsFetched = false
|
||||||
|
|
||||||
|
ret.Spaces = []Space{space}
|
||||||
|
ret.SpaceID = space.ID
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process tombstones and other metadata
|
||||||
|
ret.TombstoneType = t.TombstoneText
|
||||||
|
ret.IsStub = !(ret.TombstoneType == "")
|
||||||
|
ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
|
||||||
|
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
|
||||||
|
|
||||||
|
// Extra data that can help piece together tombstoned tweet info
|
||||||
|
ret.in_reply_to_user_id = UserID(t.InReplyToUserID)
|
||||||
|
ret.in_reply_to_user_handle = UserHandle(t.InReplyToScreenName)
|
||||||
|
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
func (t *APITweet) NormalizeContent() {
|
func (t *APITweet) NormalizeContent() {
|
||||||
id, err := strconv.Atoi(t.QuotedStatusIDStr)
|
id, err := strconv.Atoi(t.QuotedStatusIDStr)
|
||||||
if err == nil {
|
if err == nil {
|
||||||
@ -260,6 +600,54 @@ type APIUser struct {
|
|||||||
DoesntExist bool
|
DoesntExist bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Turn an APIUser, as returned from the scraper, into a properly structured User object
|
||||||
|
func ParseSingleUser(apiUser APIUser) (ret User, err error) {
|
||||||
|
if apiUser.DoesntExist {
|
||||||
|
// User may have been deleted, or there was a typo. There's no data to parse
|
||||||
|
if apiUser.ScreenName == "" {
|
||||||
|
panic("ScreenName is empty!")
|
||||||
|
}
|
||||||
|
ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName))
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ret.ID = UserID(apiUser.ID)
|
||||||
|
ret.Handle = UserHandle(apiUser.ScreenName)
|
||||||
|
if apiUser.IsBanned {
|
||||||
|
// Banned users won't have any further info, so just return here
|
||||||
|
ret.IsBanned = true
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ret.DisplayName = apiUser.Name
|
||||||
|
ret.Bio = apiUser.Description
|
||||||
|
ret.FollowingCount = apiUser.FriendsCount
|
||||||
|
ret.FollowersCount = apiUser.FollowersCount
|
||||||
|
ret.Location = apiUser.Location
|
||||||
|
if len(apiUser.Entities.URL.Urls) > 0 {
|
||||||
|
ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
|
||||||
|
}
|
||||||
|
ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
|
||||||
|
if err != nil {
|
||||||
|
err = fmt.Errorf("Error parsing time on user ID %d: %w", ret.ID, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ret.IsPrivate = apiUser.Protected
|
||||||
|
ret.IsVerified = apiUser.Verified
|
||||||
|
ret.ProfileImageUrl = apiUser.ProfileImageURLHTTPS
|
||||||
|
|
||||||
|
if regexp.MustCompile(`_normal\.\w{2,4}`).MatchString(ret.ProfileImageUrl) {
|
||||||
|
ret.ProfileImageUrl = strings.ReplaceAll(ret.ProfileImageUrl, "_normal.", ".")
|
||||||
|
}
|
||||||
|
ret.BannerImageUrl = apiUser.ProfileBannerURL
|
||||||
|
|
||||||
|
ret.ProfileImageLocalPath = ret.compute_profile_image_local_path()
|
||||||
|
ret.BannerImageLocalPath = ret.compute_banner_image_local_path()
|
||||||
|
|
||||||
|
if len(apiUser.PinnedTweetIdsStr) > 0 {
|
||||||
|
ret.PinnedTweetID = TweetID(idstr_to_int(apiUser.PinnedTweetIdsStr[0]))
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
type APINotification struct {
|
type APINotification struct {
|
||||||
ID string `json:"id"`
|
ID string `json:"id"`
|
||||||
TimestampMs int64 `json:"timestampMs,string"`
|
TimestampMs int64 `json:"timestampMs,string"`
|
||||||
@ -565,22 +953,11 @@ func (t *TweetResponse) ToTweetTrove() (TweetTrove, error) {
|
|||||||
ret := NewTweetTrove()
|
ret := NewTweetTrove()
|
||||||
|
|
||||||
for _, single_tweet := range t.GlobalObjects.Tweets {
|
for _, single_tweet := range t.GlobalObjects.Tweets {
|
||||||
if single_tweet.RetweetedStatusIDStr == "" {
|
trove, err := single_tweet.ToTweetTrove()
|
||||||
new_tweet, err := ParseSingleTweet(single_tweet)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return ret, err
|
return ret, err
|
||||||
}
|
}
|
||||||
ret.Tweets[new_tweet.ID] = new_tweet
|
ret.MergeWith(trove)
|
||||||
for _, space := range new_tweet.Spaces {
|
|
||||||
ret.Spaces[space.ID] = space
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
new_retweet, err := ParseSingleRetweet(single_tweet)
|
|
||||||
if err != nil {
|
|
||||||
return ret, err
|
|
||||||
}
|
|
||||||
ret.Retweets[new_retweet.RetweetID] = new_retweet
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, user := range t.GlobalObjects.Users {
|
for _, user := range t.GlobalObjects.Users {
|
||||||
@ -597,10 +974,14 @@ func (t *TweetResponse) ToTweetTrove() (TweetTrove, error) {
|
|||||||
return ret, nil
|
return ret, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func idstr_to_int(idstr string) int64 {
|
func idstr_to_int(s string) int64 {
|
||||||
id, err := strconv.Atoi(idstr)
|
return int64(int_or_panic(s))
|
||||||
|
}
|
||||||
|
|
||||||
|
func int_or_panic(s string) int {
|
||||||
|
result, err := strconv.Atoi(s)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
return int64(id)
|
return result
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"html"
|
"html"
|
||||||
"net/url"
|
"net/url"
|
||||||
|
"path"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/google/uuid"
|
"github.com/google/uuid"
|
||||||
@ -77,38 +78,106 @@ func (m *APIDMMessage) NormalizeContent() {
|
|||||||
m.MessageData.Text = strings.TrimSpace(m.MessageData.Text)
|
m.MessageData.Text = strings.TrimSpace(m.MessageData.Text)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m APIDMMessage) ToTweetTrove() TweetTrove {
|
func (api_msg APIDMMessage) ToTweetTrove() TweetTrove {
|
||||||
ret := NewTweetTrove()
|
ret := NewTweetTrove()
|
||||||
if m.ID == 0 {
|
if api_msg.ID == 0 {
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
m.NormalizeContent()
|
api_msg.NormalizeContent()
|
||||||
result := ParseAPIDMMessage(m)
|
|
||||||
|
msg := DMMessage{}
|
||||||
|
msg.ID = DMMessageID(api_msg.ID)
|
||||||
|
msg.SentAt = TimestampFromUnixMilli(int64(api_msg.Time))
|
||||||
|
msg.DMChatRoomID = DMChatRoomID(api_msg.ConversationID)
|
||||||
|
msg.SenderID = UserID(api_msg.MessageData.SenderID)
|
||||||
|
msg.Text = api_msg.MessageData.Text
|
||||||
|
|
||||||
|
msg.InReplyToID = DMMessageID(api_msg.MessageData.ReplyData.ID) // Will be "0" if not a reply
|
||||||
|
|
||||||
|
msg.Reactions = make(map[UserID]DMReaction)
|
||||||
|
for _, api_reacc := range api_msg.MessageReactions {
|
||||||
|
reacc := DMReaction{}
|
||||||
|
reacc.ID = DMMessageID(api_reacc.ID)
|
||||||
|
reacc.SenderID = UserID(api_reacc.SenderID)
|
||||||
|
reacc.SentAt = TimestampFromUnixMilli(int64(api_reacc.Time))
|
||||||
|
reacc.Emoji = api_reacc.Emoji
|
||||||
|
reacc.DMMessageID = msg.ID
|
||||||
|
msg.Reactions[reacc.SenderID] = reacc
|
||||||
|
}
|
||||||
|
if api_msg.MessageData.Attachment.Photo.ID != 0 {
|
||||||
|
new_image := ParseAPIMedia(api_msg.MessageData.Attachment.Photo)
|
||||||
|
new_image.DMMessageID = msg.ID
|
||||||
|
msg.Images = []Image{new_image}
|
||||||
|
}
|
||||||
|
if api_msg.MessageData.Attachment.Video.ID != 0 {
|
||||||
|
entity := api_msg.MessageData.Attachment.Video
|
||||||
|
if entity.Type == "video" || entity.Type == "animated_gif" {
|
||||||
|
new_video := ParseAPIVideo(entity)
|
||||||
|
new_video.DMMessageID = msg.ID
|
||||||
|
msg.Videos = append(msg.Videos, new_video)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Process URLs and link previews
|
||||||
|
for _, url := range api_msg.MessageData.Entities.URLs {
|
||||||
|
// Skip it if it's an embedded tweet
|
||||||
|
_, id, is_ok := TryParseTweetUrl(url.ExpandedURL)
|
||||||
|
if is_ok && id == TweetID(api_msg.MessageData.Attachment.Tweet.Status.ID) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Skip it if it's an embedded image
|
||||||
|
if api_msg.MessageData.Attachment.Photo.URL == url.ShortenedUrl {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Skip it if it's an embedded video
|
||||||
|
if api_msg.MessageData.Attachment.Video.URL == url.ShortenedUrl {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
var new_url Url
|
||||||
|
if api_msg.MessageData.Attachment.Card.ShortenedUrl == url.ShortenedUrl {
|
||||||
|
if api_msg.MessageData.Attachment.Card.Name == "3691233323:audiospace" {
|
||||||
|
// This "url" is just a link to a Space. Don't process it as a Url
|
||||||
|
// TODO: ...but do process it as a Space?
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
new_url = ParseAPIUrlCard(api_msg.MessageData.Attachment.Card)
|
||||||
|
}
|
||||||
|
new_url.Text = url.ExpandedURL
|
||||||
|
new_url.ShortText = url.ShortenedUrl
|
||||||
|
new_url.DMMessageID = msg.ID
|
||||||
|
msg.Urls = append(msg.Urls, new_url)
|
||||||
|
}
|
||||||
|
|
||||||
// Parse tweet attachment
|
// Parse tweet attachment
|
||||||
if m.MessageData.Attachment.Tweet.Status.ID != 0 {
|
if api_msg.MessageData.Attachment.Tweet.Status.ID != 0 {
|
||||||
u, err := ParseSingleUser(m.MessageData.Attachment.Tweet.Status.User)
|
u, err := ParseSingleUser(api_msg.MessageData.Attachment.Tweet.Status.User)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
ret.Users[u.ID] = u
|
ret.Users[u.ID] = u
|
||||||
|
|
||||||
t, err := ParseSingleTweet(m.MessageData.Attachment.Tweet.Status.APITweet)
|
t, err := ParseSingleTweet(api_msg.MessageData.Attachment.Tweet.Status.APITweet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
t.UserID = u.ID
|
t.UserID = u.ID
|
||||||
ret.Tweets[t.ID] = t
|
ret.Tweets[t.ID] = t
|
||||||
result.EmbeddedTweetID = t.ID
|
msg.EmbeddedTweetID = t.ID
|
||||||
}
|
}
|
||||||
ret.Messages[result.ID] = result
|
ret.Messages[msg.ID] = msg
|
||||||
|
|
||||||
// TODO: parse attached images and videos
|
|
||||||
|
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
type APIDMResponse struct {
|
||||||
|
InboxInitialState APIInbox `json:"inbox_initial_state"`
|
||||||
|
InboxTimeline APIInbox `json:"inbox_timeline"`
|
||||||
|
ConversationTimeline APIInbox `json:"conversation_timeline"`
|
||||||
|
UserEvents APIInbox `json:"user_events"`
|
||||||
|
}
|
||||||
|
|
||||||
type APIDMConversation struct {
|
type APIDMConversation struct {
|
||||||
ConversationID string `json:"conversation_id"`
|
ConversationID string `json:"conversation_id"`
|
||||||
Type string `json:"type"`
|
Type string `json:"type"`
|
||||||
@ -179,13 +248,6 @@ type APIInbox struct {
|
|||||||
Conversations map[string]APIDMConversation `json:"conversations"`
|
Conversations map[string]APIDMConversation `json:"conversations"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type APIDMResponse struct {
|
|
||||||
InboxInitialState APIInbox `json:"inbox_initial_state"`
|
|
||||||
InboxTimeline APIInbox `json:"inbox_timeline"`
|
|
||||||
ConversationTimeline APIInbox `json:"conversation_timeline"`
|
|
||||||
UserEvents APIInbox `json:"user_events"`
|
|
||||||
}
|
|
||||||
|
|
||||||
func (r APIInbox) ToTweetTrove(current_user_id UserID) TweetTrove {
|
func (r APIInbox) ToTweetTrove(current_user_id UserID) TweetTrove {
|
||||||
ret := NewTweetTrove()
|
ret := NewTweetTrove()
|
||||||
|
|
||||||
@ -211,8 +273,8 @@ func (r APIInbox) ToTweetTrove(current_user_id UserID) TweetTrove {
|
|||||||
|
|
||||||
ret.MergeWith(entry.Message.ToTweetTrove())
|
ret.MergeWith(entry.Message.ToTweetTrove())
|
||||||
}
|
}
|
||||||
for _, room := range r.Conversations {
|
for _, api_room := range r.Conversations {
|
||||||
result := ParseAPIDMChatRoom(room, current_user_id)
|
result := ParseAPIDMChatRoom(api_room, current_user_id)
|
||||||
ret.Rooms[result.ID] = result
|
ret.Rooms[result.ID] = result
|
||||||
}
|
}
|
||||||
for _, u := range r.Users {
|
for _, u := range r.Users {
|
||||||
@ -225,6 +287,46 @@ func (r APIInbox) ToTweetTrove(current_user_id UserID) TweetTrove {
|
|||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func ParseAPIDMChatRoom(api_room APIDMConversation, current_user_id UserID) DMChatRoom {
|
||||||
|
result := DMChatRoom{}
|
||||||
|
result.ID = DMChatRoomID(api_room.ConversationID)
|
||||||
|
result.Type = api_room.Type
|
||||||
|
result.LastMessagedAt = TimestampFromUnixMilli(int64(api_room.SortTimestamp))
|
||||||
|
result.IsNSFW = api_room.NSFW
|
||||||
|
|
||||||
|
if result.Type == "GROUP_DM" {
|
||||||
|
result.CreatedAt = TimestampFromUnixMilli(int64(api_room.CreateTime))
|
||||||
|
result.CreatedByUserID = UserID(api_room.CreatedByUserID)
|
||||||
|
result.Name = api_room.Name
|
||||||
|
result.AvatarImageRemoteURL = api_room.AvatarImage
|
||||||
|
tmp_url, err := url.Parse(result.AvatarImageRemoteURL)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
result.AvatarImageLocalPath = fmt.Sprintf("%s_avatar_%s.%s", result.ID, path.Base(tmp_url.Path), tmp_url.Query().Get("format"))
|
||||||
|
}
|
||||||
|
|
||||||
|
result.Participants = make(map[UserID]DMChatParticipant)
|
||||||
|
for _, api_participant := range api_room.Participants {
|
||||||
|
participant := DMChatParticipant{}
|
||||||
|
participant.UserID = UserID(api_participant.UserID)
|
||||||
|
participant.DMChatRoomID = result.ID
|
||||||
|
participant.LastReadEventID = DMMessageID(api_participant.LastReadEventID)
|
||||||
|
|
||||||
|
// Process chat settings if this is the logged-in user
|
||||||
|
if participant.UserID == current_user_id {
|
||||||
|
participant.IsNotificationsDisabled = api_room.NotificationsDisabled
|
||||||
|
participant.IsReadOnly = api_room.ReadOnly
|
||||||
|
participant.IsTrusted = api_room.Trusted
|
||||||
|
participant.IsMuted = api_room.Muted
|
||||||
|
participant.Status = api_room.Status
|
||||||
|
participant.IsChatSettingsValid = true
|
||||||
|
}
|
||||||
|
result.Participants[participant.UserID] = participant
|
||||||
|
}
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
func (api *API) GetDMInbox() (APIInbox, error) {
|
func (api *API) GetDMInbox() (APIInbox, error) {
|
||||||
url, err := url.Parse("https://twitter.com/i/api/1.1/dm/inbox_initial_state.json")
|
url, err := url.Parse("https://twitter.com/i/api/1.1/dm/inbox_initial_state.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -284,6 +386,30 @@ func (api *API) GetDMInbox() (APIInbox, error) {
|
|||||||
result.InboxInitialState.Status = result.InboxInitialState.InboxTimelines.Trusted.Status
|
result.InboxInitialState.Status = result.InboxInitialState.InboxTimelines.Trusted.Status
|
||||||
return result.InboxInitialState, err
|
return result.InboxInitialState, err
|
||||||
}
|
}
|
||||||
|
func (api *API) GetInbox(how_many int) (TweetTrove, string, error) {
|
||||||
|
if !api.IsAuthenticated {
|
||||||
|
return TweetTrove{}, "", ErrLoginRequired
|
||||||
|
}
|
||||||
|
dm_response, err := api.GetDMInbox()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
trove := dm_response.ToTweetTrove(api.UserID)
|
||||||
|
cursor := dm_response.Cursor
|
||||||
|
next_cursor_id := dm_response.InboxTimelines.Trusted.MinEntryID
|
||||||
|
for len(trove.Rooms) < how_many && dm_response.Status != "AT_END" {
|
||||||
|
dm_response, err = api.GetInboxTrusted(next_cursor_id)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
next_trove := dm_response.ToTweetTrove(api.UserID)
|
||||||
|
next_cursor_id = dm_response.MinEntryID
|
||||||
|
trove.MergeWith(next_trove)
|
||||||
|
}
|
||||||
|
|
||||||
|
return trove, cursor, nil
|
||||||
|
}
|
||||||
|
|
||||||
func (api *API) GetInboxTrusted(oldest_id int) (APIInbox, error) {
|
func (api *API) GetInboxTrusted(oldest_id int) (APIInbox, error) {
|
||||||
url, err := url.Parse("https://twitter.com/i/api/1.1/dm/inbox_timeline/trusted.json")
|
url, err := url.Parse("https://twitter.com/i/api/1.1/dm/inbox_timeline/trusted.json")
|
||||||
@ -345,8 +471,13 @@ func (api *API) GetInboxTrusted(oldest_id int) (APIInbox, error) {
|
|||||||
return result.InboxTimeline, err
|
return result.InboxTimeline, err
|
||||||
}
|
}
|
||||||
|
|
||||||
func (api *API) GetDMConversation(id DMChatRoomID, max_id DMMessageID) (APIInbox, error) {
|
func (api *API) GetConversation(room_id DMChatRoomID, max_id DMMessageID, how_many int) (TweetTrove, error) {
|
||||||
url, err := url.Parse("https://twitter.com/i/api/1.1/dm/conversation/" + string(id) + ".json")
|
if !api.IsAuthenticated {
|
||||||
|
return TweetTrove{}, ErrLoginRequired
|
||||||
|
}
|
||||||
|
|
||||||
|
fetch := func(max_id DMMessageID) (APIInbox, error) {
|
||||||
|
url, err := url.Parse("https://twitter.com/i/api/1.1/dm/conversation/" + string(room_id) + ".json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
@ -403,6 +534,26 @@ func (api *API) GetDMConversation(id DMChatRoomID, max_id DMMessageID) (APIInbox
|
|||||||
return result.ConversationTimeline, err
|
return result.ConversationTimeline, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
dm_response, err := fetch(max_id)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
trove := dm_response.ToTweetTrove(api.UserID)
|
||||||
|
oldest := trove.GetOldestMessage(room_id)
|
||||||
|
for len(trove.Messages) < how_many && dm_response.Status != "AT_END" {
|
||||||
|
dm_response, err = fetch(oldest)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
next_trove := dm_response.ToTweetTrove(api.UserID)
|
||||||
|
oldest = next_trove.GetOldestMessage(room_id)
|
||||||
|
trove.MergeWith(next_trove)
|
||||||
|
}
|
||||||
|
|
||||||
|
return trove, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Returns a TweetTrove and the cursor for the next update, or an error
|
// Returns a TweetTrove and the cursor for the next update, or an error
|
||||||
func (api *API) PollInboxUpdates(cursor string) (TweetTrove, string, error) {
|
func (api *API) PollInboxUpdates(cursor string) (TweetTrove, string, error) {
|
||||||
if !api.IsAuthenticated {
|
if !api.IsAuthenticated {
|
||||||
@ -459,6 +610,9 @@ func (api *API) PollInboxUpdates(cursor string) (TweetTrove, string, error) {
|
|||||||
return result.UserEvents.ToTweetTrove(api.UserID), result.UserEvents.Cursor, nil
|
return result.UserEvents.ToTweetTrove(api.UserID), result.UserEvents.Cursor, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Writes
|
||||||
|
// ------
|
||||||
|
|
||||||
func (api *API) SendDMMessage(room_id DMChatRoomID, text string, in_reply_to_id DMMessageID) (TweetTrove, error) {
|
func (api *API) SendDMMessage(room_id DMChatRoomID, text string, in_reply_to_id DMMessageID) (TweetTrove, error) {
|
||||||
if !api.IsAuthenticated {
|
if !api.IsAuthenticated {
|
||||||
return TweetTrove{}, ErrLoginRequired
|
return TweetTrove{}, ErrLoginRequired
|
||||||
|
@ -21,7 +21,9 @@ func TestParseAPIDMMessage(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &api_message)
|
err = json.Unmarshal(data, &api_message)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
message := ParseAPIDMMessage(api_message)
|
trove := api_message.ToTweetTrove()
|
||||||
|
message, is_ok := trove.Messages[DMMessageID(api_message.ID)]
|
||||||
|
require.True(t, is_ok)
|
||||||
assert.Equal(message.ID, DMMessageID(1663623203644751885))
|
assert.Equal(message.ID, DMMessageID(1663623203644751885))
|
||||||
assert.Equal(message.SentAt, TimestampFromUnixMilli(1685473655064))
|
assert.Equal(message.SentAt, TimestampFromUnixMilli(1685473655064))
|
||||||
assert.Equal(message.DMChatRoomID, DMChatRoomID("1458284524761075714-1488963321701171204"))
|
assert.Equal(message.DMChatRoomID, DMChatRoomID("1458284524761075714-1488963321701171204"))
|
||||||
@ -41,7 +43,9 @@ func TestParseAPIDMMessageWithReaction(t *testing.T) {
|
|||||||
err = json.Unmarshal(data, &api_message)
|
err = json.Unmarshal(data, &api_message)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
message := ParseAPIDMMessage(api_message)
|
trove := api_message.ToTweetTrove()
|
||||||
|
message, is_ok := trove.Messages[DMMessageID(api_message.ID)]
|
||||||
|
require.True(t, is_ok)
|
||||||
assert.Equal(message.ID, DMMessageID(1663623062195957773))
|
assert.Equal(message.ID, DMMessageID(1663623062195957773))
|
||||||
require.Len(t, message.Reactions, 1)
|
require.Len(t, message.Reactions, 1)
|
||||||
|
|
||||||
|
@ -1390,7 +1390,19 @@ func (api API) GetUser(handle UserHandle) (User, error) {
|
|||||||
return ParseSingleUser(apiUser)
|
return ParseSingleUser(apiUser)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calls API#GetUserByID and returns the parsed result
|
||||||
|
func GetUserByID(u_id UserID) (User, error) {
|
||||||
|
session, err := NewGuestSession() // This endpoint works better if you're not logged in
|
||||||
|
if err != nil {
|
||||||
|
return User{}, err
|
||||||
|
}
|
||||||
|
return session.GetUserByID(u_id)
|
||||||
|
}
|
||||||
|
|
||||||
func (api API) GetUserByID(u_id UserID) (User, error) {
|
func (api API) GetUserByID(u_id UserID) (User, error) {
|
||||||
|
if u_id == UserID(0) {
|
||||||
|
panic("No Users with ID 0")
|
||||||
|
}
|
||||||
url, err := url.Parse(GraphqlURL{
|
url, err := url.Parse(GraphqlURL{
|
||||||
BaseUrl: "https://x.com/i/api/graphql/Qw77dDjp9xCpUY-AXwt-yQ/UserByRestId",
|
BaseUrl: "https://x.com/i/api/graphql/Qw77dDjp9xCpUY-AXwt-yQ/UserByRestId",
|
||||||
Variables: GraphqlVariables{
|
Variables: GraphqlVariables{
|
||||||
|
@ -1,6 +0,0 @@
|
|||||||
package scraper
|
|
||||||
|
|
||||||
// Tokens
|
|
||||||
// ------
|
|
||||||
|
|
||||||
const BEARER_TOKEN string = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
|
|
@ -1,18 +0,0 @@
|
|||||||
package scraper
|
|
||||||
|
|
||||||
type ConversationID string
|
|
||||||
|
|
||||||
type Conversation struct {
|
|
||||||
ID ConversationID
|
|
||||||
Type string
|
|
||||||
SortEventID int
|
|
||||||
SortTimestamp int
|
|
||||||
Participants []User
|
|
||||||
Nsfw bool
|
|
||||||
NotificationsDisabled bool
|
|
||||||
LastReadEventId int
|
|
||||||
ReadOnly bool
|
|
||||||
Trusted bool
|
|
||||||
LowQuality bool
|
|
||||||
Muted bool
|
|
||||||
}
|
|
@ -1,11 +1,5 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"net/url"
|
|
||||||
"path"
|
|
||||||
)
|
|
||||||
|
|
||||||
type DMChatRoomID string
|
type DMChatRoomID string
|
||||||
|
|
||||||
// A participant in a chat room.
|
// A participant in a chat room.
|
||||||
@ -45,6 +39,8 @@ type DMChatRoom struct {
|
|||||||
Participants map[UserID]DMChatParticipant
|
Participants map[UserID]DMChatParticipant
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: view-layer
|
||||||
|
// - view helpers should go in a view layer
|
||||||
func (r DMChatRoom) GetParticipantIDs() []UserID {
|
func (r DMChatRoom) GetParticipantIDs() []UserID {
|
||||||
ret := []UserID{}
|
ret := []UserID{}
|
||||||
for user_id := range r.Participants {
|
for user_id := range r.Participants {
|
||||||
@ -52,43 +48,3 @@ func (r DMChatRoom) GetParticipantIDs() []UserID {
|
|||||||
}
|
}
|
||||||
return ret
|
return ret
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseAPIDMChatRoom(api_room APIDMConversation, current_user_id UserID) DMChatRoom {
|
|
||||||
ret := DMChatRoom{}
|
|
||||||
ret.ID = DMChatRoomID(api_room.ConversationID)
|
|
||||||
ret.Type = api_room.Type
|
|
||||||
ret.LastMessagedAt = TimestampFromUnixMilli(int64(api_room.SortTimestamp))
|
|
||||||
ret.IsNSFW = api_room.NSFW
|
|
||||||
|
|
||||||
if ret.Type == "GROUP_DM" {
|
|
||||||
ret.CreatedAt = TimestampFromUnixMilli(int64(api_room.CreateTime))
|
|
||||||
ret.CreatedByUserID = UserID(api_room.CreatedByUserID)
|
|
||||||
ret.Name = api_room.Name
|
|
||||||
ret.AvatarImageRemoteURL = api_room.AvatarImage
|
|
||||||
tmp_url, err := url.Parse(ret.AvatarImageRemoteURL)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
ret.AvatarImageLocalPath = fmt.Sprintf("%s_avatar_%s.%s", ret.ID, path.Base(tmp_url.Path), tmp_url.Query().Get("format"))
|
|
||||||
}
|
|
||||||
|
|
||||||
ret.Participants = make(map[UserID]DMChatParticipant)
|
|
||||||
for _, api_participant := range api_room.Participants {
|
|
||||||
participant := DMChatParticipant{}
|
|
||||||
participant.UserID = UserID(api_participant.UserID)
|
|
||||||
participant.DMChatRoomID = ret.ID
|
|
||||||
participant.LastReadEventID = DMMessageID(api_participant.LastReadEventID)
|
|
||||||
|
|
||||||
// Process chat settings if this is the logged-in user
|
|
||||||
if participant.UserID == current_user_id {
|
|
||||||
participant.IsNotificationsDisabled = api_room.NotificationsDisabled
|
|
||||||
participant.IsReadOnly = api_room.ReadOnly
|
|
||||||
participant.IsTrusted = api_room.Trusted
|
|
||||||
participant.IsMuted = api_room.Muted
|
|
||||||
participant.Status = api_room.Status
|
|
||||||
participant.IsChatSettingsValid = true
|
|
||||||
}
|
|
||||||
ret.Participants[participant.UserID] = participant
|
|
||||||
}
|
|
||||||
return ret
|
|
||||||
}
|
|
||||||
|
@ -10,15 +10,6 @@ type DMReaction struct {
|
|||||||
Emoji string `db:"emoji"`
|
Emoji string `db:"emoji"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseAPIDMReaction(reacc APIDMReaction) DMReaction {
|
|
||||||
ret := DMReaction{}
|
|
||||||
ret.ID = DMMessageID(reacc.ID)
|
|
||||||
ret.SenderID = UserID(reacc.SenderID)
|
|
||||||
ret.SentAt = TimestampFromUnixMilli(int64(reacc.Time))
|
|
||||||
ret.Emoji = reacc.Emoji
|
|
||||||
return ret
|
|
||||||
}
|
|
||||||
|
|
||||||
type DMMessage struct {
|
type DMMessage struct {
|
||||||
ID DMMessageID `db:"id"`
|
ID DMMessageID `db:"id"`
|
||||||
DMChatRoomID DMChatRoomID `db:"chat_room_id"`
|
DMChatRoomID DMChatRoomID `db:"chat_room_id"`
|
||||||
@ -33,67 +24,6 @@ type DMMessage struct {
|
|||||||
Images []Image
|
Images []Image
|
||||||
Videos []Video
|
Videos []Video
|
||||||
Urls []Url
|
Urls []Url
|
||||||
}
|
|
||||||
|
|
||||||
func ParseAPIDMMessage(message APIDMMessage) DMMessage {
|
LastReadEventUserIDs []UserID // Used for rendering
|
||||||
ret := DMMessage{}
|
|
||||||
ret.ID = DMMessageID(message.ID)
|
|
||||||
ret.SentAt = TimestampFromUnixMilli(int64(message.Time))
|
|
||||||
ret.DMChatRoomID = DMChatRoomID(message.ConversationID)
|
|
||||||
ret.SenderID = UserID(message.MessageData.SenderID)
|
|
||||||
ret.Text = message.MessageData.Text
|
|
||||||
|
|
||||||
ret.InReplyToID = DMMessageID(message.MessageData.ReplyData.ID) // Will be "0" if not a reply
|
|
||||||
|
|
||||||
ret.Reactions = make(map[UserID]DMReaction)
|
|
||||||
for _, api_reacc := range message.MessageReactions {
|
|
||||||
reacc := ParseAPIDMReaction(api_reacc)
|
|
||||||
reacc.DMMessageID = ret.ID
|
|
||||||
ret.Reactions[reacc.SenderID] = reacc
|
|
||||||
}
|
|
||||||
if message.MessageData.Attachment.Photo.ID != 0 {
|
|
||||||
new_image := ParseAPIMedia(message.MessageData.Attachment.Photo)
|
|
||||||
new_image.DMMessageID = ret.ID
|
|
||||||
ret.Images = []Image{new_image}
|
|
||||||
}
|
|
||||||
if message.MessageData.Attachment.Video.ID != 0 {
|
|
||||||
entity := message.MessageData.Attachment.Video
|
|
||||||
if entity.Type == "video" || entity.Type == "animated_gif" {
|
|
||||||
new_video := ParseAPIVideo(entity)
|
|
||||||
new_video.DMMessageID = ret.ID
|
|
||||||
ret.Videos = append(ret.Videos, new_video)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process URLs and link previews
|
|
||||||
for _, url := range message.MessageData.Entities.URLs {
|
|
||||||
// Skip it if it's an embedded tweet
|
|
||||||
_, id, is_ok := TryParseTweetUrl(url.ExpandedURL)
|
|
||||||
if is_ok && id == TweetID(message.MessageData.Attachment.Tweet.Status.ID) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// Skip it if it's an embedded image
|
|
||||||
if message.MessageData.Attachment.Photo.URL == url.ShortenedUrl {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// Skip it if it's an embedded video
|
|
||||||
if message.MessageData.Attachment.Video.URL == url.ShortenedUrl {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
var new_url Url
|
|
||||||
if message.MessageData.Attachment.Card.ShortenedUrl == url.ShortenedUrl {
|
|
||||||
if message.MessageData.Attachment.Card.Name == "3691233323:audiospace" {
|
|
||||||
// This "url" is just a link to a Space. Don't process it as a Url
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
new_url = ParseAPIUrlCard(message.MessageData.Attachment.Card)
|
|
||||||
}
|
|
||||||
new_url.Text = url.ExpandedURL
|
|
||||||
new_url.ShortText = url.ShortenedUrl
|
|
||||||
new_url.DMMessageID = ret.ID
|
|
||||||
ret.Urls = append(ret.Urls, new_url)
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret
|
|
||||||
}
|
}
|
||||||
|
@ -1,63 +0,0 @@
|
|||||||
package scraper
|
|
||||||
|
|
||||||
func (t TweetTrove) GetOldestMessage(id DMChatRoomID) DMMessageID {
|
|
||||||
oldest := DMMessageID(^uint(0) >> 1) // Max integer
|
|
||||||
for _, m := range t.Messages {
|
|
||||||
if m.ID < oldest && m.DMChatRoomID == id {
|
|
||||||
oldest = m.ID
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return oldest
|
|
||||||
}
|
|
||||||
|
|
||||||
// TODO: Why are these all here? =>
|
|
||||||
|
|
||||||
// Returns a TweetTrove and the cursor for the next update
|
|
||||||
func (api *API) GetInbox(how_many int) (TweetTrove, string, error) {
|
|
||||||
if !api.IsAuthenticated {
|
|
||||||
return TweetTrove{}, "", ErrLoginRequired
|
|
||||||
}
|
|
||||||
dm_response, err := api.GetDMInbox()
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
trove := dm_response.ToTweetTrove(api.UserID)
|
|
||||||
cursor := dm_response.Cursor
|
|
||||||
next_cursor_id := dm_response.InboxTimelines.Trusted.MinEntryID
|
|
||||||
for len(trove.Rooms) < how_many && dm_response.Status != "AT_END" {
|
|
||||||
dm_response, err = api.GetInboxTrusted(next_cursor_id)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
next_trove := dm_response.ToTweetTrove(api.UserID)
|
|
||||||
next_cursor_id = dm_response.MinEntryID
|
|
||||||
trove.MergeWith(next_trove)
|
|
||||||
}
|
|
||||||
|
|
||||||
return trove, cursor, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (api *API) GetConversation(id DMChatRoomID, max_id DMMessageID, how_many int) (TweetTrove, error) {
|
|
||||||
if !api.IsAuthenticated {
|
|
||||||
return TweetTrove{}, ErrLoginRequired
|
|
||||||
}
|
|
||||||
dm_response, err := api.GetDMConversation(id, max_id)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
trove := dm_response.ToTweetTrove(api.UserID)
|
|
||||||
oldest := trove.GetOldestMessage(id)
|
|
||||||
for len(trove.Messages) < how_many && dm_response.Status != "AT_END" {
|
|
||||||
dm_response, err = api.GetDMConversation(id, oldest)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
next_trove := dm_response.ToTweetTrove(api.UserID)
|
|
||||||
oldest = next_trove.GetOldestMessage(id)
|
|
||||||
trove.MergeWith(next_trove)
|
|
||||||
}
|
|
||||||
|
|
||||||
return trove, nil
|
|
||||||
}
|
|
@ -11,6 +11,8 @@ import (
|
|||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// BEARER_TOKEN is a hard-coded token string shared by the scraper package.
// NOTE(review): presumably sent as the bearer credential on API requests; its use
// is not visible from this declaration -- confirm against the request-building code.
const BEARER_TOKEN string = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"
|
||||||
|
|
||||||
type GuestTokenResponse struct {
|
type GuestTokenResponse struct {
|
||||||
Token string `json:"guest_token"`
|
Token string `json:"guest_token"`
|
||||||
RefreshedAt time.Time
|
RefreshedAt time.Time
|
||||||
|
@ -1,9 +1,5 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
|
||||||
"path"
|
|
||||||
)
|
|
||||||
|
|
||||||
type ImageID int64
|
type ImageID int64
|
||||||
|
|
||||||
type Image struct {
|
type Image struct {
|
||||||
@ -16,16 +12,3 @@ type Image struct {
|
|||||||
LocalFilename string `db:"local_filename"`
|
LocalFilename string `db:"local_filename"`
|
||||||
IsDownloaded bool `db:"is_downloaded"`
|
IsDownloaded bool `db:"is_downloaded"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseAPIMedia(apiMedia APIMedia) Image {
|
|
||||||
local_filename := get_prefixed_path(path.Base(apiMedia.MediaURLHttps))
|
|
||||||
|
|
||||||
return Image{
|
|
||||||
ID: ImageID(apiMedia.ID),
|
|
||||||
RemoteURL: apiMedia.MediaURLHttps,
|
|
||||||
Width: apiMedia.OriginalInfo.Width,
|
|
||||||
Height: apiMedia.OriginalInfo.Height,
|
|
||||||
LocalFilename: local_filename,
|
|
||||||
IsDownloaded: false,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -3,6 +3,8 @@ package scraper
|
|||||||
import (
	"fmt"
	"net/http"
	"net/url"
	"regexp"
	"strconv"
	"time"
)
|
||||||
|
|
||||||
@ -32,3 +34,43 @@ func ExpandShortUrl(short_url string) string {
|
|||||||
}
|
}
|
||||||
return long_url
|
return long_url
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Given an URL, try to parse it as a tweet url.
|
||||||
|
// The bool is an `is_ok` value; true if the parse was successful, false if it didn't match
|
||||||
|
func TryParseTweetUrl(s string) (UserHandle, TweetID, bool) {
|
||||||
|
parsed_url, err := url.Parse(s)
|
||||||
|
if err != nil {
|
||||||
|
return UserHandle(""), TweetID(0), false
|
||||||
|
}
|
||||||
|
|
||||||
|
if parsed_url.Host != "twitter.com" && parsed_url.Host != "mobile.twitter.com" && parsed_url.Host != "x.com" {
|
||||||
|
return UserHandle(""), TweetID(0), false
|
||||||
|
}
|
||||||
|
|
||||||
|
r := regexp.MustCompile(`^/(\w+)/status/(\d+)$`)
|
||||||
|
matches := r.FindStringSubmatch(parsed_url.Path)
|
||||||
|
if matches == nil {
|
||||||
|
return UserHandle(""), TweetID(0), false
|
||||||
|
}
|
||||||
|
if len(matches) != 3 { // matches[0] is the full string
|
||||||
|
panic(matches)
|
||||||
|
}
|
||||||
|
return UserHandle(matches[1]), TweetID(int_or_panic(matches[2])), true
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Given a tweet URL, return the corresponding user handle.
|
||||||
|
* If tweet url is not valid, return an error.
|
||||||
|
*/
|
||||||
|
func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
|
||||||
|
short_url_regex := regexp.MustCompile(`^https://t.co/\w{5,20}$`)
|
||||||
|
if short_url_regex.MatchString(tweet_url) {
|
||||||
|
tweet_url = ExpandShortUrl(tweet_url)
|
||||||
|
}
|
||||||
|
|
||||||
|
ret, _, is_ok := TryParseTweetUrl(tweet_url)
|
||||||
|
if !is_ok {
|
||||||
|
return "", fmt.Errorf("Invalid tweet url: %s", tweet_url)
|
||||||
|
}
|
||||||
|
return ret, nil
|
||||||
|
}
|
||||||
|
@ -1,9 +1,6 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"net/url"
|
|
||||||
"strconv"
|
|
||||||
"strings"
|
|
||||||
"time"
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -29,6 +26,9 @@ type Poll struct {
|
|||||||
LastUpdatedAt Timestamp `db:"last_scraped_at"`
|
LastUpdatedAt Timestamp `db:"last_scraped_at"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: view-layer
|
||||||
|
// - view helpers should go in a view layer
|
||||||
|
|
||||||
func (p Poll) TotalVotes() int {
|
func (p Poll) TotalVotes() int {
|
||||||
return p.Choice1_Votes + p.Choice2_Votes + p.Choice3_Votes + p.Choice4_Votes
|
return p.Choice1_Votes + p.Choice2_Votes + p.Choice3_Votes + p.Choice4_Votes
|
||||||
}
|
}
|
||||||
@ -48,56 +48,3 @@ func (p Poll) IsWinner(votes int) bool {
|
|||||||
}
|
}
|
||||||
return votes >= p.Choice1_Votes && votes >= p.Choice2_Votes && votes >= p.Choice3_Votes && votes >= p.Choice4_Votes
|
return votes >= p.Choice1_Votes && votes >= p.Choice2_Votes && votes >= p.Choice3_Votes && votes >= p.Choice4_Votes
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseAPIPoll(apiCard APICard) Poll {
|
|
||||||
card_url, err := url.Parse(apiCard.ShortenedUrl)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
id := int_or_panic(card_url.Hostname())
|
|
||||||
|
|
||||||
ret := Poll{}
|
|
||||||
ret.ID = PollID(id)
|
|
||||||
ret.NumChoices = parse_num_choices(apiCard.Name)
|
|
||||||
ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
|
|
||||||
ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
|
|
||||||
ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)
|
|
||||||
ret.Choice2 = apiCard.BindingValues.Choice2.StringValue
|
|
||||||
ret.Choice2_Votes = int_or_panic(apiCard.BindingValues.Choice2_Count.StringValue)
|
|
||||||
|
|
||||||
if ret.NumChoices > 2 {
|
|
||||||
ret.Choice3 = apiCard.BindingValues.Choice3.StringValue
|
|
||||||
ret.Choice3_Votes = int_or_panic(apiCard.BindingValues.Choice3_Count.StringValue)
|
|
||||||
}
|
|
||||||
if ret.NumChoices > 3 {
|
|
||||||
ret.Choice4 = apiCard.BindingValues.Choice4.StringValue
|
|
||||||
ret.Choice4_Votes = int_or_panic(apiCard.BindingValues.Choice4_Count.StringValue)
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret
|
|
||||||
}
|
|
||||||
|
|
||||||
func parse_num_choices(card_name string) int {
|
|
||||||
if strings.Index(card_name, "poll") != 0 || strings.Index(card_name, "choice") != 5 {
|
|
||||||
panic("Not valid card name: " + card_name)
|
|
||||||
}
|
|
||||||
|
|
||||||
return int_or_panic(card_name[4:5])
|
|
||||||
}
|
|
||||||
|
|
||||||
// int_or_panic parses `s` as a base-10 integer with strconv.Atoi and returns it.
//
// Panics with the conversion error if `s` is not a valid integer.
func int_or_panic(s string) int {
	parsed, conv_err := strconv.Atoi(s)
	if conv_err == nil {
		return parsed
	}
	panic(conv_err)
}
|
|
||||||
|
@ -8,16 +8,3 @@ type Retweet struct {
|
|||||||
RetweetedBy *User
|
RetweetedBy *User
|
||||||
RetweetedAt Timestamp `db:"retweeted_at"`
|
RetweetedAt Timestamp `db:"retweeted_at"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {
|
|
||||||
apiTweet.NormalizeContent()
|
|
||||||
|
|
||||||
ret.RetweetID = TweetID(apiTweet.ID)
|
|
||||||
ret.TweetID = TweetID(apiTweet.RetweetedStatusID)
|
|
||||||
ret.RetweetedByID = UserID(apiTweet.UserID)
|
|
||||||
ret.RetweetedAt, err = TimestampFromString(apiTweet.CreatedAt)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
@ -13,16 +13,23 @@ import (
|
|||||||
|
|
||||||
func TestParseSingleRetweet(t *testing.T) {
|
func TestParseSingleRetweet(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
|
require := require.New(t)
|
||||||
data, err := os.ReadFile("test_responses/tweet_that_is_a_retweet.json")
|
data, err := os.ReadFile("test_responses/tweet_that_is_a_retweet.json")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
panic(err)
|
panic(err)
|
||||||
}
|
}
|
||||||
var api_tweet APITweet
|
var api_tweet APITweet
|
||||||
err = json.Unmarshal(data, &api_tweet)
|
err = json.Unmarshal(data, &api_tweet)
|
||||||
require.NoError(t, err)
|
require.NoError(err)
|
||||||
|
|
||||||
retweet, err := ParseSingleRetweet(api_tweet)
|
trove, err := api_tweet.ToTweetTrove()
|
||||||
require.NoError(t, err)
|
require.NoError(err)
|
||||||
|
|
||||||
|
require.Len(trove.Tweets, 0)
|
||||||
|
require.Len(trove.Retweets, 1)
|
||||||
|
|
||||||
|
retweet, is_ok := trove.Retweets[TweetID(1404270043018448896)]
|
||||||
|
require.True(is_ok)
|
||||||
|
|
||||||
assert.Equal(TweetID(1404270043018448896), retweet.RetweetID)
|
assert.Equal(TweetID(1404270043018448896), retweet.RetweetID)
|
||||||
assert.Equal(TweetID(1404269989646028804), retweet.TweetID)
|
assert.Equal(TweetID(1404269989646028804), retweet.TweetID)
|
||||||
|
@ -26,6 +26,9 @@ type Space struct {
|
|||||||
IsDetailsFetched bool `db:"is_details_fetched"`
|
IsDetailsFetched bool `db:"is_details_fetched"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: view-layer
|
||||||
|
// - view helpers should go in a view layer
|
||||||
|
|
||||||
func (space Space) FormatDuration() string {
|
func (space Space) FormatDuration() string {
|
||||||
duration := space.EndedAt.Time.Sub(space.StartedAt.Time)
|
duration := space.EndedAt.Time.Sub(space.StartedAt.Time)
|
||||||
h := int(duration.Hours())
|
h := int(duration.Hours())
|
||||||
@ -37,14 +40,3 @@ func (space Space) FormatDuration() string {
|
|||||||
}
|
}
|
||||||
return fmt.Sprintf("%dm%02ds", m, s)
|
return fmt.Sprintf("%dm%02ds", m, s)
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseAPISpace(apiCard APICard) Space {
|
|
||||||
ret := Space{}
|
|
||||||
ret.ID = SpaceID(apiCard.BindingValues.ID.StringValue)
|
|
||||||
ret.ShortUrl = apiCard.ShortenedUrl
|
|
||||||
|
|
||||||
// Indicate that this Space needs its details fetched still
|
|
||||||
ret.IsDetailsFetched = false
|
|
||||||
|
|
||||||
return ret
|
|
||||||
}
|
|
||||||
|
@ -1,31 +1,13 @@
|
|||||||
package scraper_test
|
package scraper_test
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"encoding/json"
|
|
||||||
"os"
|
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/stretchr/testify/assert"
|
"github.com/stretchr/testify/assert"
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
|
|
||||||
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestParseSpace(t *testing.T) {
|
|
||||||
assert := assert.New(t)
|
|
||||||
data, err := os.ReadFile("test_responses/tweet_content/space.json")
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
var apiCard APICard
|
|
||||||
err = json.Unmarshal(data, &apiCard)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
space := ParseAPISpace(apiCard)
|
|
||||||
assert.Equal(SpaceID("1YpKkZVyQjoxj"), space.ID)
|
|
||||||
assert.Equal("https://t.co/WBPAHNF8Om", space.ShortUrl)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestFormatSpaceDuration(t *testing.T) {
|
func TestFormatSpaceDuration(t *testing.T) {
|
||||||
assert := assert.New(t)
|
assert := assert.New(t)
|
||||||
s := Space{
|
s := Space{
|
||||||
|
@ -1 +0,0 @@
|
|||||||
{"name":"3691233323:audiospace","url":"https://t.co/WBPAHNF8Om","card_type_url":"http://card-type-url-is-deprecated.invalid","binding_values":{"id":{"type":"STRING","string_value":"1YpKkZVyQjoxj"},"narrow_cast_space_type":{"type":"STRING","string_value":"0"},"card_url":{"type":"STRING","string_value":"https://t.co/WBPAHNF8Om","scribe_key":"card_url"}},"card_platform":{"platform":{"device":{"name":"Swift","version":"12"},"audience":{"name":"production"}}}}
|
|
@ -5,9 +5,6 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
|
||||||
|
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/terminal_utils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
var ERR_NO_TWEET = errors.New("Empty tweet")
|
var ERR_NO_TWEET = errors.New("Empty tweet")
|
||||||
@ -77,172 +74,6 @@ type Tweet struct {
|
|||||||
IsConversationScraped bool `db:"is_conversation_scraped"`
|
IsConversationScraped bool `db:"is_conversation_scraped"`
|
||||||
LastScrapedAt Timestamp `db:"last_scraped_at"`
|
LastScrapedAt Timestamp `db:"last_scraped_at"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t Tweet) String() string {
|
|
||||||
var author string
|
|
||||||
if t.User != nil {
|
|
||||||
author = fmt.Sprintf("%s\n@%s", t.User.DisplayName, t.User.Handle)
|
|
||||||
} else {
|
|
||||||
author = "@???"
|
|
||||||
}
|
|
||||||
|
|
||||||
ret := fmt.Sprintf(
|
|
||||||
`%s
|
|
||||||
%s
|
|
||||||
%s
|
|
||||||
Replies: %d RT: %d QT: %d Likes: %d
|
|
||||||
`,
|
|
||||||
author,
|
|
||||||
terminal_utils.FormatDate(t.PostedAt.Time),
|
|
||||||
terminal_utils.WrapText(t.Text, 60),
|
|
||||||
t.NumReplies,
|
|
||||||
t.NumRetweets,
|
|
||||||
t.NumQuoteTweets,
|
|
||||||
t.NumLikes,
|
|
||||||
)
|
|
||||||
|
|
||||||
if len(t.Images) > 0 {
|
|
||||||
ret += fmt.Sprintf(terminal_utils.COLOR_GREEN+"images: %d\n"+terminal_utils.COLOR_RESET, len(t.Images))
|
|
||||||
}
|
|
||||||
if len(t.Urls) > 0 {
|
|
||||||
ret += "urls: [\n"
|
|
||||||
for _, url := range t.Urls {
|
|
||||||
ret += " " + url.Text + "\n"
|
|
||||||
}
|
|
||||||
ret += "]"
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret
|
|
||||||
}
|
|
||||||
|
|
||||||
// Turn an APITweet, as returned from the scraper, into a properly structured Tweet object
|
|
||||||
func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
|
|
||||||
apiTweet.NormalizeContent()
|
|
||||||
|
|
||||||
ret.ID = TweetID(apiTweet.ID)
|
|
||||||
ret.UserID = UserID(apiTweet.UserID)
|
|
||||||
ret.UserHandle = UserHandle(apiTweet.UserHandle)
|
|
||||||
ret.Text = apiTweet.FullText
|
|
||||||
ret.IsExpandable = apiTweet.IsExpandable
|
|
||||||
|
|
||||||
// Process "posted-at" date and time
|
|
||||||
if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
|
|
||||||
ret.PostedAt, err = TimestampFromString(apiTweet.CreatedAt)
|
|
||||||
if err != nil {
|
|
||||||
if ret.ID == 0 {
|
|
||||||
return Tweet{}, fmt.Errorf("unable to parse tweet: %w", ERR_NO_TWEET)
|
|
||||||
}
|
|
||||||
return Tweet{}, fmt.Errorf("Error parsing time on tweet ID %d:\n %w", ret.ID, err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
ret.NumLikes = apiTweet.FavoriteCount
|
|
||||||
ret.NumRetweets = apiTweet.RetweetCount
|
|
||||||
ret.NumReplies = apiTweet.ReplyCount
|
|
||||||
ret.NumQuoteTweets = apiTweet.QuoteCount
|
|
||||||
ret.InReplyToID = TweetID(apiTweet.InReplyToStatusID)
|
|
||||||
ret.QuotedTweetID = TweetID(apiTweet.QuotedStatusID)
|
|
||||||
|
|
||||||
// Process URLs and link previews
|
|
||||||
for _, url := range apiTweet.Entities.URLs {
|
|
||||||
var url_object Url
|
|
||||||
if apiTweet.Card.ShortenedUrl == url.ShortenedUrl {
|
|
||||||
if apiTweet.Card.Name == "3691233323:audiospace" {
|
|
||||||
// This "url" is just a link to a Space. Don't process it as a Url
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
url_object = ParseAPIUrlCard(apiTweet.Card)
|
|
||||||
}
|
|
||||||
url_object.Text = url.ExpandedURL
|
|
||||||
url_object.ShortText = url.ShortenedUrl
|
|
||||||
url_object.TweetID = ret.ID
|
|
||||||
|
|
||||||
// Skip it if it's just the quoted tweet
|
|
||||||
_, id, is_ok := TryParseTweetUrl(url.ExpandedURL)
|
|
||||||
if is_ok && id == ret.QuotedTweetID {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
ret.Urls = append(ret.Urls, url_object)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process images
|
|
||||||
for _, media := range apiTweet.Entities.Media {
|
|
||||||
if media.Type != "photo" {
|
|
||||||
// Videos now have an entry in "Entities.Media" but they can be ignored; the useful bit is in ExtendedEntities
|
|
||||||
// So skip ones that aren't "photo"
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
new_image := ParseAPIMedia(media)
|
|
||||||
new_image.TweetID = ret.ID
|
|
||||||
ret.Images = append(ret.Images, new_image)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process hashtags
|
|
||||||
for _, hashtag := range apiTweet.Entities.Hashtags {
|
|
||||||
ret.Hashtags = append(ret.Hashtags, hashtag.Text)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process `@` mentions and reply-mentions
|
|
||||||
for _, mention := range apiTweet.Entities.Mentions {
|
|
||||||
ret.Mentions = append(ret.Mentions, mention.UserName)
|
|
||||||
}
|
|
||||||
for _, mention := range strings.Split(apiTweet.Entities.ReplyMentions, " ") {
|
|
||||||
if mention != "" {
|
|
||||||
if mention[0] != '@' {
|
|
||||||
panic(fmt.Errorf("Unknown ReplyMention value %q:\n %w", apiTweet.Entities.ReplyMentions, EXTERNAL_API_ERROR))
|
|
||||||
}
|
|
||||||
ret.ReplyMentions = append(ret.ReplyMentions, mention[1:])
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process videos
|
|
||||||
for _, entity := range apiTweet.ExtendedEntities.Media {
|
|
||||||
if entity.Type != "video" && entity.Type != "animated_gif" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
|
|
||||||
new_video := ParseAPIVideo(entity)
|
|
||||||
new_video.TweetID = ret.ID
|
|
||||||
ret.Videos = append(ret.Videos, new_video)
|
|
||||||
|
|
||||||
// Remove the thumbnail from the Images list
|
|
||||||
updated_imgs := []Image{}
|
|
||||||
for _, img := range ret.Images {
|
|
||||||
if VideoID(img.ID) != new_video.ID {
|
|
||||||
updated_imgs = append(updated_imgs, img)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
ret.Images = updated_imgs
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process polls
|
|
||||||
if strings.Index(apiTweet.Card.Name, "poll") == 0 {
|
|
||||||
poll := ParseAPIPoll(apiTweet.Card)
|
|
||||||
poll.TweetID = ret.ID
|
|
||||||
ret.Polls = []Poll{poll}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process spaces
|
|
||||||
if apiTweet.Card.Name == "3691233323:audiospace" {
|
|
||||||
space := ParseAPISpace(apiTweet.Card)
|
|
||||||
ret.Spaces = []Space{space}
|
|
||||||
ret.SpaceID = space.ID
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process tombstones and other metadata
|
|
||||||
ret.TombstoneType = apiTweet.TombstoneText
|
|
||||||
ret.IsStub = !(ret.TombstoneType == "")
|
|
||||||
ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
|
|
||||||
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle
|
|
||||||
|
|
||||||
// Extra data that can help piece together tombstoned tweet info
|
|
||||||
ret.in_reply_to_user_id = UserID(apiTweet.InReplyToUserID)
|
|
||||||
ret.in_reply_to_user_handle = UserHandle(apiTweet.InReplyToScreenName)
|
|
||||||
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get a single tweet with no replies from the API.
|
// Get a single tweet with no replies from the API.
|
||||||
//
|
//
|
||||||
// args:
|
// args:
|
||||||
|
@ -195,3 +195,13 @@ func (trove *TweetTrove) PostProcess(api *API) error {
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (t TweetTrove) GetOldestMessage(id DMChatRoomID) DMMessageID {
|
||||||
|
oldest := DMMessageID(^uint(0) >> 1) // Max integer
|
||||||
|
for _, m := range t.Messages {
|
||||||
|
if m.ID < oldest && m.DMChatRoomID == id {
|
||||||
|
oldest = m.ID
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return oldest
|
||||||
|
}
|
||||||
|
@ -1,11 +1,7 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
|
||||||
"log"
|
|
||||||
"net/url"
|
"net/url"
|
||||||
"path"
|
|
||||||
"regexp"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type Url struct {
|
type Url struct {
|
||||||
@ -28,6 +24,9 @@ type Url struct {
|
|||||||
IsContentDownloaded bool `db:"is_content_downloaded"`
|
IsContentDownloaded bool `db:"is_content_downloaded"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: view-layer
|
||||||
|
// - view helpers should go in a view layer
|
||||||
|
|
||||||
func (u Url) GetDomain() string {
|
func (u Url) GetDomain() string {
|
||||||
if u.Domain != "" {
|
if u.Domain != "" {
|
||||||
return u.Domain
|
return u.Domain
|
||||||
@ -38,106 +37,3 @@ func (u Url) GetDomain() string {
|
|||||||
}
|
}
|
||||||
return urlstruct.Host
|
return urlstruct.Host
|
||||||
}
|
}
|
||||||
|
|
||||||
func ParseAPIUrlCard(apiCard APICard) Url {
|
|
||||||
values := apiCard.BindingValues
|
|
||||||
ret := Url{}
|
|
||||||
ret.HasCard = true
|
|
||||||
|
|
||||||
ret.Domain = values.Domain.Value
|
|
||||||
ret.Title = values.Title.Value
|
|
||||||
ret.Description = values.Description.Value
|
|
||||||
ret.IsContentDownloaded = false
|
|
||||||
ret.CreatorID = UserID(values.Creator.UserValue.Value)
|
|
||||||
ret.SiteID = UserID(values.Site.UserValue.Value)
|
|
||||||
|
|
||||||
var thumbnail_url string
|
|
||||||
|
|
||||||
if apiCard.Name == "summary_large_image" || apiCard.Name == "summary" {
|
|
||||||
thumbnail_url = values.Thumbnail.ImageValue.Url
|
|
||||||
} else if apiCard.Name == "player" {
|
|
||||||
thumbnail_url = values.PlayerImage.ImageValue.Url
|
|
||||||
} else if apiCard.Name == "unified_card" {
|
|
||||||
// TODO: Grok chat previews
|
|
||||||
log.Print("Grok chat card, not implemented yet-- skipping")
|
|
||||||
} else {
|
|
||||||
panic("Unknown card type: " + apiCard.Name)
|
|
||||||
}
|
|
||||||
|
|
||||||
if thumbnail_url != "" {
|
|
||||||
ret.HasThumbnail = true
|
|
||||||
ret.ThumbnailRemoteUrl = thumbnail_url
|
|
||||||
ret.ThumbnailLocalPath = get_thumbnail_local_path(thumbnail_url)
|
|
||||||
ret.ThumbnailWidth = values.Thumbnail.ImageValue.Width
|
|
||||||
ret.ThumbnailHeight = values.Thumbnail.ImageValue.Height
|
|
||||||
}
|
|
||||||
|
|
||||||
return ret
|
|
||||||
}
|
|
||||||
|
|
||||||
// Matches the first two characters of a filename when both are word characters or dashes.
// Compiled once at package scope rather than on every call.
var local_prefix_regex = regexp.MustCompile(`^[\w-]{2}`)

// get_prefixed_path places the filename `p` inside a directory named after its first two
// characters, e.g. "abcdef.jpg" becomes "ab/abcdef.jpg".
//
// Panics if `p` does not start with two characters from the set [A-Za-z0-9_-].
func get_prefixed_path(p string) string {
	local_prefix := local_prefix_regex.FindString(p)
	if len(local_prefix) != 2 {
		// FindString returns "" when there is no match
		panic(fmt.Sprintf("Unable to extract a 2-letter prefix for filename %s", p))
	}
	return path.Join(local_prefix, p)
}
|
|
||||||
|
|
||||||
func get_thumbnail_local_path(remote_url string) string {
|
|
||||||
u, err := url.Parse(remote_url)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
if u.RawQuery == "" {
|
|
||||||
return path.Base(u.Path)
|
|
||||||
}
|
|
||||||
query_params, err := url.ParseQuery(u.RawQuery)
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return get_prefixed_path(
|
|
||||||
fmt.Sprintf("%s_%s.%s", path.Base(u.Path), query_params["name"][0], query_params["format"][0]),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Given an URL, try to parse it as a tweet url.
|
|
||||||
// The bool is an `is_ok` value; true if the parse was successful, false if it didn't match
|
|
||||||
func TryParseTweetUrl(s string) (UserHandle, TweetID, bool) {
|
|
||||||
parsed_url, err := url.Parse(s)
|
|
||||||
if err != nil {
|
|
||||||
return UserHandle(""), TweetID(0), false
|
|
||||||
}
|
|
||||||
|
|
||||||
if parsed_url.Host != "twitter.com" && parsed_url.Host != "mobile.twitter.com" && parsed_url.Host != "x.com" {
|
|
||||||
return UserHandle(""), TweetID(0), false
|
|
||||||
}
|
|
||||||
|
|
||||||
r := regexp.MustCompile(`^/(\w+)/status/(\d+)$`)
|
|
||||||
matches := r.FindStringSubmatch(parsed_url.Path)
|
|
||||||
if matches == nil {
|
|
||||||
return UserHandle(""), TweetID(0), false
|
|
||||||
}
|
|
||||||
if len(matches) != 3 { // matches[0] is the full string
|
|
||||||
panic(matches)
|
|
||||||
}
|
|
||||||
return UserHandle(matches[1]), TweetID(int_or_panic(matches[2])), true
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Given a tweet URL, return the corresponding user handle.
|
|
||||||
* If tweet url is not valid, return an error.
|
|
||||||
*/
|
|
||||||
func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
|
|
||||||
short_url_regex := regexp.MustCompile(`^https://t.co/\w{5,20}$`)
|
|
||||||
if short_url_regex.MatchString(tweet_url) {
|
|
||||||
tweet_url = ExpandShortUrl(tweet_url)
|
|
||||||
}
|
|
||||||
|
|
||||||
ret, _, is_ok := TryParseTweetUrl(tweet_url)
|
|
||||||
if !is_ok {
|
|
||||||
return "", fmt.Errorf("Invalid tweet url: %s", tweet_url)
|
|
||||||
}
|
|
||||||
return ret, nil
|
|
||||||
}
|
|
||||||
|
@ -4,9 +4,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"path"
|
"path"
|
||||||
"regexp"
|
"regexp"
|
||||||
"strings"
|
|
||||||
|
|
||||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/terminal_utils"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const DEFAULT_PROFILE_IMAGE_URL = "https://abs.twimg.com/sticky/default_profile_images/default_profile.png"
|
const DEFAULT_PROFILE_IMAGE_URL = "https://abs.twimg.com/sticky/default_profile_images/default_profile.png"
|
||||||
@ -15,14 +12,6 @@ const DEFAULT_PROFILE_IMAGE = "default_profile.png"
|
|||||||
type UserID int64
|
type UserID int64
|
||||||
type UserHandle string
|
type UserHandle string
|
||||||
|
|
||||||
func JoinArrayOfHandles(handles []UserHandle) string {
|
|
||||||
ret := []string{}
|
|
||||||
for _, h := range handles {
|
|
||||||
ret = append(ret, string(h))
|
|
||||||
}
|
|
||||||
return strings.Join(ret, ",")
|
|
||||||
}
|
|
||||||
|
|
||||||
type User struct {
|
type User struct {
|
||||||
ID UserID `db:"id"`
|
ID UserID `db:"id"`
|
||||||
DisplayName string `db:"display_name"`
|
DisplayName string `db:"display_name"`
|
||||||
@ -51,40 +40,6 @@ type User struct {
|
|||||||
IsIdFake bool `db:"is_id_fake"`
|
IsIdFake bool `db:"is_id_fake"`
|
||||||
}
|
}
|
||||||
|
|
||||||
func (u User) String() string {
|
|
||||||
var verified string
|
|
||||||
if u.IsVerified {
|
|
||||||
verified = "[\u2713]"
|
|
||||||
}
|
|
||||||
ret := fmt.Sprintf(
|
|
||||||
`%s%s
|
|
||||||
@%s
|
|
||||||
%s
|
|
||||||
|
|
||||||
Following: %d Followers: %d
|
|
||||||
|
|
||||||
Joined %s
|
|
||||||
%s
|
|
||||||
%s
|
|
||||||
`,
|
|
||||||
u.DisplayName,
|
|
||||||
verified,
|
|
||||||
u.Handle,
|
|
||||||
terminal_utils.WrapText(u.Bio, 60),
|
|
||||||
u.FollowingCount,
|
|
||||||
u.FollowersCount,
|
|
||||||
terminal_utils.FormatDate(u.JoinDate.Time),
|
|
||||||
u.Location,
|
|
||||||
u.Website,
|
|
||||||
)
|
|
||||||
if u.PinnedTweet != nil {
|
|
||||||
ret += "\n" + terminal_utils.WrapText(u.PinnedTweet.Text, 60)
|
|
||||||
} else {
|
|
||||||
println("Pinned tweet id:", u.PinnedTweetID)
|
|
||||||
}
|
|
||||||
return ret
|
|
||||||
}
|
|
||||||
|
|
||||||
func GetUnknownUser() User {
|
func GetUnknownUser() User {
|
||||||
return User{
|
return User{
|
||||||
ID: UserID(0x4000000000000000), // 2^62
|
ID: UserID(0x4000000000000000), // 2^62
|
||||||
@ -125,63 +80,6 @@ func GetUnknownUserWithHandle(handle UserHandle) User {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Turn an APIUser, as returned from the scraper, into a properly structured User object
|
|
||||||
func ParseSingleUser(apiUser APIUser) (ret User, err error) {
|
|
||||||
if apiUser.DoesntExist {
|
|
||||||
// User may have been deleted, or there was a typo. There's no data to parse
|
|
||||||
if apiUser.ScreenName == "" {
|
|
||||||
panic("ScreenName is empty!")
|
|
||||||
}
|
|
||||||
ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName))
|
|
||||||
return
|
|
||||||
}
|
|
||||||
ret.ID = UserID(apiUser.ID)
|
|
||||||
ret.Handle = UserHandle(apiUser.ScreenName)
|
|
||||||
if apiUser.IsBanned {
|
|
||||||
// Banned users won't have any further info, so just return here
|
|
||||||
ret.IsBanned = true
|
|
||||||
return
|
|
||||||
}
|
|
||||||
ret.DisplayName = apiUser.Name
|
|
||||||
ret.Bio = apiUser.Description
|
|
||||||
ret.FollowingCount = apiUser.FriendsCount
|
|
||||||
ret.FollowersCount = apiUser.FollowersCount
|
|
||||||
ret.Location = apiUser.Location
|
|
||||||
if len(apiUser.Entities.URL.Urls) > 0 {
|
|
||||||
ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
|
|
||||||
}
|
|
||||||
ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
|
|
||||||
if err != nil {
|
|
||||||
err = fmt.Errorf("Error parsing time on user ID %d: %w", ret.ID, err)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
ret.IsPrivate = apiUser.Protected
|
|
||||||
ret.IsVerified = apiUser.Verified
|
|
||||||
ret.ProfileImageUrl = apiUser.ProfileImageURLHTTPS
|
|
||||||
|
|
||||||
if regexp.MustCompile(`_normal\.\w{2,4}`).MatchString(ret.ProfileImageUrl) {
|
|
||||||
ret.ProfileImageUrl = strings.ReplaceAll(ret.ProfileImageUrl, "_normal.", ".")
|
|
||||||
}
|
|
||||||
ret.BannerImageUrl = apiUser.ProfileBannerURL
|
|
||||||
|
|
||||||
ret.ProfileImageLocalPath = ret.compute_profile_image_local_path()
|
|
||||||
ret.BannerImageLocalPath = ret.compute_banner_image_local_path()
|
|
||||||
|
|
||||||
if len(apiUser.PinnedTweetIdsStr) > 0 {
|
|
||||||
ret.PinnedTweetID = TweetID(idstr_to_int(apiUser.PinnedTweetIdsStr[0]))
|
|
||||||
}
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Calls API#GetUserByID and returns the parsed result
|
|
||||||
func GetUserByID(u_id UserID) (User, error) {
|
|
||||||
session, err := NewGuestSession() // This endpoint works better if you're not logged in
|
|
||||||
if err != nil {
|
|
||||||
return User{}, err
|
|
||||||
}
|
|
||||||
return session.GetUserByID(u_id)
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Make a filename for the profile image, that hopefully won't clobber other ones
|
* Make a filename for the profile image, that hopefully won't clobber other ones
|
||||||
*/
|
*/
|
||||||
|
@ -1,16 +1,7 @@
|
|||||||
package scraper
|
package scraper
|
||||||
|
|
||||||
import (
|
|
||||||
"net/url"
|
|
||||||
"path"
|
|
||||||
"sort"
|
|
||||||
)
|
|
||||||
|
|
||||||
type VideoID int64
|
type VideoID int64
|
||||||
|
|
||||||
// TODO video-source-user: extract source user information (e.g., someone shares a video
|
|
||||||
// from someone else).
|
|
||||||
|
|
||||||
type Video struct {
|
type Video struct {
|
||||||
ID VideoID `db:"id"`
|
ID VideoID `db:"id"`
|
||||||
TweetID TweetID `db:"tweet_id"`
|
TweetID TweetID `db:"tweet_id"`
|
||||||
@ -30,56 +21,3 @@ type Video struct {
|
|||||||
IsGeoblocked bool `db:"is_geoblocked"`
|
IsGeoblocked bool `db:"is_geoblocked"`
|
||||||
IsGif bool `db:"is_gif"`
|
IsGif bool `db:"is_gif"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// get_filename returns the last element of the path of `remote_url`, ignoring any query
// string or fragment.  Panics if the URL cannot be parsed.
func get_filename(remote_url string) string {
	parsed, parse_err := url.Parse(remote_url)
	if parse_err == nil {
		return path.Base(parsed.Path)
	}
	panic(parse_err)
}
|
|
||||||
|
|
||||||
func ParseAPIVideo(apiVideo APIExtendedMedia) Video {
|
|
||||||
variants := apiVideo.VideoInfo.Variants
|
|
||||||
sort.Sort(variants)
|
|
||||||
video_remote_url := variants[0].URL
|
|
||||||
|
|
||||||
var view_count int
|
|
||||||
|
|
||||||
r := apiVideo.Ext.MediaStats.R
|
|
||||||
|
|
||||||
switch r.(type) {
|
|
||||||
case string:
|
|
||||||
view_count = 0
|
|
||||||
case map[string]interface{}:
|
|
||||||
OK_entry, ok := r.(map[string]interface{})["ok"]
|
|
||||||
if !ok {
|
|
||||||
panic("No 'ok' value found in the R!")
|
|
||||||
}
|
|
||||||
view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"]
|
|
||||||
view_count = int_or_panic(view_count_str.(string))
|
|
||||||
if !ok {
|
|
||||||
panic("No 'viewCount' value found in the OK!")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
local_filename := get_prefixed_path(get_filename(video_remote_url))
|
|
||||||
|
|
||||||
return Video{
|
|
||||||
ID: VideoID(apiVideo.ID),
|
|
||||||
Width: apiVideo.OriginalInfo.Width,
|
|
||||||
Height: apiVideo.OriginalInfo.Height,
|
|
||||||
RemoteURL: video_remote_url,
|
|
||||||
LocalFilename: local_filename,
|
|
||||||
|
|
||||||
ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
|
|
||||||
ThumbnailLocalPath: get_prefixed_path(path.Base(apiVideo.MediaURLHttps)),
|
|
||||||
Duration: apiVideo.VideoInfo.Duration,
|
|
||||||
ViewCount: view_count,
|
|
||||||
|
|
||||||
IsDownloaded: false,
|
|
||||||
IsBlockedByDMCA: false,
|
|
||||||
IsGeoblocked: apiVideo.ExtMediaAvailability.Reason == "Geoblocked",
|
|
||||||
IsGif: apiVideo.Type == "animated_gif",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user