REFACTOR: move as much API code out of the types files and into api_types_... files as possible
- also remove a few useless functions

parent 1f44fb0961
commit 850662c3cb
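The pattern behind this refactor: parsing helpers that used to live as standalone ParseXxx functions in the per-type files (ParseAPIDMMessage, ParseAPIDMReaction, ParseAPIDMChatRoom, ParseSingleRetweet, ParseAPISpace, etc.) move next to the API-shaped structs, and callers reach them through ToTweetTrove-style conversion methods, so the plain types files keep only the domain structs. Below is a minimal, hedged sketch of that shape; the type and field names (Reaction, APIReaction, ToReaction) are simplified placeholders for illustration, not the project's real definitions.

// Hypothetical, simplified illustration of the refactor pattern in this commit.
package main

import "fmt"

// types file: plain domain struct, no knowledge of the API wire format.
type Reaction struct {
	ID     int64
	Sender int64
	Emoji  string
}

// api_types file: wire-format struct plus its conversion logic, kept together.
type APIReaction struct {
	ID       int64  `json:"id,string"`
	SenderID int64  `json:"sender_id,string"`
	Emoji    string `json:"emoji_reaction"`
}

// Conversion is a method on the API type rather than a free ParseXxx function,
// so callers and tests go through a single entry point.
func (r APIReaction) ToReaction() Reaction {
	return Reaction{ID: r.ID, Sender: r.SenderID, Emoji: r.Emoji}
}

func main() {
	api := APIReaction{ID: 1, SenderID: 2, Emoji: "🔥"}
	fmt.Printf("%+v\n", api.ToReaction())
}

In the real code the entry point is ToTweetTrove, which is why the test changes in this diff switch from calling ParseAPIDMMessage(...) directly to api_message.ToTweetTrove() and looking the message up in the returned trove.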
@ -4,6 +4,10 @@ import (
"encoding/json"
"fmt"
"html"
"log"
"net/url"
"path"
"regexp"
"sort"
"strconv"
"strings"
@ -21,6 +25,19 @@ type APIMedia struct {
} `json:"original_info"`
}

func ParseAPIMedia(apiMedia APIMedia) Image {
local_filename := get_prefixed_path(path.Base(apiMedia.MediaURLHttps))

return Image{
ID: ImageID(apiMedia.ID),
RemoteURL: apiMedia.MediaURLHttps,
Width: apiMedia.OriginalInfo.Width,
Height: apiMedia.OriginalInfo.Height,
LocalFilename: local_filename,
IsDownloaded: false,
}
}

type SortableVariants []struct {
Bitrate int `json:"bitrate,omitempty"`
URL string `json:"url"`
@ -137,6 +154,164 @@ type APICard struct {
} `json:"binding_values"`
}

func ParseAPIPoll(apiCard APICard) Poll {
card_url, err := url.Parse(apiCard.ShortenedUrl)
if err != nil {
panic(err)
}
id := int_or_panic(card_url.Hostname())

ret := Poll{}
ret.ID = PollID(id)
ret.NumChoices = parse_num_choices(apiCard.Name)
ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
if err != nil {
panic(err)
}
ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
if err != nil {
panic(err)
}

ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)
ret.Choice2 = apiCard.BindingValues.Choice2.StringValue
ret.Choice2_Votes = int_or_panic(apiCard.BindingValues.Choice2_Count.StringValue)

if ret.NumChoices > 2 {
ret.Choice3 = apiCard.BindingValues.Choice3.StringValue
ret.Choice3_Votes = int_or_panic(apiCard.BindingValues.Choice3_Count.StringValue)
}
if ret.NumChoices > 3 {
ret.Choice4 = apiCard.BindingValues.Choice4.StringValue
ret.Choice4_Votes = int_or_panic(apiCard.BindingValues.Choice4_Count.StringValue)
}

return ret
}

func parse_num_choices(card_name string) int {
if strings.Index(card_name, "poll") != 0 || strings.Index(card_name, "choice") != 5 {
panic("Not valid card name: " + card_name)
}

return int_or_panic(card_name[4:5])
}

func ParseAPIVideo(apiVideo APIExtendedMedia) Video {
variants := apiVideo.VideoInfo.Variants
sort.Sort(variants)
video_remote_url := variants[0].URL

var view_count int

r := apiVideo.Ext.MediaStats.R

switch r.(type) {
case string:
view_count = 0
case map[string]interface{}:
OK_entry, ok := r.(map[string]interface{})["ok"]
if !ok {
panic("No 'ok' value found in the R!")
}
view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"]
if !ok {
panic("No 'viewCount' value found in the OK!")
}
view_count = int_or_panic(view_count_str.(string))
}

video_parsed_url, err := url.Parse(video_remote_url)
if err != nil {
panic(err)
}

local_filename := get_prefixed_path(path.Base(video_parsed_url.Path))

return Video{
ID: VideoID(apiVideo.ID),
Width: apiVideo.OriginalInfo.Width,
Height: apiVideo.OriginalInfo.Height,
RemoteURL: video_remote_url,
LocalFilename: local_filename,

ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
ThumbnailLocalPath: get_prefixed_path(path.Base(apiVideo.MediaURLHttps)),
Duration: apiVideo.VideoInfo.Duration,
ViewCount: view_count,

IsDownloaded: false,
IsBlockedByDMCA: false,
IsGeoblocked: apiVideo.ExtMediaAvailability.Reason == "Geoblocked",
IsGif: apiVideo.Type == "animated_gif",
}
}

func ParseAPIUrlCard(apiCard APICard) Url {
values := apiCard.BindingValues
ret := Url{}
ret.HasCard = true

ret.Domain = values.Domain.Value
ret.Title = values.Title.Value
ret.Description = values.Description.Value
ret.IsContentDownloaded = false
ret.CreatorID = UserID(values.Creator.UserValue.Value)
ret.SiteID = UserID(values.Site.UserValue.Value)

var thumbnail_url string

if apiCard.Name == "summary_large_image" || apiCard.Name == "summary" {
thumbnail_url = values.Thumbnail.ImageValue.Url
} else if apiCard.Name == "player" {
thumbnail_url = values.PlayerImage.ImageValue.Url
} else if apiCard.Name == "unified_card" {
// TODO: Grok chat previews
log.Print("Grok chat card, not implemented yet-- skipping")
} else {
panic("Unknown card type: " + apiCard.Name)
}

if thumbnail_url != "" {
ret.HasThumbnail = true
ret.ThumbnailRemoteUrl = thumbnail_url
ret.ThumbnailLocalPath = get_thumbnail_local_path(thumbnail_url)
ret.ThumbnailWidth = values.Thumbnail.ImageValue.Width
ret.ThumbnailHeight = values.Thumbnail.ImageValue.Height
}

return ret
}

func get_prefixed_path(p string) string {
local_prefix_regex := regexp.MustCompile(`^[\w-]{2}`)
local_prefix := local_prefix_regex.FindString(p)
if len(local_prefix) != 2 {
panic(fmt.Sprintf("Unable to extract a 2-letter prefix for filename %s", p))
}
return path.Join(local_prefix, p)
}

func get_thumbnail_local_path(remote_url string) string {
u, err := url.Parse(remote_url)
if err != nil {
panic(err)
}
if u.RawQuery == "" {
return path.Base(u.Path)
}
query_params, err := url.ParseQuery(u.RawQuery)
if err != nil {
panic(err)
}

return get_prefixed_path(
fmt.Sprintf("%s_%s.%s", path.Base(u.Path), query_params["name"][0], query_params["format"][0]),
)
}

type APITweet struct {
ID int64 `json:"id_str,string"`
ConversationID int64 `json:"conversation_id_str,string"`
@ -184,6 +359,171 @@ type APITweet struct {
IsExpandable bool
}

func (t APITweet) ToTweetTrove() (TweetTrove, error) {
ret := NewTweetTrove()
if t.RetweetedStatusIDStr == "" {
// Parse as a Tweet
new_tweet, err := ParseSingleTweet(t)
if err != nil {
return ret, err
}
ret.Tweets[new_tweet.ID] = new_tweet
for _, space := range new_tweet.Spaces {
ret.Spaces[space.ID] = space
}
} else {
// Parse as a Retweet
new_retweet := Retweet{}
var err error

t.NormalizeContent()

new_retweet.RetweetID = TweetID(t.ID)
new_retweet.TweetID = TweetID(t.RetweetedStatusID)
new_retweet.RetweetedByID = UserID(t.UserID)
new_retweet.RetweetedAt, err = TimestampFromString(t.CreatedAt)
if err != nil {
return ret, err
}
ret.Retweets[new_retweet.RetweetID] = new_retweet
}
return ret, nil
}

// Turn an APITweet, as returned from the scraper, into a properly structured Tweet object
func ParseSingleTweet(t APITweet) (ret Tweet, err error) {
t.NormalizeContent()

ret.ID = TweetID(t.ID)
ret.UserID = UserID(t.UserID)
ret.UserHandle = UserHandle(t.UserHandle)
ret.Text = t.FullText
ret.IsExpandable = t.IsExpandable

// Process "posted-at" date and time
if t.TombstoneText == "" { // Skip time parsing for tombstones
ret.PostedAt, err = TimestampFromString(t.CreatedAt)
if err != nil {
if ret.ID == 0 {
return Tweet{}, fmt.Errorf("unable to parse tweet: %w", ERR_NO_TWEET)
}
return Tweet{}, fmt.Errorf("Error parsing time on tweet ID %d:\n %w", ret.ID, err)
}
}

ret.NumLikes = t.FavoriteCount
ret.NumRetweets = t.RetweetCount
ret.NumReplies = t.ReplyCount
ret.NumQuoteTweets = t.QuoteCount
ret.InReplyToID = TweetID(t.InReplyToStatusID)
ret.QuotedTweetID = TweetID(t.QuotedStatusID)

// Process URLs and link previews
for _, url := range t.Entities.URLs {
var url_object Url
if t.Card.ShortenedUrl == url.ShortenedUrl {
if t.Card.Name == "3691233323:audiospace" {
// This "url" is just a link to a Space. Don't process it as a Url
continue
}
url_object = ParseAPIUrlCard(t.Card)
}
url_object.Text = url.ExpandedURL
url_object.ShortText = url.ShortenedUrl
url_object.TweetID = ret.ID

// Skip it if it's just the quoted tweet
_, id, is_ok := TryParseTweetUrl(url.ExpandedURL)
if is_ok && id == ret.QuotedTweetID {
continue
}

ret.Urls = append(ret.Urls, url_object)
}

// Process images
for _, media := range t.Entities.Media {
if media.Type != "photo" {
// Videos now have an entry in "Entities.Media" but they can be ignored; the useful bit is in ExtendedEntities
// So skip ones that aren't "photo"
continue
}
new_image := ParseAPIMedia(media)
new_image.TweetID = ret.ID
ret.Images = append(ret.Images, new_image)
}

// Process hashtags
for _, hashtag := range t.Entities.Hashtags {
ret.Hashtags = append(ret.Hashtags, hashtag.Text)
}

// Process `@` mentions and reply-mentions
for _, mention := range t.Entities.Mentions {
ret.Mentions = append(ret.Mentions, mention.UserName)
}
for _, mention := range strings.Split(t.Entities.ReplyMentions, " ") {
if mention != "" {
if mention[0] != '@' {
panic(fmt.Errorf("Unknown ReplyMention value %q:\n %w", t.Entities.ReplyMentions, EXTERNAL_API_ERROR))
}
ret.ReplyMentions = append(ret.ReplyMentions, mention[1:])
}
}

// Process videos
for _, entity := range t.ExtendedEntities.Media {
if entity.Type != "video" && entity.Type != "animated_gif" {
continue
}

new_video := ParseAPIVideo(entity)
new_video.TweetID = ret.ID
ret.Videos = append(ret.Videos, new_video)

// Remove the thumbnail from the Images list
updated_imgs := []Image{}
for _, img := range ret.Images {
if VideoID(img.ID) != new_video.ID {
updated_imgs = append(updated_imgs, img)
}
}
ret.Images = updated_imgs
}

// Process polls
if strings.Index(t.Card.Name, "poll") == 0 {
poll := ParseAPIPoll(t.Card)
poll.TweetID = ret.ID
ret.Polls = []Poll{poll}
}

// Process spaces
if t.Card.Name == "3691233323:audiospace" {
space := Space{}
space.ID = SpaceID(t.Card.BindingValues.ID.StringValue)
space.ShortUrl = t.Card.ShortenedUrl

// Indicate that this Space needs its details fetched still
space.IsDetailsFetched = false

ret.Spaces = []Space{space}
ret.SpaceID = space.ID
}

// Process tombstones and other metadata
ret.TombstoneType = t.TombstoneText
ret.IsStub = !(ret.TombstoneType == "")
ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle

// Extra data that can help piece together tombstoned tweet info
ret.in_reply_to_user_id = UserID(t.InReplyToUserID)
ret.in_reply_to_user_handle = UserHandle(t.InReplyToScreenName)

return
}

func (t *APITweet) NormalizeContent() {
id, err := strconv.Atoi(t.QuotedStatusIDStr)
if err == nil {
@ -260,6 +600,54 @@ type APIUser struct {
DoesntExist bool
}

// Turn an APIUser, as returned from the scraper, into a properly structured User object
func ParseSingleUser(apiUser APIUser) (ret User, err error) {
if apiUser.DoesntExist {
// User may have been deleted, or there was a typo. There's no data to parse
if apiUser.ScreenName == "" {
panic("ScreenName is empty!")
}
ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName))
return
}
ret.ID = UserID(apiUser.ID)
ret.Handle = UserHandle(apiUser.ScreenName)
if apiUser.IsBanned {
// Banned users won't have any further info, so just return here
ret.IsBanned = true
return
}
ret.DisplayName = apiUser.Name
ret.Bio = apiUser.Description
ret.FollowingCount = apiUser.FriendsCount
ret.FollowersCount = apiUser.FollowersCount
ret.Location = apiUser.Location
if len(apiUser.Entities.URL.Urls) > 0 {
ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
}
ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
if err != nil {
err = fmt.Errorf("Error parsing time on user ID %d: %w", ret.ID, err)
return
}
ret.IsPrivate = apiUser.Protected
ret.IsVerified = apiUser.Verified
ret.ProfileImageUrl = apiUser.ProfileImageURLHTTPS

if regexp.MustCompile(`_normal\.\w{2,4}`).MatchString(ret.ProfileImageUrl) {
ret.ProfileImageUrl = strings.ReplaceAll(ret.ProfileImageUrl, "_normal.", ".")
}
ret.BannerImageUrl = apiUser.ProfileBannerURL

ret.ProfileImageLocalPath = ret.compute_profile_image_local_path()
ret.BannerImageLocalPath = ret.compute_banner_image_local_path()

if len(apiUser.PinnedTweetIdsStr) > 0 {
ret.PinnedTweetID = TweetID(idstr_to_int(apiUser.PinnedTweetIdsStr[0]))
}
return
}

type APINotification struct {
ID string `json:"id"`
TimestampMs int64 `json:"timestampMs,string"`
@ -565,22 +953,11 @@ func (t *TweetResponse) ToTweetTrove() (TweetTrove, error) {
ret := NewTweetTrove()

for _, single_tweet := range t.GlobalObjects.Tweets {
if single_tweet.RetweetedStatusIDStr == "" {
new_tweet, err := ParseSingleTweet(single_tweet)
if err != nil {
return ret, err
}
ret.Tweets[new_tweet.ID] = new_tweet
for _, space := range new_tweet.Spaces {
ret.Spaces[space.ID] = space
}
} else {
new_retweet, err := ParseSingleRetweet(single_tweet)
if err != nil {
return ret, err
}
ret.Retweets[new_retweet.RetweetID] = new_retweet
trove, err := single_tweet.ToTweetTrove()
if err != nil {
return ret, err
}
ret.MergeWith(trove)
}

for _, user := range t.GlobalObjects.Users {
@ -597,10 +974,14 @@ func (t *TweetResponse) ToTweetTrove() (TweetTrove, error) {
return ret, nil
}

func idstr_to_int(idstr string) int64 {
id, err := strconv.Atoi(idstr)
func idstr_to_int(s string) int64 {
return int64(int_or_panic(s))
}

func int_or_panic(s string) int {
result, err := strconv.Atoi(s)
if err != nil {
panic(err)
}
return int64(id)
return result
}

@ -5,6 +5,7 @@ import (
"fmt"
"html"
"net/url"
"path"
"strings"

"github.com/google/uuid"
@ -77,38 +78,106 @@ func (m *APIDMMessage) NormalizeContent() {
m.MessageData.Text = strings.TrimSpace(m.MessageData.Text)
}

func (m APIDMMessage) ToTweetTrove() TweetTrove {
func (api_msg APIDMMessage) ToTweetTrove() TweetTrove {
ret := NewTweetTrove()
if m.ID == 0 {
if api_msg.ID == 0 {
return ret
}

m.NormalizeContent()
result := ParseAPIDMMessage(m)
api_msg.NormalizeContent()

msg := DMMessage{}
msg.ID = DMMessageID(api_msg.ID)
msg.SentAt = TimestampFromUnixMilli(int64(api_msg.Time))
msg.DMChatRoomID = DMChatRoomID(api_msg.ConversationID)
msg.SenderID = UserID(api_msg.MessageData.SenderID)
msg.Text = api_msg.MessageData.Text

msg.InReplyToID = DMMessageID(api_msg.MessageData.ReplyData.ID) // Will be "0" if not a reply

msg.Reactions = make(map[UserID]DMReaction)
for _, api_reacc := range api_msg.MessageReactions {
reacc := DMReaction{}
reacc.ID = DMMessageID(api_reacc.ID)
reacc.SenderID = UserID(api_reacc.SenderID)
reacc.SentAt = TimestampFromUnixMilli(int64(api_reacc.Time))
reacc.Emoji = api_reacc.Emoji
reacc.DMMessageID = msg.ID
msg.Reactions[reacc.SenderID] = reacc
}
if api_msg.MessageData.Attachment.Photo.ID != 0 {
new_image := ParseAPIMedia(api_msg.MessageData.Attachment.Photo)
new_image.DMMessageID = msg.ID
msg.Images = []Image{new_image}
}
if api_msg.MessageData.Attachment.Video.ID != 0 {
entity := api_msg.MessageData.Attachment.Video
if entity.Type == "video" || entity.Type == "animated_gif" {
new_video := ParseAPIVideo(entity)
new_video.DMMessageID = msg.ID
msg.Videos = append(msg.Videos, new_video)
}
}

// Process URLs and link previews
for _, url := range api_msg.MessageData.Entities.URLs {
// Skip it if it's an embedded tweet
_, id, is_ok := TryParseTweetUrl(url.ExpandedURL)
if is_ok && id == TweetID(api_msg.MessageData.Attachment.Tweet.Status.ID) {
continue
}
// Skip it if it's an embedded image
if api_msg.MessageData.Attachment.Photo.URL == url.ShortenedUrl {
continue
}
// Skip it if it's an embedded video
if api_msg.MessageData.Attachment.Video.URL == url.ShortenedUrl {
continue
}

var new_url Url
if api_msg.MessageData.Attachment.Card.ShortenedUrl == url.ShortenedUrl {
if api_msg.MessageData.Attachment.Card.Name == "3691233323:audiospace" {
// This "url" is just a link to a Space. Don't process it as a Url
// TODO: ...but do process it as a Space?
continue
}
new_url = ParseAPIUrlCard(api_msg.MessageData.Attachment.Card)
}
new_url.Text = url.ExpandedURL
new_url.ShortText = url.ShortenedUrl
new_url.DMMessageID = msg.ID
msg.Urls = append(msg.Urls, new_url)
}

// Parse tweet attachment
if m.MessageData.Attachment.Tweet.Status.ID != 0 {
u, err := ParseSingleUser(m.MessageData.Attachment.Tweet.Status.User)
if api_msg.MessageData.Attachment.Tweet.Status.ID != 0 {
u, err := ParseSingleUser(api_msg.MessageData.Attachment.Tweet.Status.User)
if err != nil {
panic(err)
}
ret.Users[u.ID] = u

t, err := ParseSingleTweet(m.MessageData.Attachment.Tweet.Status.APITweet)
t, err := ParseSingleTweet(api_msg.MessageData.Attachment.Tweet.Status.APITweet)
if err != nil {
panic(err)
}
t.UserID = u.ID
ret.Tweets[t.ID] = t
result.EmbeddedTweetID = t.ID
msg.EmbeddedTweetID = t.ID
}
ret.Messages[result.ID] = result

// TODO: parse attached images and videos
ret.Messages[msg.ID] = msg

return ret
}

type APIDMResponse struct {
InboxInitialState APIInbox `json:"inbox_initial_state"`
InboxTimeline APIInbox `json:"inbox_timeline"`
ConversationTimeline APIInbox `json:"conversation_timeline"`
UserEvents APIInbox `json:"user_events"`
}

type APIDMConversation struct {
ConversationID string `json:"conversation_id"`
Type string `json:"type"`
@ -179,13 +248,6 @@ type APIInbox struct {
Conversations map[string]APIDMConversation `json:"conversations"`
}

type APIDMResponse struct {
InboxInitialState APIInbox `json:"inbox_initial_state"`
InboxTimeline APIInbox `json:"inbox_timeline"`
ConversationTimeline APIInbox `json:"conversation_timeline"`
UserEvents APIInbox `json:"user_events"`
}

func (r APIInbox) ToTweetTrove(current_user_id UserID) TweetTrove {
ret := NewTweetTrove()

@ -211,8 +273,8 @@ func (r APIInbox) ToTweetTrove(current_user_id UserID) TweetTrove {

ret.MergeWith(entry.Message.ToTweetTrove())
}
for _, room := range r.Conversations {
result := ParseAPIDMChatRoom(room, current_user_id)
for _, api_room := range r.Conversations {
result := ParseAPIDMChatRoom(api_room, current_user_id)
ret.Rooms[result.ID] = result
}
for _, u := range r.Users {
@ -225,6 +287,46 @@ func (r APIInbox) ToTweetTrove(current_user_id UserID) TweetTrove {
return ret
}

func ParseAPIDMChatRoom(api_room APIDMConversation, current_user_id UserID) DMChatRoom {
result := DMChatRoom{}
result.ID = DMChatRoomID(api_room.ConversationID)
result.Type = api_room.Type
result.LastMessagedAt = TimestampFromUnixMilli(int64(api_room.SortTimestamp))
result.IsNSFW = api_room.NSFW

if result.Type == "GROUP_DM" {
result.CreatedAt = TimestampFromUnixMilli(int64(api_room.CreateTime))
result.CreatedByUserID = UserID(api_room.CreatedByUserID)
result.Name = api_room.Name
result.AvatarImageRemoteURL = api_room.AvatarImage
tmp_url, err := url.Parse(result.AvatarImageRemoteURL)
if err != nil {
panic(err)
}
result.AvatarImageLocalPath = fmt.Sprintf("%s_avatar_%s.%s", result.ID, path.Base(tmp_url.Path), tmp_url.Query().Get("format"))
}

result.Participants = make(map[UserID]DMChatParticipant)
for _, api_participant := range api_room.Participants {
participant := DMChatParticipant{}
participant.UserID = UserID(api_participant.UserID)
participant.DMChatRoomID = result.ID
participant.LastReadEventID = DMMessageID(api_participant.LastReadEventID)

// Process chat settings if this is the logged-in user
if participant.UserID == current_user_id {
participant.IsNotificationsDisabled = api_room.NotificationsDisabled
participant.IsReadOnly = api_room.ReadOnly
participant.IsTrusted = api_room.Trusted
participant.IsMuted = api_room.Muted
participant.Status = api_room.Status
participant.IsChatSettingsValid = true
}
result.Participants[participant.UserID] = participant
}
return result
}

func (api *API) GetDMInbox() (APIInbox, error) {
url, err := url.Parse("https://twitter.com/i/api/1.1/dm/inbox_initial_state.json")
if err != nil {
@ -284,6 +386,30 @@ func (api *API) GetDMInbox() (APIInbox, error) {
result.InboxInitialState.Status = result.InboxInitialState.InboxTimelines.Trusted.Status
return result.InboxInitialState, err
}
func (api *API) GetInbox(how_many int) (TweetTrove, string, error) {
if !api.IsAuthenticated {
return TweetTrove{}, "", ErrLoginRequired
}
dm_response, err := api.GetDMInbox()
if err != nil {
panic(err)
}

trove := dm_response.ToTweetTrove(api.UserID)
cursor := dm_response.Cursor
next_cursor_id := dm_response.InboxTimelines.Trusted.MinEntryID
for len(trove.Rooms) < how_many && dm_response.Status != "AT_END" {
dm_response, err = api.GetInboxTrusted(next_cursor_id)
if err != nil {
panic(err)
}
next_trove := dm_response.ToTweetTrove(api.UserID)
next_cursor_id = dm_response.MinEntryID
trove.MergeWith(next_trove)
}

return trove, cursor, nil
}

func (api *API) GetInboxTrusted(oldest_id int) (APIInbox, error) {
url, err := url.Parse("https://twitter.com/i/api/1.1/dm/inbox_timeline/trusted.json")
@ -345,62 +471,87 @@ func (api *API) GetInboxTrusted(oldest_id int) (APIInbox, error) {
return result.InboxTimeline, err
}

func (api *API) GetDMConversation(id DMChatRoomID, max_id DMMessageID) (APIInbox, error) {
url, err := url.Parse("https://twitter.com/i/api/1.1/dm/conversation/" + string(id) + ".json")
func (api *API) GetConversation(room_id DMChatRoomID, max_id DMMessageID, how_many int) (TweetTrove, error) {
if !api.IsAuthenticated {
return TweetTrove{}, ErrLoginRequired
}

fetch := func(max_id DMMessageID) (APIInbox, error) {
url, err := url.Parse("https://twitter.com/i/api/1.1/dm/conversation/" + string(room_id) + ".json")
if err != nil {
panic(err)
}
query := url.Query()
query.Add("max_id", fmt.Sprint(max_id))
query.Add("context", "FETCH_DM_CONVERSATION_HISTORY")
query.Add("include_profile_interstitial_type", "1")
query.Add("include_blocking", "1")
query.Add("include_blocked_by", "1")
query.Add("include_followed_by", "1")
query.Add("include_want_retweets", "1")
query.Add("include_mute_edge", "1")
query.Add("include_can_dm", "1")
query.Add("include_can_media_tag", "1")
query.Add("include_ext_has_nft_avatar", "1")
query.Add("include_ext_is_blue_verified", "1")
query.Add("include_ext_verified_type", "1")
query.Add("include_ext_profile_image_shape", "1")
query.Add("skip_status", "1")
query.Add("dm_secret_conversations_enabled", "false")
query.Add("krs_registration_enabled", "true")
query.Add("cards_platform", "Web-12")
query.Add("include_cards", "1")
query.Add("include_ext_alt_text", "true")
query.Add("include_ext_limited_action_results", "true")
query.Add("include_quote_count", "true")
query.Add("include_reply_count", "1")
query.Add("tweet_mode", "extended")
query.Add("include_ext_views", "true")
query.Add("dm_users", "false")
query.Add("include_groups", "true")
query.Add("include_inbox_timelines", "true")
query.Add("include_ext_media_color", "true")
query.Add("supports_reactions", "true")
query.Add("include_conversation_info", "true")
query.Add("ext", strings.Join([]string{
"mediaColor",
"altText",
"mediaStats",
"highlightedLabel",
"hasNftAvatar",
"voiceInfo",
"birdwatchPivot",
"enrichments",
"superFollowMetadata",
"unmentionInfo",
"editControl",
"vibe",
}, ","))
url.RawQuery = query.Encode()

var result APIDMResponse
err = api.do_http(url.String(), "", &result)
return result.ConversationTimeline, err
}

dm_response, err := fetch(max_id)
if err != nil {
panic(err)
}
query := url.Query()
query.Add("max_id", fmt.Sprint(max_id))
query.Add("context", "FETCH_DM_CONVERSATION_HISTORY")
query.Add("include_profile_interstitial_type", "1")
query.Add("include_blocking", "1")
query.Add("include_blocked_by", "1")
query.Add("include_followed_by", "1")
query.Add("include_want_retweets", "1")
query.Add("include_mute_edge", "1")
query.Add("include_can_dm", "1")
query.Add("include_can_media_tag", "1")
query.Add("include_ext_has_nft_avatar", "1")
query.Add("include_ext_is_blue_verified", "1")
query.Add("include_ext_verified_type", "1")
query.Add("include_ext_profile_image_shape", "1")
query.Add("skip_status", "1")
query.Add("dm_secret_conversations_enabled", "false")
query.Add("krs_registration_enabled", "true")
query.Add("cards_platform", "Web-12")
query.Add("include_cards", "1")
query.Add("include_ext_alt_text", "true")
query.Add("include_ext_limited_action_results", "true")
query.Add("include_quote_count", "true")
query.Add("include_reply_count", "1")
query.Add("tweet_mode", "extended")
query.Add("include_ext_views", "true")
query.Add("dm_users", "false")
query.Add("include_groups", "true")
query.Add("include_inbox_timelines", "true")
query.Add("include_ext_media_color", "true")
query.Add("supports_reactions", "true")
query.Add("include_conversation_info", "true")
query.Add("ext", strings.Join([]string{
"mediaColor",
"altText",
"mediaStats",
"highlightedLabel",
"hasNftAvatar",
"voiceInfo",
"birdwatchPivot",
"enrichments",
"superFollowMetadata",
"unmentionInfo",
"editControl",
"vibe",
}, ","))
url.RawQuery = query.Encode()

var result APIDMResponse
err = api.do_http(url.String(), "", &result)
return result.ConversationTimeline, err
trove := dm_response.ToTweetTrove(api.UserID)
oldest := trove.GetOldestMessage(room_id)
for len(trove.Messages) < how_many && dm_response.Status != "AT_END" {
dm_response, err = fetch(oldest)
if err != nil {
panic(err)
}
next_trove := dm_response.ToTweetTrove(api.UserID)
oldest = next_trove.GetOldestMessage(room_id)
trove.MergeWith(next_trove)
}

return trove, nil
}

// Returns a TweetTrove and the cursor for the next update, or an error
@ -459,6 +610,9 @@ func (api *API) PollInboxUpdates(cursor string) (TweetTrove, string, error) {
return result.UserEvents.ToTweetTrove(api.UserID), result.UserEvents.Cursor, nil
}

// Writes
// ------

func (api *API) SendDMMessage(room_id DMChatRoomID, text string, in_reply_to_id DMMessageID) (TweetTrove, error) {
if !api.IsAuthenticated {
return TweetTrove{}, ErrLoginRequired

@ -21,7 +21,9 @@ func TestParseAPIDMMessage(t *testing.T) {
err = json.Unmarshal(data, &api_message)
require.NoError(t, err)

message := ParseAPIDMMessage(api_message)
trove := api_message.ToTweetTrove()
message, is_ok := trove.Messages[DMMessageID(api_message.ID)]
require.True(t, is_ok)
assert.Equal(message.ID, DMMessageID(1663623203644751885))
assert.Equal(message.SentAt, TimestampFromUnixMilli(1685473655064))
assert.Equal(message.DMChatRoomID, DMChatRoomID("1458284524761075714-1488963321701171204"))
@ -41,7 +43,9 @@ func TestParseAPIDMMessageWithReaction(t *testing.T) {
err = json.Unmarshal(data, &api_message)
require.NoError(t, err)

message := ParseAPIDMMessage(api_message)
trove := api_message.ToTweetTrove()
message, is_ok := trove.Messages[DMMessageID(api_message.ID)]
require.True(t, is_ok)
assert.Equal(message.ID, DMMessageID(1663623062195957773))
require.Len(t, message.Reactions, 1)

@ -1390,7 +1390,19 @@ func (api API) GetUser(handle UserHandle) (User, error) {
return ParseSingleUser(apiUser)
}

// Calls API#GetUserByID and returns the parsed result
func GetUserByID(u_id UserID) (User, error) {
session, err := NewGuestSession() // This endpoint works better if you're not logged in
if err != nil {
return User{}, err
}
return session.GetUserByID(u_id)
}

func (api API) GetUserByID(u_id UserID) (User, error) {
if u_id == UserID(0) {
panic("No Users with ID 0")
}
url, err := url.Parse(GraphqlURL{
BaseUrl: "https://x.com/i/api/graphql/Qw77dDjp9xCpUY-AXwt-yQ/UserByRestId",
Variables: GraphqlVariables{

@ -1,6 +0,0 @@
package scraper

// Tokens
// ------

const BEARER_TOKEN string = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"

@ -1,18 +0,0 @@
package scraper

type ConversationID string

type Conversation struct {
ID ConversationID
Type string
SortEventID int
SortTimestamp int
Participants []User
Nsfw bool
NotificationsDisabled bool
LastReadEventId int
ReadOnly bool
Trusted bool
LowQuality bool
Muted bool
}

@ -1,11 +1,5 @@
package scraper

import (
"fmt"
"net/url"
"path"
)

type DMChatRoomID string

// A participant in a chat room.
@ -45,6 +39,8 @@ type DMChatRoom struct {
Participants map[UserID]DMChatParticipant
}

// TODO: view-layer
// - view helpers should go in a view layer
func (r DMChatRoom) GetParticipantIDs() []UserID {
ret := []UserID{}
for user_id := range r.Participants {
@ -52,43 +48,3 @@ func (r DMChatRoom) GetParticipantIDs() []UserID {
}
return ret
}

func ParseAPIDMChatRoom(api_room APIDMConversation, current_user_id UserID) DMChatRoom {
ret := DMChatRoom{}
ret.ID = DMChatRoomID(api_room.ConversationID)
ret.Type = api_room.Type
ret.LastMessagedAt = TimestampFromUnixMilli(int64(api_room.SortTimestamp))
ret.IsNSFW = api_room.NSFW

if ret.Type == "GROUP_DM" {
ret.CreatedAt = TimestampFromUnixMilli(int64(api_room.CreateTime))
ret.CreatedByUserID = UserID(api_room.CreatedByUserID)
ret.Name = api_room.Name
ret.AvatarImageRemoteURL = api_room.AvatarImage
tmp_url, err := url.Parse(ret.AvatarImageRemoteURL)
if err != nil {
panic(err)
}
ret.AvatarImageLocalPath = fmt.Sprintf("%s_avatar_%s.%s", ret.ID, path.Base(tmp_url.Path), tmp_url.Query().Get("format"))
}

ret.Participants = make(map[UserID]DMChatParticipant)
for _, api_participant := range api_room.Participants {
participant := DMChatParticipant{}
participant.UserID = UserID(api_participant.UserID)
participant.DMChatRoomID = ret.ID
participant.LastReadEventID = DMMessageID(api_participant.LastReadEventID)

// Process chat settings if this is the logged-in user
if participant.UserID == current_user_id {
participant.IsNotificationsDisabled = api_room.NotificationsDisabled
participant.IsReadOnly = api_room.ReadOnly
participant.IsTrusted = api_room.Trusted
participant.IsMuted = api_room.Muted
participant.Status = api_room.Status
participant.IsChatSettingsValid = true
}
ret.Participants[participant.UserID] = participant
}
return ret
}

@ -10,15 +10,6 @@ type DMReaction struct {
Emoji string `db:"emoji"`
}

func ParseAPIDMReaction(reacc APIDMReaction) DMReaction {
ret := DMReaction{}
ret.ID = DMMessageID(reacc.ID)
ret.SenderID = UserID(reacc.SenderID)
ret.SentAt = TimestampFromUnixMilli(int64(reacc.Time))
ret.Emoji = reacc.Emoji
return ret
}

type DMMessage struct {
ID DMMessageID `db:"id"`
DMChatRoomID DMChatRoomID `db:"chat_room_id"`
@ -33,67 +24,6 @@ type DMMessage struct {
Images []Image
Videos []Video
Urls []Url
}

func ParseAPIDMMessage(message APIDMMessage) DMMessage {
ret := DMMessage{}
ret.ID = DMMessageID(message.ID)
ret.SentAt = TimestampFromUnixMilli(int64(message.Time))
ret.DMChatRoomID = DMChatRoomID(message.ConversationID)
ret.SenderID = UserID(message.MessageData.SenderID)
ret.Text = message.MessageData.Text

ret.InReplyToID = DMMessageID(message.MessageData.ReplyData.ID) // Will be "0" if not a reply

ret.Reactions = make(map[UserID]DMReaction)
for _, api_reacc := range message.MessageReactions {
reacc := ParseAPIDMReaction(api_reacc)
reacc.DMMessageID = ret.ID
ret.Reactions[reacc.SenderID] = reacc
}
if message.MessageData.Attachment.Photo.ID != 0 {
new_image := ParseAPIMedia(message.MessageData.Attachment.Photo)
new_image.DMMessageID = ret.ID
ret.Images = []Image{new_image}
}
if message.MessageData.Attachment.Video.ID != 0 {
entity := message.MessageData.Attachment.Video
if entity.Type == "video" || entity.Type == "animated_gif" {
new_video := ParseAPIVideo(entity)
new_video.DMMessageID = ret.ID
ret.Videos = append(ret.Videos, new_video)
}
}

// Process URLs and link previews
for _, url := range message.MessageData.Entities.URLs {
// Skip it if it's an embedded tweet
_, id, is_ok := TryParseTweetUrl(url.ExpandedURL)
if is_ok && id == TweetID(message.MessageData.Attachment.Tweet.Status.ID) {
continue
}
// Skip it if it's an embedded image
if message.MessageData.Attachment.Photo.URL == url.ShortenedUrl {
continue
}
// Skip it if it's an embedded video
if message.MessageData.Attachment.Video.URL == url.ShortenedUrl {
continue
}

var new_url Url
if message.MessageData.Attachment.Card.ShortenedUrl == url.ShortenedUrl {
if message.MessageData.Attachment.Card.Name == "3691233323:audiospace" {
// This "url" is just a link to a Space. Don't process it as a Url
continue
}
new_url = ParseAPIUrlCard(message.MessageData.Attachment.Card)
}
new_url.Text = url.ExpandedURL
new_url.ShortText = url.ShortenedUrl
new_url.DMMessageID = ret.ID
ret.Urls = append(ret.Urls, new_url)
}

return ret

LastReadEventUserIDs []UserID // Used for rendering
}

@ -1,63 +0,0 @@
package scraper

func (t TweetTrove) GetOldestMessage(id DMChatRoomID) DMMessageID {
oldest := DMMessageID(^uint(0) >> 1) // Max integer
for _, m := range t.Messages {
if m.ID < oldest && m.DMChatRoomID == id {
oldest = m.ID
}
}
return oldest
}

// TODO: Why are these all here? =>

// Returns a TweetTrove and the cursor for the next update
func (api *API) GetInbox(how_many int) (TweetTrove, string, error) {
if !api.IsAuthenticated {
return TweetTrove{}, "", ErrLoginRequired
}
dm_response, err := api.GetDMInbox()
if err != nil {
panic(err)
}

trove := dm_response.ToTweetTrove(api.UserID)
cursor := dm_response.Cursor
next_cursor_id := dm_response.InboxTimelines.Trusted.MinEntryID
for len(trove.Rooms) < how_many && dm_response.Status != "AT_END" {
dm_response, err = api.GetInboxTrusted(next_cursor_id)
if err != nil {
panic(err)
}
next_trove := dm_response.ToTweetTrove(api.UserID)
next_cursor_id = dm_response.MinEntryID
trove.MergeWith(next_trove)
}

return trove, cursor, nil
}

func (api *API) GetConversation(id DMChatRoomID, max_id DMMessageID, how_many int) (TweetTrove, error) {
if !api.IsAuthenticated {
return TweetTrove{}, ErrLoginRequired
}
dm_response, err := api.GetDMConversation(id, max_id)
if err != nil {
panic(err)
}

trove := dm_response.ToTweetTrove(api.UserID)
oldest := trove.GetOldestMessage(id)
for len(trove.Messages) < how_many && dm_response.Status != "AT_END" {
dm_response, err = api.GetDMConversation(id, oldest)
if err != nil {
panic(err)
}
next_trove := dm_response.ToTweetTrove(api.UserID)
oldest = next_trove.GetOldestMessage(id)
trove.MergeWith(next_trove)
}

return trove, nil
}

@ -11,6 +11,8 @@ import (
"time"
)

const BEARER_TOKEN string = "AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA"

type GuestTokenResponse struct {
Token string `json:"guest_token"`
RefreshedAt time.Time

@ -1,9 +1,5 @@
package scraper

import (
"path"
)

type ImageID int64

type Image struct {
@ -16,16 +12,3 @@ type Image struct {
LocalFilename string `db:"local_filename"`
IsDownloaded bool `db:"is_downloaded"`
}

func ParseAPIMedia(apiMedia APIMedia) Image {
local_filename := get_prefixed_path(path.Base(apiMedia.MediaURLHttps))

return Image{
ID: ImageID(apiMedia.ID),
RemoteURL: apiMedia.MediaURLHttps,
Width: apiMedia.OriginalInfo.Width,
Height: apiMedia.OriginalInfo.Height,
LocalFilename: local_filename,
IsDownloaded: false,
}
}

@ -3,6 +3,8 @@ package scraper
import (
"fmt"
"net/http"
"net/url"
"regexp"
"time"
)

@ -32,3 +34,43 @@ func ExpandShortUrl(short_url string) string {
}
return long_url
}

// Given an URL, try to parse it as a tweet url.
// The bool is an `is_ok` value; true if the parse was successful, false if it didn't match
func TryParseTweetUrl(s string) (UserHandle, TweetID, bool) {
parsed_url, err := url.Parse(s)
if err != nil {
return UserHandle(""), TweetID(0), false
}

if parsed_url.Host != "twitter.com" && parsed_url.Host != "mobile.twitter.com" && parsed_url.Host != "x.com" {
return UserHandle(""), TweetID(0), false
}

r := regexp.MustCompile(`^/(\w+)/status/(\d+)$`)
matches := r.FindStringSubmatch(parsed_url.Path)
if matches == nil {
return UserHandle(""), TweetID(0), false
}
if len(matches) != 3 { // matches[0] is the full string
panic(matches)
}
return UserHandle(matches[1]), TweetID(int_or_panic(matches[2])), true
}

/**
* Given a tweet URL, return the corresponding user handle.
* If tweet url is not valid, return an error.
*/
func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
short_url_regex := regexp.MustCompile(`^https://t.co/\w{5,20}$`)
if short_url_regex.MatchString(tweet_url) {
tweet_url = ExpandShortUrl(tweet_url)
}

ret, _, is_ok := TryParseTweetUrl(tweet_url)
if !is_ok {
return "", fmt.Errorf("Invalid tweet url: %s", tweet_url)
}
return ret, nil
}

@ -1,9 +1,6 @@
package scraper

import (
"net/url"
"strconv"
"strings"
"time"
)

@ -29,6 +26,9 @@ type Poll struct {
LastUpdatedAt Timestamp `db:"last_scraped_at"`
}

// TODO: view-layer
// - view helpers should go in a view layer

func (p Poll) TotalVotes() int {
return p.Choice1_Votes + p.Choice2_Votes + p.Choice3_Votes + p.Choice4_Votes
}
@ -48,56 +48,3 @@ func (p Poll) IsWinner(votes int) bool {
}
return votes >= p.Choice1_Votes && votes >= p.Choice2_Votes && votes >= p.Choice3_Votes && votes >= p.Choice4_Votes
}

func ParseAPIPoll(apiCard APICard) Poll {
card_url, err := url.Parse(apiCard.ShortenedUrl)
if err != nil {
panic(err)
}
id := int_or_panic(card_url.Hostname())

ret := Poll{}
ret.ID = PollID(id)
ret.NumChoices = parse_num_choices(apiCard.Name)
ret.VotingDuration = int_or_panic(apiCard.BindingValues.DurationMinutes.StringValue) * 60
ret.VotingEndsAt, err = TimestampFromString(apiCard.BindingValues.EndDatetimeUTC.StringValue)
if err != nil {
panic(err)
}
ret.LastUpdatedAt, err = TimestampFromString(apiCard.BindingValues.LastUpdatedAt.StringValue)
if err != nil {
panic(err)
}

ret.Choice1 = apiCard.BindingValues.Choice1.StringValue
ret.Choice1_Votes = int_or_panic(apiCard.BindingValues.Choice1_Count.StringValue)
ret.Choice2 = apiCard.BindingValues.Choice2.StringValue
ret.Choice2_Votes = int_or_panic(apiCard.BindingValues.Choice2_Count.StringValue)

if ret.NumChoices > 2 {
ret.Choice3 = apiCard.BindingValues.Choice3.StringValue
ret.Choice3_Votes = int_or_panic(apiCard.BindingValues.Choice3_Count.StringValue)
}
if ret.NumChoices > 3 {
ret.Choice4 = apiCard.BindingValues.Choice4.StringValue
ret.Choice4_Votes = int_or_panic(apiCard.BindingValues.Choice4_Count.StringValue)
}

return ret
}

func parse_num_choices(card_name string) int {
if strings.Index(card_name, "poll") != 0 || strings.Index(card_name, "choice") != 5 {
panic("Not valid card name: " + card_name)
}

return int_or_panic(card_name[4:5])
}

func int_or_panic(s string) int {
result, err := strconv.Atoi(s)
if err != nil {
panic(err)
}
return result
}

@ -8,16 +8,3 @@ type Retweet struct {
RetweetedBy *User
RetweetedAt Timestamp `db:"retweeted_at"`
}

func ParseSingleRetweet(apiTweet APITweet) (ret Retweet, err error) {
apiTweet.NormalizeContent()

ret.RetweetID = TweetID(apiTweet.ID)
ret.TweetID = TweetID(apiTweet.RetweetedStatusID)
ret.RetweetedByID = UserID(apiTweet.UserID)
ret.RetweetedAt, err = TimestampFromString(apiTweet.CreatedAt)
if err != nil {
panic(err)
}
return
}

@ -13,16 +13,23 @@ import (

func TestParseSingleRetweet(t *testing.T) {
assert := assert.New(t)
require := require.New(t)
data, err := os.ReadFile("test_responses/tweet_that_is_a_retweet.json")
if err != nil {
panic(err)
}
var api_tweet APITweet
err = json.Unmarshal(data, &api_tweet)
require.NoError(t, err)
require.NoError(err)

retweet, err := ParseSingleRetweet(api_tweet)
require.NoError(t, err)
trove, err := api_tweet.ToTweetTrove()
require.NoError(err)

require.Len(trove.Tweets, 0)
require.Len(trove.Retweets, 1)

retweet, is_ok := trove.Retweets[TweetID(1404270043018448896)]
require.True(is_ok)

assert.Equal(TweetID(1404270043018448896), retweet.RetweetID)
assert.Equal(TweetID(1404269989646028804), retweet.TweetID)

@ -26,6 +26,9 @@ type Space struct {
IsDetailsFetched bool `db:"is_details_fetched"`
}

// TODO: view-layer
// - view helpers should go in a view layer

func (space Space) FormatDuration() string {
duration := space.EndedAt.Time.Sub(space.StartedAt.Time)
h := int(duration.Hours())
@ -37,14 +40,3 @@ func (space Space) FormatDuration() string {
}
return fmt.Sprintf("%dm%02ds", m, s)
}

func ParseAPISpace(apiCard APICard) Space {
ret := Space{}
ret.ID = SpaceID(apiCard.BindingValues.ID.StringValue)
ret.ShortUrl = apiCard.ShortenedUrl

// Indicate that this Space needs its details fetched still
ret.IsDetailsFetched = false

return ret
}

@ -1,31 +1,13 @@
package scraper_test

import (
"encoding/json"
"os"
"testing"

"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

. "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
)

func TestParseSpace(t *testing.T) {
assert := assert.New(t)
data, err := os.ReadFile("test_responses/tweet_content/space.json")
if err != nil {
panic(err)
}
var apiCard APICard
err = json.Unmarshal(data, &apiCard)
require.NoError(t, err)

space := ParseAPISpace(apiCard)
assert.Equal(SpaceID("1YpKkZVyQjoxj"), space.ID)
assert.Equal("https://t.co/WBPAHNF8Om", space.ShortUrl)
}

func TestFormatSpaceDuration(t *testing.T) {
assert := assert.New(t)
s := Space{

@ -1 +0,0 @@
{"name":"3691233323:audiospace","url":"https://t.co/WBPAHNF8Om","card_type_url":"http://card-type-url-is-deprecated.invalid","binding_values":{"id":{"type":"STRING","string_value":"1YpKkZVyQjoxj"},"narrow_cast_space_type":{"type":"STRING","string_value":"0"},"card_url":{"type":"STRING","string_value":"https://t.co/WBPAHNF8Om","scribe_key":"card_url"}},"card_platform":{"platform":{"device":{"name":"Swift","version":"12"},"audience":{"name":"production"}}}}

@ -5,9 +5,6 @@ import (
"errors"
"fmt"
"strings"
"time"

"gitlab.com/offline-twitter/twitter_offline_engine/pkg/terminal_utils"
)

var ERR_NO_TWEET = errors.New("Empty tweet")
@ -77,172 +74,6 @@ type Tweet struct {
IsConversationScraped bool `db:"is_conversation_scraped"`
LastScrapedAt Timestamp `db:"last_scraped_at"`
}

func (t Tweet) String() string {
var author string
if t.User != nil {
author = fmt.Sprintf("%s\n@%s", t.User.DisplayName, t.User.Handle)
} else {
author = "@???"
}

ret := fmt.Sprintf(
`%s
%s
%s
Replies: %d RT: %d QT: %d Likes: %d
`,
author,
terminal_utils.FormatDate(t.PostedAt.Time),
terminal_utils.WrapText(t.Text, 60),
t.NumReplies,
t.NumRetweets,
t.NumQuoteTweets,
t.NumLikes,
)

if len(t.Images) > 0 {
ret += fmt.Sprintf(terminal_utils.COLOR_GREEN+"images: %d\n"+terminal_utils.COLOR_RESET, len(t.Images))
}
if len(t.Urls) > 0 {
ret += "urls: [\n"
for _, url := range t.Urls {
ret += " " + url.Text + "\n"
}
ret += "]"
}

return ret
}

// Turn an APITweet, as returned from the scraper, into a properly structured Tweet object
func ParseSingleTweet(apiTweet APITweet) (ret Tweet, err error) {
apiTweet.NormalizeContent()

ret.ID = TweetID(apiTweet.ID)
ret.UserID = UserID(apiTweet.UserID)
ret.UserHandle = UserHandle(apiTweet.UserHandle)
ret.Text = apiTweet.FullText
ret.IsExpandable = apiTweet.IsExpandable

// Process "posted-at" date and time
if apiTweet.TombstoneText == "" { // Skip time parsing for tombstones
ret.PostedAt, err = TimestampFromString(apiTweet.CreatedAt)
if err != nil {
if ret.ID == 0 {
return Tweet{}, fmt.Errorf("unable to parse tweet: %w", ERR_NO_TWEET)
}
return Tweet{}, fmt.Errorf("Error parsing time on tweet ID %d:\n %w", ret.ID, err)
}
}

ret.NumLikes = apiTweet.FavoriteCount
ret.NumRetweets = apiTweet.RetweetCount
ret.NumReplies = apiTweet.ReplyCount
ret.NumQuoteTweets = apiTweet.QuoteCount
ret.InReplyToID = TweetID(apiTweet.InReplyToStatusID)
ret.QuotedTweetID = TweetID(apiTweet.QuotedStatusID)

// Process URLs and link previews
for _, url := range apiTweet.Entities.URLs {
var url_object Url
if apiTweet.Card.ShortenedUrl == url.ShortenedUrl {
if apiTweet.Card.Name == "3691233323:audiospace" {
// This "url" is just a link to a Space. Don't process it as a Url
continue
}
url_object = ParseAPIUrlCard(apiTweet.Card)
}
url_object.Text = url.ExpandedURL
url_object.ShortText = url.ShortenedUrl
url_object.TweetID = ret.ID

// Skip it if it's just the quoted tweet
_, id, is_ok := TryParseTweetUrl(url.ExpandedURL)
if is_ok && id == ret.QuotedTweetID {
continue
}

ret.Urls = append(ret.Urls, url_object)
}

// Process images
for _, media := range apiTweet.Entities.Media {
if media.Type != "photo" {
// Videos now have an entry in "Entities.Media" but they can be ignored; the useful bit is in ExtendedEntities
// So skip ones that aren't "photo"
continue
}
new_image := ParseAPIMedia(media)
new_image.TweetID = ret.ID
ret.Images = append(ret.Images, new_image)
}

// Process hashtags
for _, hashtag := range apiTweet.Entities.Hashtags {
ret.Hashtags = append(ret.Hashtags, hashtag.Text)
}

// Process `@` mentions and reply-mentions
for _, mention := range apiTweet.Entities.Mentions {
ret.Mentions = append(ret.Mentions, mention.UserName)
}
for _, mention := range strings.Split(apiTweet.Entities.ReplyMentions, " ") {
if mention != "" {
if mention[0] != '@' {
panic(fmt.Errorf("Unknown ReplyMention value %q:\n %w", apiTweet.Entities.ReplyMentions, EXTERNAL_API_ERROR))
}
ret.ReplyMentions = append(ret.ReplyMentions, mention[1:])
}
}

// Process videos
for _, entity := range apiTweet.ExtendedEntities.Media {
if entity.Type != "video" && entity.Type != "animated_gif" {
continue
}

new_video := ParseAPIVideo(entity)
new_video.TweetID = ret.ID
ret.Videos = append(ret.Videos, new_video)

// Remove the thumbnail from the Images list
updated_imgs := []Image{}
for _, img := range ret.Images {
if VideoID(img.ID) != new_video.ID {
updated_imgs = append(updated_imgs, img)
}
}
ret.Images = updated_imgs
}

// Process polls
if strings.Index(apiTweet.Card.Name, "poll") == 0 {
poll := ParseAPIPoll(apiTweet.Card)
poll.TweetID = ret.ID
ret.Polls = []Poll{poll}
}

// Process spaces
if apiTweet.Card.Name == "3691233323:audiospace" {
space := ParseAPISpace(apiTweet.Card)
ret.Spaces = []Space{space}
ret.SpaceID = space.ID
}

// Process tombstones and other metadata
ret.TombstoneType = apiTweet.TombstoneText
ret.IsStub = !(ret.TombstoneType == "")
ret.LastScrapedAt = TimestampFromUnix(0) // Caller will change this for the tweet that was actually scraped
ret.IsConversationScraped = false // Safe due to the "No Worsening" principle

// Extra data that can help piece together tombstoned tweet info
ret.in_reply_to_user_id = UserID(apiTweet.InReplyToUserID)
ret.in_reply_to_user_handle = UserHandle(apiTweet.InReplyToScreenName)

return
}

// Get a single tweet with no replies from the API.
//
// args:

@ -195,3 +195,13 @@ func (trove *TweetTrove) PostProcess(api *API) error {
}
return nil
}

func (t TweetTrove) GetOldestMessage(id DMChatRoomID) DMMessageID {
oldest := DMMessageID(^uint(0) >> 1) // Max integer
for _, m := range t.Messages {
if m.ID < oldest && m.DMChatRoomID == id {
oldest = m.ID
}
}
return oldest
}

@ -1,11 +1,7 @@
package scraper

import (
    "fmt"
    "log"
    "net/url"
    "path"
    "regexp"
)

type Url struct {
@ -28,6 +24,9 @@ type Url struct {
    IsContentDownloaded bool `db:"is_content_downloaded"`
}

// TODO: view-layer
// - view helpers should go in a view layer

func (u Url) GetDomain() string {
    if u.Domain != "" {
        return u.Domain
@ -38,106 +37,3 @@ func (u Url) GetDomain() string {
    }
    return urlstruct.Host
}

func ParseAPIUrlCard(apiCard APICard) Url {
    values := apiCard.BindingValues
    ret := Url{}
    ret.HasCard = true

    ret.Domain = values.Domain.Value
    ret.Title = values.Title.Value
    ret.Description = values.Description.Value
    ret.IsContentDownloaded = false
    ret.CreatorID = UserID(values.Creator.UserValue.Value)
    ret.SiteID = UserID(values.Site.UserValue.Value)

    var thumbnail_url string

    if apiCard.Name == "summary_large_image" || apiCard.Name == "summary" {
        thumbnail_url = values.Thumbnail.ImageValue.Url
    } else if apiCard.Name == "player" {
        thumbnail_url = values.PlayerImage.ImageValue.Url
    } else if apiCard.Name == "unified_card" {
        // TODO: Grok chat previews
        log.Print("Grok chat card, not implemented yet-- skipping")
    } else {
        panic("Unknown card type: " + apiCard.Name)
    }

    if thumbnail_url != "" {
        ret.HasThumbnail = true
        ret.ThumbnailRemoteUrl = thumbnail_url
        ret.ThumbnailLocalPath = get_thumbnail_local_path(thumbnail_url)
        ret.ThumbnailWidth = values.Thumbnail.ImageValue.Width
        ret.ThumbnailHeight = values.Thumbnail.ImageValue.Height
    }

    return ret
}

func get_prefixed_path(p string) string {
    local_prefix_regex := regexp.MustCompile(`^[\w-]{2}`)
    local_prefix := local_prefix_regex.FindString(p)
    if len(local_prefix) != 2 {
        panic(fmt.Sprintf("Unable to extract a 2-letter prefix for filename %s", p))
    }
    return path.Join(local_prefix, p)
}

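// Illustrative example (not part of the diff; the filename is made up):
// get_prefixed_path("E1BcfwdWUAQCPne.jpg") returns "E1/E1BcfwdWUAQCPne.jpg" -- files are
// sharded into a subdirectory named after the first two characters of the filename.
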
func get_thumbnail_local_path(remote_url string) string {
    u, err := url.Parse(remote_url)
    if err != nil {
        panic(err)
    }
    if u.RawQuery == "" {
        return path.Base(u.Path)
    }
    query_params, err := url.ParseQuery(u.RawQuery)
    if err != nil {
        panic(err)
    }

    return get_prefixed_path(
        fmt.Sprintf("%s_%s.%s", path.Base(u.Path), query_params["name"][0], query_params["format"][0]),
    )
}

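// Illustrative example (not part of the diff; the URL is made up):
// get_thumbnail_local_path("https://pbs.twimg.com/card_img/abc123?format=jpg&name=800x419")
// combines the path base with the `name` and `format` query params into "abc123_800x419.jpg",
// then shards it with get_prefixed_path, giving "ab/abc123_800x419.jpg".
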
// Given a URL, try to parse it as a tweet url.
// The bool is an `is_ok` value; true if the parse was successful, false if it didn't match
func TryParseTweetUrl(s string) (UserHandle, TweetID, bool) {
    parsed_url, err := url.Parse(s)
    if err != nil {
        return UserHandle(""), TweetID(0), false
    }

    if parsed_url.Host != "twitter.com" && parsed_url.Host != "mobile.twitter.com" && parsed_url.Host != "x.com" {
        return UserHandle(""), TweetID(0), false
    }

    r := regexp.MustCompile(`^/(\w+)/status/(\d+)$`)
    matches := r.FindStringSubmatch(parsed_url.Path)
    if matches == nil {
        return UserHandle(""), TweetID(0), false
    }
    if len(matches) != 3 { // matches[0] is the full string
        panic(matches)
    }
    return UserHandle(matches[1]), TweetID(int_or_panic(matches[2])), true
}

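// Hypothetical usage sketch (not part of the diff; handle and ID are made up):
//
//    handle, id, is_ok := TryParseTweetUrl("https://twitter.com/somebody/status/1545787535372378112")
//    // handle == UserHandle("somebody"), id == TweetID(1545787535372378112), is_ok == true
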
/**
 * Given a tweet URL, return the corresponding user handle.
 * If tweet url is not valid, return an error.
 */
func ParseHandleFromTweetUrl(tweet_url string) (UserHandle, error) {
    short_url_regex := regexp.MustCompile(`^https://t.co/\w{5,20}$`)
    if short_url_regex.MatchString(tweet_url) {
        tweet_url = ExpandShortUrl(tweet_url)
    }

    ret, _, is_ok := TryParseTweetUrl(tweet_url)
    if !is_ok {
        return "", fmt.Errorf("Invalid tweet url: %s", tweet_url)
    }
    return ret, nil
}

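// Hypothetical usage sketch (not part of the diff; the URL is made up). A "https://t.co/..."
// short link is expanded via ExpandShortUrl first, then parsed like any other tweet URL:
//
//    handle, err := ParseHandleFromTweetUrl("https://t.co/AbCdEfGhIj")
//    // on success, handle is the tweet author's handle and err is nil
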
@ -4,9 +4,6 @@ import (
    "fmt"
    "path"
    "regexp"
    "strings"

    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/terminal_utils"
)

const DEFAULT_PROFILE_IMAGE_URL = "https://abs.twimg.com/sticky/default_profile_images/default_profile.png"
@ -15,14 +12,6 @@ const DEFAULT_PROFILE_IMAGE = "default_profile.png"
type UserID int64
type UserHandle string

func JoinArrayOfHandles(handles []UserHandle) string {
    ret := []string{}
    for _, h := range handles {
        ret = append(ret, string(h))
    }
    return strings.Join(ret, ",")
}

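// Illustrative example (not part of the diff; handles are made up):
// JoinArrayOfHandles([]UserHandle{"alice", "bob"}) returns "alice,bob".
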
type User struct {
    ID UserID `db:"id"`
    DisplayName string `db:"display_name"`
@ -51,40 +40,6 @@ type User struct {
    IsIdFake bool `db:"is_id_fake"`
}

func (u User) String() string {
    var verified string
    if u.IsVerified {
        verified = "[\u2713]"
    }
    ret := fmt.Sprintf(
        `%s%s
@%s
%s

Following: %d Followers: %d

Joined %s
%s
%s
`,
        u.DisplayName,
        verified,
        u.Handle,
        terminal_utils.WrapText(u.Bio, 60),
        u.FollowingCount,
        u.FollowersCount,
        terminal_utils.FormatDate(u.JoinDate.Time),
        u.Location,
        u.Website,
    )
    if u.PinnedTweet != nil {
        ret += "\n" + terminal_utils.WrapText(u.PinnedTweet.Text, 60)
    } else {
        println("Pinned tweet id:", u.PinnedTweetID)
    }
    return ret
}

func GetUnknownUser() User {
    return User{
        ID: UserID(0x4000000000000000), // 2^62
@ -125,63 +80,6 @@ func GetUnknownUserWithHandle(handle UserHandle) User {
    }
}

// Turn an APIUser, as returned from the scraper, into a properly structured User object
func ParseSingleUser(apiUser APIUser) (ret User, err error) {
    if apiUser.DoesntExist {
        // User may have been deleted, or there was a typo. There's no data to parse
        if apiUser.ScreenName == "" {
            panic("ScreenName is empty!")
        }
        ret = GetUnknownUserWithHandle(UserHandle(apiUser.ScreenName))
        return
    }
    ret.ID = UserID(apiUser.ID)
    ret.Handle = UserHandle(apiUser.ScreenName)
    if apiUser.IsBanned {
        // Banned users won't have any further info, so just return here
        ret.IsBanned = true
        return
    }
    ret.DisplayName = apiUser.Name
    ret.Bio = apiUser.Description
    ret.FollowingCount = apiUser.FriendsCount
    ret.FollowersCount = apiUser.FollowersCount
    ret.Location = apiUser.Location
    if len(apiUser.Entities.URL.Urls) > 0 {
        ret.Website = apiUser.Entities.URL.Urls[0].ExpandedURL
    }
    ret.JoinDate, err = TimestampFromString(apiUser.CreatedAt)
    if err != nil {
        err = fmt.Errorf("Error parsing time on user ID %d: %w", ret.ID, err)
        return
    }
    ret.IsPrivate = apiUser.Protected
    ret.IsVerified = apiUser.Verified
    ret.ProfileImageUrl = apiUser.ProfileImageURLHTTPS

    if regexp.MustCompile(`_normal\.\w{2,4}`).MatchString(ret.ProfileImageUrl) {
        ret.ProfileImageUrl = strings.ReplaceAll(ret.ProfileImageUrl, "_normal.", ".")
    }
    ret.BannerImageUrl = apiUser.ProfileBannerURL

    ret.ProfileImageLocalPath = ret.compute_profile_image_local_path()
    ret.BannerImageLocalPath = ret.compute_banner_image_local_path()

    if len(apiUser.PinnedTweetIdsStr) > 0 {
        ret.PinnedTweetID = TweetID(idstr_to_int(apiUser.PinnedTweetIdsStr[0]))
    }
    return
}

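// Hypothetical usage sketch (not part of the diff; the handle is made up). A user the API
// reports as nonexistent is turned into the placeholder returned by GetUnknownUserWithHandle:
//
//    missing_user, err := ParseSingleUser(APIUser{DoesntExist: true, ScreenName: "deleted_account"})
//    // err == nil; missing_user is GetUnknownUserWithHandle(UserHandle("deleted_account"))
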
// Calls API#GetUserByID and returns the parsed result
func GetUserByID(u_id UserID) (User, error) {
    session, err := NewGuestSession() // This endpoint works better if you're not logged in
    if err != nil {
        return User{}, err
    }
    return session.GetUserByID(u_id)
}

/**
 * Make a filename for the profile image, that hopefully won't clobber other ones
 */

@ -1,16 +1,7 @@
package scraper

import (
    "net/url"
    "path"
    "sort"
)

type VideoID int64

// TODO video-source-user: extract source user information (e.g., someone shares a video
// from someone else).

type Video struct {
    ID VideoID `db:"id"`
    TweetID TweetID `db:"tweet_id"`
@ -30,56 +21,3 @@ type Video struct {
    IsGeoblocked bool `db:"is_geoblocked"`
    IsGif bool `db:"is_gif"`
}

func get_filename(remote_url string) string {
    u, err := url.Parse(remote_url)
    if err != nil {
        panic(err)
    }
    return path.Base(u.Path)
}

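// Illustrative example (not part of the diff; the URL is made up):
// get_filename("https://video.twimg.com/ext_tw_video/123/pu/vid/720x720/abcDEF.mp4?tag=12")
// returns "abcDEF.mp4" (the last path segment, with the query string ignored).
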
func ParseAPIVideo(apiVideo APIExtendedMedia) Video {
    variants := apiVideo.VideoInfo.Variants
    sort.Sort(variants)
    video_remote_url := variants[0].URL

    var view_count int

    r := apiVideo.Ext.MediaStats.R

    switch r.(type) {
    case string:
        view_count = 0
    case map[string]interface{}:
        OK_entry, ok := r.(map[string]interface{})["ok"]
        if !ok {
            panic("No 'ok' value found in the R!")
        }
        view_count_str, ok := OK_entry.(map[string]interface{})["viewCount"]
        view_count = int_or_panic(view_count_str.(string))
        if !ok {
            panic("No 'viewCount' value found in the OK!")
        }
    }

    local_filename := get_prefixed_path(get_filename(video_remote_url))

    return Video{
        ID: VideoID(apiVideo.ID),
        Width: apiVideo.OriginalInfo.Width,
        Height: apiVideo.OriginalInfo.Height,
        RemoteURL: video_remote_url,
        LocalFilename: local_filename,

        ThumbnailRemoteUrl: apiVideo.MediaURLHttps,
        ThumbnailLocalPath: get_prefixed_path(path.Base(apiVideo.MediaURLHttps)),
        Duration: apiVideo.VideoInfo.Duration,
        ViewCount: view_count,

        IsDownloaded: false,
        IsBlockedByDMCA: false,
        IsGeoblocked: apiVideo.ExtMediaAvailability.Reason == "Geoblocked",
        IsGif: apiVideo.Type == "animated_gif",
    }
}