Make user handle matching case-insensitive in tweet trove

This commit is contained in:
Alessio 2022-02-14 17:58:10 -08:00
parent 8a70f55e1b
commit 82f5721ac2
2 changed files with 10 additions and 15 deletions

View File

@ -2,6 +2,7 @@ package scraper
import ( import (
"fmt" "fmt"
"strings"
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
) )
@ -44,7 +45,7 @@ func (trove TweetTrove) Transform() (tweets []Tweet, retweets []Retweet, users [
*/ */
func (trove TweetTrove) FindUserByHandle(handle UserHandle) (User, bool) { func (trove TweetTrove) FindUserByHandle(handle UserHandle) (User, bool) {
for _, user := range trove.Users { for _, user := range trove.Users {
if user.Handle == handle { if strings.ToLower(string(user.Handle)) == strings.ToLower(string(handle)) {
return user, true return user, true
} }
} }
@ -112,20 +113,13 @@ func (trove *TweetTrove) FillMissingUserIDs() {
continue continue
} }
handle := tweet.UserHandle user, is_found := trove.FindUserByHandle(tweet.UserHandle)
is_user_found := false if !is_found {
for _, u := range trove.Users {
if u.Handle == handle {
tweet.UserID = u.ID
is_user_found = true
break
}
}
if !is_user_found {
// The user probably deleted their account, and thus `scraper.GetUser` failed. So // The user probably deleted their account, and thus `scraper.GetUser` failed. So
// they're not in this trove's Users. // they're not in this trove's Users.
panic(fmt.Sprintf("Couldn't fill out this Tweet's UserID: %d, %s", tweet.ID, tweet.UserHandle)) panic(fmt.Sprintf("Couldn't fill out this Tweet's UserID: %d, %s", tweet.ID, tweet.UserHandle))
} }
tweet.UserID = user.ID
trove.Tweets[i] = tweet trove.Tweets[i] = tweet
} }
} }

View File

@ -4,6 +4,7 @@ import (
"testing" "testing"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
. "offline_twitter/scraper" . "offline_twitter/scraper"
) )
@ -50,7 +51,7 @@ func TestMergeTweetTroves(t *testing.T) {
func TestFillMissingUserIDs(t *testing.T) { func TestFillMissingUserIDs(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
u1 := User{ID: 1, Handle: "a"} u1 := User{ID: 1, Handle: "A"}
t1 := Tweet{ID: 1, UserID: 1} t1 := Tweet{ID: 1, UserID: 1}
t2 := Tweet{ID: 2, UserHandle: "a"} t2 := Tweet{ID: 2, UserHandle: "a"}
@ -71,14 +72,14 @@ func TestFindUserByHandle(t *testing.T) {
assert := assert.New(t) assert := assert.New(t)
u1 := User{ID: 1, Handle: "1", DisplayName: "One"} u1 := User{ID: 1, Handle: "1", DisplayName: "One"}
u2 := User{ID: 2, Handle: "2", DisplayName: "Two"} u2 := User{ID: 2, Handle: "b", DisplayName: "Two"}
trove := NewTweetTrove() trove := NewTweetTrove()
trove.Users[u1.ID] = u1 trove.Users[u1.ID] = u1
trove.Users[u2.ID] = u2 trove.Users[u2.ID] = u2
user_2, ok := trove.FindUserByHandle("2") user_2, ok := trove.FindUserByHandle("B")
assert.True(ok) require.True(t, ok)
assert.Equal(user_2.DisplayName, "Two") assert.Equal(user_2.DisplayName, "Two")
_, ok = trove.FindUserByHandle("3") _, ok = trove.FindUserByHandle("3")