Add background scraping of the logged-in user's home timeline
commit 8349ca4ae3 (parent 09356ecc04)
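In brief: a new internal/webserver/stopwatch.go spawns a goroutine that scrapes the active user's home timeline every 3 minutes; Application.Run starts it just before serving; the DisableScraping flag becomes IsScrapingDisabled and now defaults to true until an active user is set; GetHomeTimeline moves to a pointer receiver; do_http_POST refreshes the CSRF token after each request; and the CI when: conditions become proper list items.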
@@ -20,8 +20,8 @@ pipeline:
   secrets:
     - offline_twatter_passwd
   when: # At least one
-    path: pkg/scraper/**
-    branch: release-*
+    - path: pkg/scraper/**
+    - branch: release-*
   commands:
     - cd cmd
     - ./tests.sh
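A note on the pipeline fix: per the inline "# At least one" comment, the path and branch conditions are meant as alternatives. As bare mapping keys they formed a single constraint group; as list items each entry is its own group, so the step runs when at least one of them matches. (This reading of the when: list semantics is an inference from the comment, not verified against the CI docs.)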
doc/TODO.txt (14 changes)
@@ -97,7 +97,7 @@ TODO const-select-fields
 TODO compound-query-structs
 - Right now the result structs for the compound queries (e.g., "Feed", "TweetDetail") are in the `persistence` package. They don't go there probably
 
-TOOD: login-routes-tests
+TODO: login-routes-tests
 - Make the scraper.API object injectable somehow (get rid of singleton pattern) and add tests for login and change-session sequences
 - Also test profile.ListSessions()
 
@@ -111,13 +111,15 @@ TODO: webserver-session-arg-active-user
 
 TODO: webserver-tombstones
 
-TODO: fetch-timeline
-- HTMX polling element on base template
-
 TODO: progressive-web-app
 
 TODO: paste-twitter-urls-in-search-bar
 - pasting a link from twitter.com into the search bar should handle it properly
-- tweet detail
-- user profile
 - space
+
+TODO: image-width-and-height
+- Images should have explicit "width" and "height" attributes. This reduces Cumulative Layout Shift (CLS) while loading the page.
+- https://web.dev/optimize-cls/#images-without-dimensions
+
+TODO: quote-tweet-icon
+- show quote-tweets on a tweet
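The deleted `TODO: fetch-timeline` entry is presumably closed out by this commit: the HTMX-polling idea is superseded by the server-side background scrape added in stopwatch.go below.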
@@ -53,7 +53,7 @@ func (app *Application) TweetDetail(w http.ResponseWriter, r *http.Request) {
 
     // Return whether the scrape succeeded (if false, we should 404)
     try_scrape_tweet := func() bool {
-        if app.DisableScraping {
+        if app.IsScrapingDisabled {
             return false
         }
         trove, err := scraper.GetTweetFullAPIV2(tweet_id, 50) // TODO: parameterizable
@@ -31,7 +31,7 @@ type Application struct {
 
     Profile persistence.Profile
     ActiveUser scraper.User
-    DisableScraping bool
+    IsScrapingDisabled bool
 }
 
 func NewApp(profile persistence.Profile) Application {
@@ -43,6 +43,7 @@ func NewApp(profile persistence.Profile) Application {
 
         Profile: profile,
         ActiveUser: get_default_user(),
+        IsScrapingDisabled: true, // Until an active user is set
     }
     ret.Middlewares = []Middleware{
         secureHeaders,
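The rename from `DisableScraping` to `IsScrapingDisabled` pairs with a behavioral fix visible in this hunk: the field is now explicitly initialized to true until an active user is set. Previously nothing set it, so it took Go's zero value for bool (false), meaning a freshly constructed app had scraping enabled even with no session. A minimal standalone sketch of that zero-value behavior (illustrative, not code from this repo):

    package main

    import "fmt"

    type Application struct {
        IsScrapingDisabled bool
    }

    func main() {
        var app Application                 // bool fields default to false
        fmt.Println(app.IsScrapingDisabled) // false: scraping allowed unless set explicitly
    }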
@@ -64,7 +65,7 @@ func (app *Application) SetActiveUser(handle scraper.UserHandle) error {
     if handle == "no account" {
         scraper.InitApi(scraper.NewGuestSession())
         app.ActiveUser = get_default_user()
-        app.DisableScraping = true // API requests will fail b/c not logged in
+        app.IsScrapingDisabled = true // API requests will fail b/c not logged in
     } else {
         user, err := app.Profile.GetUserByHandle(handle)
         if err != nil {
@@ -72,7 +73,7 @@ func (app *Application) SetActiveUser(handle scraper.UserHandle) error {
         }
         scraper.InitApi(app.Profile.LoadSession(handle))
         app.ActiveUser = user
-        app.DisableScraping = false
+        app.IsScrapingDisabled = false
     }
     return nil
 }
@@ -139,6 +140,9 @@ func (app *Application) Run(address string) {
     }
 
     app.InfoLog.Printf("Starting server on %s", address)
+
+    app.start_background()
+
     err := srv.ListenAndServe()
     app.ErrorLog.Fatal(err)
 }
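Ordering matters here: `srv.ListenAndServe()` blocks until the server exits, so `app.start_background()` must come first. The call itself returns immediately, since the scrape loop runs on its own goroutine (see stopwatch.go below).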
@@ -49,7 +49,7 @@ func selector(s string) cascadia.Sel {
 func do_request(req *http.Request) *http.Response {
     recorder := httptest.NewRecorder()
     app := webserver.NewApp(profile)
-    app.DisableScraping = true
+    app.IsScrapingDisabled = true
     app.ServeHTTP(recorder, req)
     return recorder.Result()
 }
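Since `NewApp` now sets `IsScrapingDisabled` to true by default, this explicit assignment in the test helper is technically redundant, though it keeps the no-network intent of the tests self-documenting.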
internal/webserver/stopwatch.go (new file, 65 lines)
@@ -0,0 +1,65 @@
+package webserver
+
+import (
+    "fmt"
+    "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
+    "time"
+)
+
+var is_for_you_only = true // Do one initial scrape of the "for you" feed and then just regular feed after that
+
+func (app *Application) background_scrape() {
+    // Avoid crashing the thread if a scrape fails
+    defer func() {
+        if err := recover(); err != nil {
+            // TODO
+            fmt.Println("Panicked!")
+            fmt.Printf("%#v\n", err)
+        }
+    }()
+
+    fmt.Println("Starting scrape...")
+
+    // Do nothing if scraping is currently disabled
+    if app.IsScrapingDisabled {
+        fmt.Println("Skipping scrape!")
+        return
+    }
+
+    fmt.Println("Scraping...")
+    trove, err := scraper.GetHomeTimeline("", is_for_you_only)
+    if err != nil {
+        app.ErrorLog.Printf("Background scrape failed: %s", err.Error())
+        return
+    }
+    fmt.Println("Saving scrape results...")
+    app.Profile.SaveTweetTrove(trove)
+    fmt.Println("Scraping succeeded.")
+    is_for_you_only = false
+}
+
+func (app *Application) start_background() {
+    // Start a goroutine to run the background task every 3 minutes
+    fmt.Println("Starting background")
+    go func() {
+        fmt.Println("Starting routine")
+
+        // Initial delay before the first task execution (10 seconds here, adjust as needed)
+        initialDelay := 10 * time.Second
+        time.Sleep(initialDelay)
+
+        app.background_scrape()
+
+        // Create a ticker that triggers the background task every 3 minutes
+        interval := 3 * time.Minute // TODO: parameterizable
+        timer := time.NewTicker(interval)
+        defer timer.Stop()
+
+        for range timer.C {
+            // Execute the background task
+            fmt.Println("Starting routine")
+
+            app.background_scrape()
+        }
+    }()
+}
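Two things worth noting about the new file: `is_for_you_only` is package-level state, but as written it is only read and written from the single scraper goroutine, so there is no data race; and the `for range timer.C` loop has no exit, so the goroutine lives for the rest of the process. A sketch of a cancellable variant (hypothetical, not part of this commit) in case the loop ever needs to be stopped, e.g. from tests:

    // Hypothetical: same scrape loop, but stoppable by closing `quit`.
    func (app *Application) start_background_cancellable(quit <-chan struct{}) {
        go func() {
            ticker := time.NewTicker(3 * time.Minute)
            defer ticker.Stop()
            for {
                select {
                case <-ticker.C:
                    app.background_scrape() // same task as above
                case <-quit:
                    return // stop scraping once `quit` is closed
                }
            }
        }()
    }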
@@ -234,6 +234,8 @@ func (api *API) do_http_POST(url string, body string, result interface{}) error
     if err != nil {
         return fmt.Errorf("Error executing HTTP POST request:\n %w", err)
     }
+    api.update_csrf_token()
+
     defer resp.Body.Close()
 
     if resp.StatusCode != 200 {
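Refreshing the CSRF token after every POST is presumably what lets a long-lived session keep working: the server can rotate the CSRF cookie on any response, and the background scraper now reuses one authenticated API session indefinitely rather than creating a fresh one per request. (The rotation detail is an inference from the change, not documented in this diff.)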
@@ -954,7 +954,7 @@ func GetUserLikes(user_id UserID, cursor string) (TweetTrove, error) {
     return the_api.GetUserLikes(user_id, cursor)
 }
 
-func (api API) GetHomeTimeline(cursor string, is_for_you bool) (TweetTrove, error) {
+func (api *API) GetHomeTimeline(cursor string, is_for_you bool) (TweetTrove, error) {
     var url string
     body_struct := struct {
         Variables GraphqlVariables `json:"variables"`
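The switch from a value receiver to a pointer receiver means any state the call mutates, such as the CSRF token updated in `do_http_POST` above, lands on the caller's `API` rather than dying with a copy. A standalone sketch of the difference (illustrative names, not code from this repo):

    package main

    import "fmt"

    type session struct{ token string }

    func (s session) setByValue(t string)    { s.token = t } // mutates a copy; change is lost
    func (s *session) setByPointer(t string) { s.token = t } // mutates the caller's value

    func main() {
        s := session{}
        s.setByValue("abc")
        fmt.Println(s.token) // ""
        s.setByPointer("abc")
        fmt.Println(s.token) // "abc"
    }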