Add background scraping of the logged-in user's home timeline
This commit is contained in:
parent
09356ecc04
commit
8349ca4ae3
@ -20,8 +20,8 @@ pipeline:
|
||||
secrets:
|
||||
- offline_twatter_passwd
|
||||
when: # At least one
|
||||
path: pkg/scraper/**
|
||||
branch: release-*
|
||||
- path: pkg/scraper/**
|
||||
- branch: release-*
|
||||
commands:
|
||||
- cd cmd
|
||||
- ./tests.sh
|
||||
|
14
doc/TODO.txt
14
doc/TODO.txt
@ -97,7 +97,7 @@ TODO const-select-fields
|
||||
TODO compound-query-structs
|
||||
- Right now the result structs for the compound queries (e.g., "Feed", "TweetDetail") are in the `persistence` package. They probably don't belong there
|
||||
|
||||
TODO: login-routes-tests
|
||||
TODO: login-routes-tests
|
||||
- Make the scraper.API object injectable somehow (get rid of singleton pattern) and add tests for login and change-session sequences
|
||||
- Also test profile.ListSessions()
|
||||
|
||||
@ -111,13 +111,15 @@ TODO: webserver-session-arg-active-user
|
||||
|
||||
TODO: webserver-tombstones
|
||||
|
||||
TODO: fetch-timeline
|
||||
- HTMX polling element on base template
|
||||
|
||||
TODO: progressive-web-app
|
||||
|
||||
TODO: paste-twitter-urls-in-search-bar
|
||||
- pasting a link from twitter.com into the search bar should handle it properly
|
||||
- tweet detail
|
||||
- user profile
|
||||
- space
|
||||
|
||||
TODO: image-width-and-height
|
||||
- Images should have explicit "width" and "height" attributes. This reduces Cumulative Layout Shift (CLS) while loading the page.
|
||||
- https://web.dev/optimize-cls/#images-without-dimensions
|
||||
|
||||
TODO: quote-tweet-icon
|
||||
- show quote-tweets on a tweet
|
||||
|
@ -53,7 +53,7 @@ func (app *Application) TweetDetail(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
// Return whether the scrape succeeded (if false, we should 404)
|
||||
try_scrape_tweet := func() bool {
|
||||
if app.DisableScraping {
|
||||
if app.IsScrapingDisabled {
|
||||
return false
|
||||
}
|
||||
trove, err := scraper.GetTweetFullAPIV2(tweet_id, 50) // TODO: parameterizable
|
||||
|
@ -31,7 +31,7 @@ type Application struct {
|
||||
|
||||
Profile persistence.Profile
|
||||
ActiveUser scraper.User
|
||||
DisableScraping bool
|
||||
IsScrapingDisabled bool
|
||||
}
|
||||
|
||||
func NewApp(profile persistence.Profile) Application {
|
||||
@ -43,6 +43,7 @@ func NewApp(profile persistence.Profile) Application {
|
||||
|
||||
Profile: profile,
|
||||
ActiveUser: get_default_user(),
|
||||
IsScrapingDisabled: true, // Until an active user is set
|
||||
}
|
||||
ret.Middlewares = []Middleware{
|
||||
secureHeaders,
|
||||
@ -64,7 +65,7 @@ func (app *Application) SetActiveUser(handle scraper.UserHandle) error {
|
||||
if handle == "no account" {
|
||||
scraper.InitApi(scraper.NewGuestSession())
|
||||
app.ActiveUser = get_default_user()
|
||||
app.DisableScraping = true // API requests will fail b/c not logged in
|
||||
app.IsScrapingDisabled = true // API requests will fail b/c not logged in
|
||||
} else {
|
||||
user, err := app.Profile.GetUserByHandle(handle)
|
||||
if err != nil {
|
||||
@ -72,7 +73,7 @@ func (app *Application) SetActiveUser(handle scraper.UserHandle) error {
|
||||
}
|
||||
scraper.InitApi(app.Profile.LoadSession(handle))
|
||||
app.ActiveUser = user
|
||||
app.DisableScraping = false
|
||||
app.IsScrapingDisabled = false
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@ -139,6 +140,9 @@ func (app *Application) Run(address string) {
|
||||
}
|
||||
|
||||
app.InfoLog.Printf("Starting server on %s", address)
|
||||
|
||||
app.start_background()
|
||||
|
||||
err := srv.ListenAndServe()
|
||||
app.ErrorLog.Fatal(err)
|
||||
}
|
||||
|
@ -49,7 +49,7 @@ func selector(s string) cascadia.Sel {
|
||||
func do_request(req *http.Request) *http.Response {
|
||||
recorder := httptest.NewRecorder()
|
||||
app := webserver.NewApp(profile)
|
||||
app.DisableScraping = true
|
||||
app.IsScrapingDisabled = true
|
||||
app.ServeHTTP(recorder, req)
|
||||
return recorder.Result()
|
||||
}
|
||||
|
65
internal/webserver/stopwatch.go
Normal file
65
internal/webserver/stopwatch.go
Normal file
@ -0,0 +1,65 @@
|
||||
package webserver
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper"
|
||||
"time"
|
||||
)
|
||||
|
||||
var is_for_you_only = true // Do one initial scrape of the "for you" feed and then just regular feed after that
|
||||
|
||||
func (app *Application) background_scrape() {
|
||||
// Avoid crashing the thread if a scrape fails
|
||||
defer func() {
|
||||
if err := recover(); err != nil {
|
||||
// TODO
|
||||
fmt.Println("Panicked!")
|
||||
fmt.Printf("%#v\n", err)
|
||||
}
|
||||
}()
|
||||
|
||||
fmt.Println("Starting scrape...")
|
||||
|
||||
// Do nothing if scraping is currently disabled
|
||||
if app.IsScrapingDisabled {
|
||||
fmt.Println("Skipping scrape!")
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Println("Scraping...")
|
||||
trove, err := scraper.GetHomeTimeline("", is_for_you_only)
|
||||
if err != nil {
|
||||
app.ErrorLog.Printf("Background scrape failed: %s", err.Error())
|
||||
return
|
||||
}
|
||||
fmt.Println("Saving scrape results...")
|
||||
app.Profile.SaveTweetTrove(trove)
|
||||
fmt.Println("Scraping succeeded.")
|
||||
is_for_you_only = false
|
||||
}
|
||||
|
||||
func (app *Application) start_background() {
|
||||
// Start a goroutine to run the background task every 3 minutes
|
||||
fmt.Println("Starting background")
|
||||
go func() {
|
||||
fmt.Println("Starting routine")
|
||||
|
||||
// Initial delay before the first task execution (0 seconds here, adjust as needed)
|
||||
initialDelay := 10 * time.Second
|
||||
time.Sleep(initialDelay)
|
||||
|
||||
app.background_scrape()
|
||||
|
||||
// Create a timer that triggers the background task every 3 minutes
|
||||
interval := 3 * time.Minute // TODO: parameterizable
|
||||
timer := time.NewTicker(interval)
|
||||
defer timer.Stop()
|
||||
|
||||
for range timer.C {
|
||||
// Execute the background task
|
||||
fmt.Println("Starting routine")
|
||||
|
||||
app.background_scrape()
|
||||
}
|
||||
}()
|
||||
}
|
@ -234,6 +234,8 @@ func (api *API) do_http_POST(url string, body string, result interface{}) error
|
||||
if err != nil {
|
||||
return fmt.Errorf("Error executing HTTP POST request:\n %w", err)
|
||||
}
|
||||
api.update_csrf_token()
|
||||
|
||||
defer resp.Body.Close()
|
||||
|
||||
if resp.StatusCode != 200 {
|
||||
|
@ -954,7 +954,7 @@ func GetUserLikes(user_id UserID, cursor string) (TweetTrove, error) {
|
||||
return the_api.GetUserLikes(user_id, cursor)
|
||||
}
|
||||
|
||||
func (api API) GetHomeTimeline(cursor string, is_for_you bool) (TweetTrove, error) {
|
||||
func (api *API) GetHomeTimeline(cursor string, is_for_you bool) (TweetTrove, error) {
|
||||
var url string
|
||||
body_struct := struct {
|
||||
Variables GraphqlVariables `json:"variables"`
|
||||
|
Loading…
x
Reference in New Issue
Block a user