diff --git a/.woodpecker/build.yml b/.woodpecker/build.yml index 1ab1eee..b4edd52 100644 --- a/.woodpecker/build.yml +++ b/.woodpecker/build.yml @@ -20,8 +20,8 @@ pipeline: secrets: - offline_twatter_passwd when: # At least one - path: pkg/scraper/** - branch: release-* + - path: pkg/scraper/** + - branch: release-* commands: - cd cmd - ./tests.sh diff --git a/doc/TODO.txt b/doc/TODO.txt index 21cd2de..8fc051c 100644 --- a/doc/TODO.txt +++ b/doc/TODO.txt @@ -97,7 +97,7 @@ TODO const-select-fields TODO compound-query-structs - Right now the result structs for the compound queries (e.g., "Feed", "TweetDetail") are in the `persistence` package. They don't go there probably -TOOD: login-routes-tests +TODO: login-routes-tests - Make the scraper.API object injectable somehow (get rid of singleton pattern) and add tests for login and change-session sequences - Also test profile.ListSessions() @@ -111,13 +111,15 @@ TODO: webserver-session-arg-active-user TODO: webserver-tombstones -TODO: fetch-timeline -- HTMX polling element on base template - TODO: progressive-web-app TODO: paste-twitter-urls-in-search-bar - pasting a link from twitter.com into the search bar should handle it properly - - tweet detail - - user profile - space + +TODO: image-width-and-height +- Images should have explicit "width" and "height" attributes. This reduces Cumulative Layout Shift (CLS) while loading the page. 
+- https://web.dev/optimize-cls/#images-without-dimensions + +TODO: quote-tweet-icon +- show quote-tweets on a tweet diff --git a/internal/webserver/handler_tweet_detail.go b/internal/webserver/handler_tweet_detail.go index 92e8f77..3dbcfe6 100644 --- a/internal/webserver/handler_tweet_detail.go +++ b/internal/webserver/handler_tweet_detail.go @@ -53,7 +53,7 @@ func (app *Application) TweetDetail(w http.ResponseWriter, r *http.Request) { // Return whether the scrape succeeded (if false, we should 404) try_scrape_tweet := func() bool { - if app.DisableScraping { + if app.IsScrapingDisabled { return false } trove, err := scraper.GetTweetFullAPIV2(tweet_id, 50) // TODO: parameterizable diff --git a/internal/webserver/server.go b/internal/webserver/server.go index 7cda2d6..d986789 100644 --- a/internal/webserver/server.go +++ b/internal/webserver/server.go @@ -29,9 +29,9 @@ type Application struct { Middlewares []Middleware - Profile persistence.Profile - ActiveUser scraper.User - DisableScraping bool + Profile persistence.Profile + ActiveUser scraper.User + IsScrapingDisabled bool } func NewApp(profile persistence.Profile) Application { @@ -41,8 +41,9 @@ func NewApp(profile persistence.Profile) Application { InfoLog: log.New(os.Stdout, "INFO\t", log.Ldate|log.Ltime), ErrorLog: log.New(os.Stderr, "ERROR\t", log.Ldate|log.Ltime|log.Lshortfile), - Profile: profile, - ActiveUser: get_default_user(), + Profile: profile, + ActiveUser: get_default_user(), + IsScrapingDisabled: true, // Until an active user is set } ret.Middlewares = []Middleware{ secureHeaders, @@ -64,7 +65,7 @@ func (app *Application) SetActiveUser(handle scraper.UserHandle) error { if handle == "no account" { scraper.InitApi(scraper.NewGuestSession()) app.ActiveUser = get_default_user() - app.DisableScraping = true // API requests will fail b/c not logged in + app.IsScrapingDisabled = true // API requests will fail b/c not logged in } else { user, err := app.Profile.GetUserByHandle(handle) if err != nil { 
@@ -72,7 +73,7 @@ func (app *Application) SetActiveUser(handle scraper.UserHandle) error { } scraper.InitApi(app.Profile.LoadSession(handle)) app.ActiveUser = user - app.DisableScraping = false + app.IsScrapingDisabled = false } return nil } @@ -139,6 +140,9 @@ func (app *Application) Run(address string) { } app.InfoLog.Printf("Starting server on %s", address) + + app.start_background() + err := srv.ListenAndServe() app.ErrorLog.Fatal(err) } diff --git a/internal/webserver/server_test.go b/internal/webserver/server_test.go index 573f3ee..060f085 100644 --- a/internal/webserver/server_test.go +++ b/internal/webserver/server_test.go @@ -49,7 +49,7 @@ func selector(s string) cascadia.Sel { func do_request(req *http.Request) *http.Response { recorder := httptest.NewRecorder() app := webserver.NewApp(profile) - app.DisableScraping = true + app.IsScrapingDisabled = true app.ServeHTTP(recorder, req) return recorder.Result() } diff --git a/internal/webserver/stopwatch.go b/internal/webserver/stopwatch.go new file mode 100644 index 0000000..af4a7c6 --- /dev/null +++ b/internal/webserver/stopwatch.go @@ -0,0 +1,65 @@ +package webserver + +import ( + "fmt" + "gitlab.com/offline-twitter/twitter_offline_engine/pkg/scraper" + "time" +) + +var is_for_you_only = true // Do one initial scrape of the "for you" feed and then just regular feed after that + +func (app *Application) background_scrape() { + // Avoid crashing the thread if a scrape fails + defer func() { + if err := recover(); err != nil { + // TODO + fmt.Println("Panicked!") + fmt.Printf("%#v\n", err) + } + }() + + fmt.Println("Starting scrape...") + + // Do nothing if scraping is currently disabled + if app.IsScrapingDisabled { + fmt.Println("Skipping scrape!") + return + } + + fmt.Println("Scraping...") + trove, err := scraper.GetHomeTimeline("", is_for_you_only) + if err != nil { + app.ErrorLog.Printf("Background scrape failed: %s", err.Error()) + return + } + fmt.Println("Saving scrape results...") + 
app.Profile.SaveTweetTrove(trove) + fmt.Println("Scraping succeeded.") + is_for_you_only = false +} + +func (app *Application) start_background() { + // Start a goroutine to run the background task every 3 minutes + fmt.Println("Starting background") + go func() { + fmt.Println("Starting routine") + + // Initial delay before the first task execution (10 seconds here, adjust as needed) + initialDelay := 10 * time.Second + time.Sleep(initialDelay) + + app.background_scrape() + + // Create a ticker that triggers the background task every 3 minutes + interval := 3 * time.Minute // TODO: parameterizable + timer := time.NewTicker(interval) + defer timer.Stop() + + for range timer.C { + // Execute the background task + fmt.Println("Starting routine") + + app.background_scrape() + } + }() +} diff --git a/pkg/scraper/api_request_utils.go b/pkg/scraper/api_request_utils.go index ad8b658..d71edbd 100644 --- a/pkg/scraper/api_request_utils.go +++ b/pkg/scraper/api_request_utils.go @@ -234,6 +234,8 @@ func (api *API) do_http_POST(url string, body string, result interface{}) error if err != nil { return fmt.Errorf("Error executing HTTP POST request:\n %w", err) } + api.update_csrf_token() + defer resp.Body.Close() if resp.StatusCode != 200 { diff --git a/pkg/scraper/api_types_v2.go b/pkg/scraper/api_types_v2.go index cb4b898..519f4e4 100644 --- a/pkg/scraper/api_types_v2.go +++ b/pkg/scraper/api_types_v2.go @@ -954,7 +954,7 @@ func GetUserLikes(user_id UserID, cursor string) (TweetTrove, error) { return the_api.GetUserLikes(user_id, cursor) } -func (api API) GetHomeTimeline(cursor string, is_for_you bool) (TweetTrove, error) { +func (api *API) GetHomeTimeline(cursor string, is_for_you bool) (TweetTrove, error) { var url string body_struct := struct { Variables GraphqlVariables `json:"variables"`