@@ -10,6 +10,28 @@ import { CheerioAPI } from "cheerio";
1010import { load } from "cheerio" ;
1111import { getFetcher } from "./fetcher" ;
1212
// Fetches `url` and parses the response body into a CheerioAPI instance, typed
// as the page kind requested by the caller. The cast to `TPage` is a
// compile-time label only: nothing here checks that the downloaded markup
// really is that kind of page.
//
// - url: address of the page to download.
// - skipAdultBanner: when true (the default), the request carries the
//   `view_adult` cookie; when false, no Cookie header is sent at all.
//
// Resolves to the parsed document. The response status is not inspected, so
// error pages are parsed and returned like any other page.
const fetchPage = async <TPage extends CheerioAPI>({
  url,
  skipAdultBanner = true,
}: {
  url: string;
  skipAdultBanner?: boolean;
}): Promise<TPage> => {
  const headers: Record<string, string> = {};
  if (skipAdultBanner) {
    // We set a cookie to bypass the Terms of Service agreement modal that
    // appears when viewing works as a guest, which prevented some selectors
    // from working. Appending ?view_adult=true to URLs doesn't work for
    // chaptered works since that part gets cleared when those are
    // automatically redirected.
    headers.Cookie = "view_adult=true;";
  }

  const response = await getFetcher()(url, { headers });
  const markup = await response.text();

  // `load` is synchronous, so there is nothing to await here.
  return load(markup) as TPage;
};
34+
// We create separate interfaces for each page type to make sure that the
// correct type of page is passed to each method that extracts data.
// Other than this, all pages are instances of CheerioAPI and can be used interchangeably.
@@ -20,9 +42,9 @@ export interface TagWorksFeed extends CheerioAPI {
2042 kind : "TagWorksFeed" ;
2143}
// Downloads the works feed of the tag named `tagName` and returns the parsed
// document typed as a TagWorksFeed.
export const loadTagWorksFeed = async ({ tagName }: { tagName: string }) => {
  const feedUrl = getTagWorksFeedUrl(tagName);
  return fetchPage<TagWorksFeed>({ url: feedUrl });
};
2749
2850// A page showing the details of a tag.
@@ -31,7 +53,9 @@ export interface TagPage extends CheerioAPI {
3153 kind : "TagPage" ;
3254}
// Downloads the details page of the tag named `tagName` and returns the parsed
// document typed as a TagPage.
export const loadTagPage = async ({ tagName }: { tagName: string }) => {
  const tagUrl = getTagUrl(tagName);
  return fetchPage<TagPage>({ url: tagUrl });
};
3660
3761// Atom feed of the most recent works featuring a tag.
@@ -40,9 +64,9 @@ export interface TagWorksAtomFeed extends CheerioAPI {
4064 kind : "TagWorksAtomFeed" ;
4165}
// Downloads the Atom feed of works for the tag identified by `tagId` and
// returns the parsed document typed as a TagWorksAtomFeed.
export const loadTagFeedAtomPage = async ({ tagId }: { tagId: string }) => {
  const atomUrl = getTagWorksFeedAtomUrl(tagId);
  return fetchPage<TagWorksAtomFeed>({ url: atomUrl });
};
4771
4872// The first page of a work.
@@ -57,20 +81,9 @@ export const loadWorkPage = async ({
5781 workId : string ;
5882 chapterId ?: string ;
5983} ) => {
60- return load (
61- await (
62- await getFetcher ( ) ( getWorkUrl ( { workId, chapterId } ) , {
63- headers : {
64- // We set a cookie to bypass the Terms of Service agreement modal that
65- // appears when viewing works as a guest, which prevented some
66- // selectors from working. Appending ?view_adult=true to URLs doesn't
67- // work for chaptered works since that part gets cleared when those
68- // are automatically redirected.
69- Cookie : "view_adult=true;" ,
70- } ,
71- } )
72- ) . text ( )
73- ) as WorkPage ;
84+ return await fetchPage < WorkPage > ( {
85+ url : getWorkUrl ( { workId, chapterId } ) ,
86+ } ) ;
7487} ;
7588
7689// A user profile page.
@@ -83,29 +96,25 @@ export const loadUserProfilePage = async ({
8396} : {
8497 username : string ;
8598} ) => {
86- return load (
87- await ( await getFetcher ( ) ( getUserProfileUrl ( { username } ) ) ) . text ( )
88- ) as UserProfile ;
99+ return await fetchPage < UserProfile > ( {
100+ url : getUserProfileUrl ( { username } ) ,
101+ } ) ;
89102} ;
90103
// The chapter navigation page of a work, served at /works/<workId>/navigate.
export interface ChapterIndexPage extends CheerioAPI {
  kind: "ChapterIndexPage";
}

// Downloads the chapter navigation page of the work identified by `workId` and
// returns the parsed document typed as a ChapterIndexPage.
export const loadChaptersIndexPage = async ({ workId }: { workId: string }) => {
  // The URL must not contain any whitespace around the interpolated id.
  const navigateUrl = `https://archiveofourown.org/works/${workId}/navigate`;
  return fetchPage<ChapterIndexPage>({ url: navigateUrl });
};
101112
// The page of a series, served at /series/<seriesId>.
export interface SeriesPage extends CheerioAPI {
  kind: "SeriesPage";
}

// Downloads the page of the series identified by `seriesId` and returns the
// parsed document typed as a SeriesPage.
export const loadSeriesPage = async (seriesId: string) => {
  // The URL must not contain any whitespace around the interpolated id.
  const seriesUrl = `https://archiveofourown.org/series/${seriesId}`;
  return fetchPage<SeriesPage>({ url: seriesUrl });
};
0 commit comments