Skip to content

Commit a0c7762

Browse files
authored
feat: scraping des articles depuis un drupal #215 (#501)
1 parent 49aaf02 commit a0c7762

File tree

18 files changed

+1623
-207
lines changed

18 files changed

+1623
-207
lines changed

.docker/Dockerfile

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,18 +19,26 @@ RUN APP_ENV=prod composer install --prefer-dist --no-dev --no-cache --optimize-a
1919
#----------------------------------------------------------------------
2020
# Installer les dépendances JavaScript/nodejs dans une couche dédiée
2121
#----------------------------------------------------------------------
22-
FROM ${registry}/library/node:20-alpine AS deps_nodejs
22+
FROM ${registry}/library/node:20-alpine AS deps_nodejs_build
2323
ARG project_dir
2424
RUN mkdir -p ${project_dir}
2525
WORKDIR ${project_dir}
2626

2727
RUN apk add --no-cache git
2828

2929
COPY --from=deps_php ${project_dir} .
30-
RUN yarn install --production --frozen-lockfile \
30+
RUN yarn install --production=false --frozen-lockfile \
3131
&& yarn build \
3232
&& yarn cache clean
3333

34+
FROM ${registry}/library/node:20-alpine AS deps_nodejs_runtime
35+
ARG project_dir
36+
RUN mkdir -p ${project_dir}
37+
WORKDIR ${project_dir}
38+
39+
COPY --from=deps_php ${project_dir} .
40+
RUN yarn install --production=true --frozen-lockfile --ignore-scripts
41+
3442
#----------------------------------------------------------------------
3543
# Créer l'image de base de l'application
3644
#----------------------------------------------------------------------
@@ -56,6 +64,7 @@ RUN apt-get -qq update \
5664
&& apt-get -qq install --no-install-recommends -y \
5765
unzip zip \
5866
curl wget \
67+
rclone \
5968
&& apt-get -qq clean && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
6069

6170
#------------------------------------------------------------------------
@@ -143,7 +152,10 @@ WORKDIR ${project_dir}
143152
COPY --exclude=.git --exclude=var . .
144153
COPY --from=deps_php ${project_dir}/vendor vendor
145154
COPY --from=deps_php ${project_dir}/public/bundles public/bundles
146-
COPY --from=deps_nodejs ${project_dir}/public/build public/build
155+
COPY --from=deps_nodejs_build ${project_dir}/public/build public/build
156+
COPY --from=deps_nodejs_runtime ${project_dir}/node_modules node_modules
157+
COPY --from=node:20-slim /usr/local/lib/node_modules /usr/local/lib/node_modules
158+
COPY --from=node:20-slim /usr/local/bin/node /usr/local/bin/node
147159

148160
RUN mkdir -p ${project_dir}/var \
149161
&& chown -R www-data:www-data ${project_dir}/var/
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
#!/bin/bash
2+
3+
node bin/articles-scraper.mjs

.eslintrc.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,12 @@
4646
"rules": {
4747
"react/jsx-uses-react": "error" // import React from 'react' est nécessaire seulement dans les fichiers javascript (non-typescript)
4848
}
49+
},
50+
{
51+
"files": ["bin/**"],
52+
"env": {
53+
"node": true
54+
}
4955
}
5056
],
5157
"settings": {

assets/modules/entrepot/breadcrumbs/Breadcrumb.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ const getBreadcrumb = (route: Route<typeof routes>, datastore?: Datastore): Brea
3333
defaultProps.segments.push({ label: t("contact"), linkProps: routes.contact().link });
3434
return { ...defaultProps, currentPageLabel: t(route.name) };
3535
case "news_list":
36+
case "news_list_by_tag":
3637
return { ...defaultProps, currentPageLabel: t("news") };
3738
case "news_article":
3839
// géré dans le composant NewsArticle

assets/pages/news/NewsArticle.tsx

Lines changed: 53 additions & 56 deletions
Original file line numberDiff line numberDiff line change
@@ -1,87 +1,84 @@
11
import { fr } from "@codegouvfr/react-dsfr";
2-
import Tag from "@codegouvfr/react-dsfr/Tag";
3-
import { FC } from "react";
2+
import Alert from "@codegouvfr/react-dsfr/Alert";
3+
import { useQuery } from "@tanstack/react-query";
4+
import { FC, useMemo } from "react";
45
import { symToStr } from "tsafe/symToStr";
56

67
import { type NewsArticle } from "../../@types/newsArticle";
7-
import articles from "../../data/actualites.json";
8+
import Main from "../../components/Layout/Main";
9+
import LoadingText from "../../components/Utils/LoadingText";
810
import { useTranslation } from "../../i18n/i18n";
9-
import { appRoot, routes } from "../../router/router";
10-
import { formatDateFromISO } from "../../utils";
11+
import SymfonyRouting from "../../modules/Routing";
12+
import { routes } from "../../router/router";
1113
import PageNotFound from "../error/PageNotFound";
12-
import Main from "../../components/Layout/Main";
1314

1415
type NewsArticleProps = {
1516
slug: string;
1617
};
1718

1819
const NewsArticle: FC<NewsArticleProps> = ({ slug }) => {
20+
const { t: tCommon } = useTranslation("Common");
1921
const { t: tBreadcrumb } = useTranslation("Breadcrumb");
2022

21-
const newsArticle: NewsArticle | undefined = articles[slug];
23+
const articleQuery = useQuery({
24+
queryKey: ["articles", "slug", slug],
25+
queryFn: async ({ signal }) => {
26+
const url = SymfonyRouting.generate("cartesgouvfr_s3_gateway_get_content", {
27+
path: `articles/${slug}.html`,
28+
});
29+
const response = await fetch(url, { signal });
30+
31+
if (!response.ok) {
32+
return Promise.reject({
33+
message: "Fetching articles failed",
34+
code: response.status,
35+
});
36+
}
37+
38+
const text = await response.text();
39+
return text;
40+
},
41+
});
2242

23-
if (newsArticle === undefined) {
43+
const documentTitle = useMemo(() => {
44+
if (articleQuery.data === undefined) return undefined;
45+
46+
const parser = new DOMParser();
47+
const htmlDoc = parser.parseFromString(articleQuery.data, "text/html");
48+
49+
return htmlDoc.querySelector("h1")?.innerText;
50+
}, [articleQuery.data]);
51+
52+
// @ts-expect-error fausse alerte
53+
if (articleQuery.error?.code === 404) {
2454
return <PageNotFound />;
2555
}
2656

27-
const tags = newsArticle?.tags?.map((tag, i) => (
28-
<Tag key={`${slug}_tag_${i}`} className={fr.cx("fr-mr-2v")}>
29-
{tag}
30-
</Tag>
31-
));
32-
3357
return (
3458
<Main
3559
customBreadcrumbProps={{
3660
homeLinkProps: routes.home().link,
3761
segments: [{ label: tBreadcrumb("news"), linkProps: routes.news_list().link }],
38-
currentPageLabel: newsArticle?.breadcrumb ?? newsArticle.title,
62+
currentPageLabel: documentTitle,
3963
}}
40-
title={newsArticle?.title}
64+
title={documentTitle}
4165
>
42-
<div className={fr.cx("fr-grid-row")}>
43-
<div className={fr.cx("fr-col-12", "fr-col-md-8")}>
44-
<div className={fr.cx("fr-grid-row", "fr-grid-row--center")}>
45-
<div className={fr.cx("fr-tags-group")}>{tags}</div>
46-
</div>
47-
<div className={fr.cx("fr-grid-row", "fr-grid-row--center")}>
48-
<h1>{newsArticle?.title}</h1>
49-
</div>
50-
<div className={fr.cx("fr-grid-row", "fr-grid-row--center")}>
51-
<p dangerouslySetInnerHTML={{ __html: newsArticle.short_description ?? "" }} />
52-
</div>
53-
<div className={fr.cx("fr-grid-row", "fr-grid-row--center")}>
54-
<p
55-
style={{
56-
fontStyle: "italic",
57-
color: fr.colors.decisions.text.mention.grey.default,
58-
}}
59-
>
60-
<i className="ri-article-line" />
61-
&nbsp;Publié le {formatDateFromISO(newsArticle.date)}
62-
</p>
63-
</div>
66+
{articleQuery.isLoading && <LoadingText message="Actualités" as="h1" withSpinnerIcon={true} />}
6467

65-
<div className={fr.cx("fr-grid-row", "fr-grid-row--center")}>
66-
<figure className={fr.cx("fr-content-media")} role="group" aria-label={newsArticle?.thumbnail_caption}>
67-
<div className={fr.cx("fr-content-media__img")}>
68-
<img
69-
className={fr.cx("fr-responsive-img")}
70-
src={`${appRoot}/${newsArticle.thumbnail_url}`}
71-
alt={newsArticle?.thumbnail_alt}
72-
role="presentation"
73-
data-fr-js-ratio="true"
74-
/>
75-
</div>
76-
<figcaption className={fr.cx("fr-content-media__caption")}>{newsArticle?.thumbnail_caption}</figcaption>
77-
</figure>
78-
</div>
68+
{articleQuery.error && (
69+
<Alert severity={"error"} title={tCommon("error")} description={articleQuery.error?.message} className={fr.cx("fr-my-3w")} />
70+
)}
7971

80-
<div className={fr.cx("fr-grid-row", "fr-mt-2w")}>
81-
<div className={fr.cx("fr-col")} dangerouslySetInnerHTML={{ __html: newsArticle.content }} />
82-
</div>
72+
{articleQuery.data && (
73+
<div className={fr.cx("fr-grid-row")}>
74+
<div
75+
className={fr.cx("fr-col-12", "fr-col-md-8")}
76+
dangerouslySetInnerHTML={{
77+
__html: articleQuery.data,
78+
}}
79+
/>
8380
</div>
84-
</div>
81+
)}
8582
</Main>
8683
);
8784
};

assets/pages/news/NewsList.tsx

Lines changed: 86 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -1,62 +1,103 @@
11
import { fr } from "@codegouvfr/react-dsfr";
2-
import { Card } from "@codegouvfr/react-dsfr/Card";
3-
import { Tag } from "@codegouvfr/react-dsfr/Tag";
4-
import { FC } from "react";
2+
import Alert from "@codegouvfr/react-dsfr/Alert";
3+
import { useQuery } from "@tanstack/react-query";
4+
import { FC, useEffect } from "react";
55
import { symToStr } from "tsafe/symToStr";
66

7-
import articles from "../../data/actualites.json";
8-
import { appRoot, routes } from "../../router/router";
9-
import { type NewsArticle } from "../../@types/newsArticle";
10-
import { formatDateFromISO } from "../../utils";
117
import Main from "../../components/Layout/Main";
8+
import LoadingText from "../../components/Utils/LoadingText";
9+
import { useTranslation } from "../../i18n/i18n";
10+
import SymfonyRouting from "../../modules/Routing";
11+
import { routes } from "../../router/router";
1212

13-
type NewsListItemProps = {
14-
slug: string;
15-
newsArticle: NewsArticle;
13+
// NOTE pour que la commande "react-dsfr update-icons" inclue l'icone article dans les assets qui est utilisée dans les articles
14+
// fr-icon-article-line
15+
16+
type NewsListProps = {
17+
page: number;
18+
tag?: string;
1619
};
20+
const NewsList: FC<NewsListProps> = ({ page = 0, tag }) => {
21+
const { t: tCommon } = useTranslation("Common");
1722

18-
const NewsListItem: FC<NewsListItemProps> = ({ slug, newsArticle }) => {
19-
const SHORT_DESC_MAX_CHAR = 120;
23+
const articlesListQuery = useQuery({
24+
queryKey: ["articles", "list", tag, page],
25+
queryFn: async ({ signal }) => {
26+
const url = SymfonyRouting.generate("cartesgouvfr_s3_gateway_get_content", {
27+
path: tag ? `articles/list/tags/${tag}/${page}.html` : `articles/list/${page}.html`,
28+
});
29+
const response = await fetch(url, { signal });
2030

21-
const tags = newsArticle?.tags?.map((tag, i) => <Tag key={`${slug}_tag_${i}`}>{tag}</Tag>);
31+
if (!response.ok) {
32+
return Promise.reject({
33+
message: "Fetching articles failed",
34+
code: response.status,
35+
});
36+
}
2237

23-
return (
24-
<div className={fr.cx("fr-col-sm-12", "fr-col-md-4", "fr-col-lg-4")}>
25-
<Card
26-
start={<div className={fr.cx("fr-tags-group")}>{tags}</div>}
27-
desc={
28-
<span
29-
dangerouslySetInnerHTML={{
30-
__html:
31-
newsArticle?.short_description && newsArticle?.short_description.length > SHORT_DESC_MAX_CHAR
32-
? newsArticle?.short_description.substring(0, 100) + "..."
33-
: (newsArticle?.short_description ?? ""),
34-
}}
35-
/>
38+
const text = await response.text();
39+
return text;
40+
},
41+
});
42+
43+
// @ts-expect-error fausse alerte
44+
if (articlesListQuery.error?.code === 404) {
45+
routes.news_list({ page: 0 }).push();
46+
}
47+
48+
// gestion des liens de navigation côté client pour les tags et les cartes d'articles
49+
useEffect(() => {
50+
const handleTagClick = (event: MouseEvent) => {
51+
const target = event.target as HTMLElement;
52+
if (target.tagName === "A" && target.classList?.contains("fr-tag")) {
53+
event.preventDefault();
54+
const href = (target as HTMLAnchorElement).href;
55+
const tag = href.split("/")?.[5];
56+
if (tag) {
57+
routes.news_list_by_tag({ page: 0, tag }).push();
58+
} else {
59+
routes.news_list({ page: 0 }).push();
3660
}
37-
detail={newsArticle?.date && formatDateFromISO(newsArticle?.date)}
38-
enlargeLink
39-
imageAlt={newsArticle?.thumbnail_alt ?? "Vignette de l’article"}
40-
imageUrl={`${appRoot}/${newsArticle.thumbnail_url}`}
41-
linkProps={routes.news_article({ slug }).link}
42-
title={<span dangerouslySetInnerHTML={{ __html: newsArticle?.title ?? "" }} />}
43-
titleAs="h2"
44-
/>
45-
</div>
46-
);
47-
};
48-
NewsListItem.displayName = symToStr({ NewsListItem });
61+
}
62+
};
63+
64+
const handleCardClick = (event: MouseEvent) => {
65+
const target = event.target as HTMLElement;
66+
if (target.tagName === "A" && target.parentElement?.className.includes("fr-card")) {
67+
event.preventDefault();
68+
const href = (target as HTMLAnchorElement).href;
69+
const slug = href.split("/")?.[4];
70+
if (slug) {
71+
routes.news_article({ slug }).push();
72+
}
73+
}
74+
};
75+
76+
document.addEventListener("click", handleTagClick);
77+
document.addEventListener("click", handleCardClick);
78+
79+
return () => {
80+
document.removeEventListener("click", handleTagClick);
81+
document.removeEventListener("click", handleCardClick);
82+
};
83+
}, []);
4984

50-
const NewsList = () => {
5185
return (
5286
<Main title="Actualités">
53-
<div className={fr.cx("fr-container")}>
54-
<h1>Actualités</h1>
87+
{articlesListQuery.isLoading && <LoadingText message="Actualités" as="h1" withSpinnerIcon={true} />}
88+
89+
{articlesListQuery.error && (
90+
<Alert severity={"error"} title={tCommon("error")} description={articlesListQuery.error?.message} className={fr.cx("fr-my-3w")} />
91+
)}
5592

56-
<div className={fr.cx("fr-grid-row", "fr-grid-row--gutters")}>
57-
{Object.entries(articles)?.map(([slug, article]) => <NewsListItem key={slug} slug={slug} newsArticle={article} />)}
58-
</div>
59-
</div>
93+
{articlesListQuery.data && (
94+
<div
95+
tabIndex={-1}
96+
dangerouslySetInnerHTML={{
97+
__html: articlesListQuery.data,
98+
}}
99+
/>
100+
)}
60101
</Main>
61102
);
62103
};

assets/router/GroupApp.tsx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,9 @@ function GroupApp(props: IGroupAppProps) {
6565
case "contact_confirmation":
6666
return { render: <ContactConfirmation /> };
6767
case "news_list":
68-
return { render: <NewsList /> };
68+
return { render: <NewsList page={route.params.page} /> };
69+
case "news_list_by_tag":
70+
return { render: <NewsList page={route.params.page} tag={route.params.tag} /> };
6971
case "news_article":
7072
return { render: <NewsArticle slug={route.params.slug} /> };
7173
case "faq":

assets/router/router.ts

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,19 @@ const publicRoutes = {
1818
documentation: defineRoute(`${appRoot}/documentation`),
1919
contact: defineRoute(`${appRoot}/nous-ecrire`),
2020
contact_confirmation: defineRoute(`${appRoot}/nous-ecrire/demande-envoyee`),
21-
news_list: defineRoute(`${appRoot}/actualites`),
21+
news_list: defineRoute(
22+
{
23+
page: param.query.optional.number.default(0),
24+
},
25+
() => `${appRoot}/actualites`
26+
),
27+
news_list_by_tag: defineRoute(
28+
{
29+
tag: param.path.optional.string,
30+
page: param.query.optional.number.default(0),
31+
},
32+
(p) => `${appRoot}/actualites/liste/${p.tag}`
33+
),
2234
news_article: defineRoute(
2335
{
2436
slug: param.path.string,

0 commit comments

Comments
 (0)