Merge pull request #4 from PreferredAI/fix-crawler

hieuddo · web-flow · commit 9f58d3c08e27 · 2025-12-17T01:41:58.000+08:00
diff --git a/.github/workflows/nextjs.yml b/.github/workflows/nextjs.yml
@@ -8,10 +8,20 @@ on:
   # Runs on pushes targeting the default branch
   push:
     branches: ["main"]
+    paths-ignore:
+      # If there are changes to these files, wait for the update-publications workflow to finish
+      - "scripts/crawl-publications.ts"
+      - ".github/workflows/update-publications.yml"
 
   # Allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
 
+  # Run after update-publications workflow
+  workflow_run:
+    workflows: ["Update Publications"]
+    types:
+      - completed
+
 # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
 permissions:
   contents: read
diff --git a/.github/workflows/update-publications.yml b/.github/workflows/update-publications.yml
@@ -6,6 +6,11 @@ on:
   schedule:
     - cron: "0 0 * * 0"
 
+  push:
+    paths:
+      - ".github/workflows/update-publications.yml"
+      - "scripts/crawl-publications.ts"
+
   # Allow manual trigger
   workflow_dispatch:
 
diff --git a/scripts/crawl-publications.ts b/scripts/crawl-publications.ts
@@ -309,7 +309,7 @@ function parsePublicationFromLi(liContent: string): Publication | null {
     const isPaperHost = PAPER_HOST_DOMAINS.some(domain => url.includes(domain));
     const isTitle = isPaperHost &&
       text.length > MIN_TITLE_LINK_TEXT_LENGTH &&
-      !text.toLowerCase().includes('code');
+      (!text.toLowerCase().includes('code') || text.length > 30);
 
     links.push({ text, url, isTitle });
   }
diff --git a/src/data/publications.ts b/src/data/publications.ts
@@ -1,5 +1,5 @@
 // Auto-generated by crawl-publications.ts
-// Last updated: 2025-12-14T17:18:31.964Z
+// Last updated: 2025-12-16T17:41:08.912Z
 
 export interface Publication {
   title: string;
@@ -29,11 +29,13 @@ export const PUBLICATIONS_DATA: YearSection[] = [
         title: "Conv4Rec: A 1-by-1 Convolutional AutoEncoder for User Profiling through Joint Analysis of Implicit and Explicit Feedbacks",
         authors: "Antoine Ledent, Petr Kasalický, Rodrigo Alves, and Hady W. Lauw",
         venue: "IEEE Transactions on Neural Networks and Learning Systems (TNNLS), Vol. 36, No. 12, Dec 2025",
+        pdfUrl: "https://arxiv.org/abs/2509.07499",
       },
       {
         title: "Parameter-Efficient Variational AutoEncoder for Multimodal Multi-Interest Recommendation",
         authors: "Nhu-Thuat Tran and Hady W. Lauw",
         venue: "ACM Multimedia Conference (ACM MM'25), Oct 2025",
+        pdfUrl: "https://www.dropbox.com/scl/fi/q7g7ct173jxgqkuz5s4tb/acmmm25.pdf?rlkey=7dve8mod7trsjt6ubhgf02wm7&dl=0",
       },
       {
         title: "Optimal Transport Alignment of User Preferences from Ratings and Texts",
@@ -63,6 +65,7 @@ export const PUBLICATIONS_DATA: YearSection[] = [
         title: "VARIUM: Variational Autoencoder for Multi-Interest Representation with Inter-User Memory",
         authors: "Nhu-Thuat Tran and Hady W. Lauw",
         venue: "ACM International Conference on Web Search and Data Mining (WSDM'25), Mar 2025",
+        pdfUrl: "https://www.dropbox.com/scl/fi/5nezl5hq0xhd0jrk6012n/wsdm25a.pdf?rlkey=bk8s3akrfzjv4ykse6thomzgd&dl=0",
       },
       {
         title: "Selecting Comparative Sets of Reviews Across Multiple Items",
@@ -193,6 +196,7 @@ export const PUBLICATIONS_DATA: YearSection[] = [
         title: "Multi-Representation Variational Autoencoder via Iterative Latent Attention and Implicit Differentiation",
         authors: "Nhu-Thuat Tran and Hady W. Lauw",
         venue: "ACM International Conference on Information and Knowledge Management (CIKM'23), Oct 2023",
+        pdfUrl: "https://www.dropbox.com/scl/fi/eb30uta1e3vmn6gh1f8lu/cikm23.pdf?rlkey=ljfxqg8j9580wysvi70emu6d7&dl=0",
       },
       {
         title: "Robust Bidirectional Poly-Matching",
@@ -384,6 +388,7 @@ export const PUBLICATIONS_DATA: YearSection[] = [
         title: "Bilateral Variational Autoencoder for Collaborative Filtering",
         authors: "Quoc-Tuan Truong, Aghiles Salah, and Hady W. Lauw",
         venue: "ACM International Conference on Web Search and Data Mining (WSDM'21), Mar 2021",
+        pdfUrl: "https://www.dropbox.com/s/8id1sf17cimp6j5/wsdm21b.pdf?dl=0",
       },
       {
         title: "Explainable Recommendation with Comparative Constraints on Product Aspects",
@@ -428,6 +433,7 @@ export const PUBLICATIONS_DATA: YearSection[] = [
         title: "Topic Modeling on Document Networks with Adjacent-Encoder",
         authors: "Ce Zhang and Hady W. Lauw",
         venue: "AAAI Conference on Artificial Intelligence (AAAI'20), Feb 2020.",
+        pdfUrl: "https://www.dropbox.com/s/eqkyfviowwwleey/aaai20a.pdf?dl=0",
         extraLinks: [{"text":"Code","url":"https://github.com/PreferredAI/adjacent-encoder"}],
       },
     ],
@@ -521,6 +527,7 @@ export const PUBLICATIONS_DATA: YearSection[] = [
         title: "Collaborative Topic Regression with Denoising AutoEncoder for Content and Community Co-Representation",
         authors: "Trong T. Nguyen and Hady W. Lauw",
         venue: "ACM Conference on Information and Knowledge Management (CIKM'17), short paper, Nov 2017",
+        pdfUrl: "https://www.dropbox.com/s/bq5q9lxn9igiyvp/cikm17b.pdf?dl=0",
       },
       {
         title: "SemVis: Semantic Visualization for Interactive Topical Analysis",

Original file line number	Diff line number	Diff line change
`@@ -309,7 +309,7 @@ function parsePublicationFromLi(liContent: string): Publication \| null {`
`309`	`309`	`const isPaperHost = PAPER_HOST_DOMAINS.some(domain => url.includes(domain));`
`310`	`310`	`const isTitle = isPaperHost &&`
`311`	`311`	`text.length > MIN_TITLE_LINK_TEXT_LENGTH &&`
`312`		`- !text.toLowerCase().includes('code');`
	`312`	`+ (!text.toLowerCase().includes('code') \|\| text.length > 30);`
`313`	`313`
`314`	`314`	`links.push({ text, url, isTitle });`
`315`	`315`	`}`