Skip to content

Commit 26173fb

Browse files
authored
add scopus csv support (#123)
* Add new version of bibx
* Add support for Scopus CSV
* Make the unsupported-file error message more descriptive
1 parent 238abf6 commit 26173fb

File tree

8 files changed

+93
-20
lines changed

8 files changed

+93
-20
lines changed

functions/main.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
from enum import Enum
55
from functools import reduce
66
from io import StringIO
7-
from typing import Any, Dict, List
7+
from typing import Any, Dict, List, cast
88

99
import arrow
1010
import networkx as nx
@@ -20,10 +20,11 @@
2020
from firebase_functions.options import MemoryOption
2121
from firebase_functions.scheduler_fn import ScheduledEvent, on_schedule
2222
from google.cloud.firestore import DocumentReference, DocumentSnapshot
23+
from google.cloud.storage.client import Blob
2324
from pydantic import BaseModel, ValidationError
2425

2526
initialize_app()
26-
logging.basicConfig(level=logging.INFO)
27+
logging.basicConfig(level=logging.WARNING)
2728

2829

2930
ROOT = "root"
@@ -148,7 +149,11 @@ def get_contents(
148149
"""Get the contents for the files in order to create the graph."""
149150
names = [f"isi-files/{name}" for name in document_data["files"]]
150151
logger.info("Reading source files", extra={"names": names})
151-
blobs = list(filter(None, [storage.bucket().get_blob(name) for name in names]))
152+
bucket = storage.bucket()
153+
blobs: list[Blob] = cast(
154+
list[Blob],
155+
list(filter(None, [bucket.get_blob(blob_name=name) for name in names])), # type: ignore
156+
)
152157

153158
size = 0
154159
output = {}
@@ -158,7 +163,7 @@ def get_contents(
158163
size += blob.size or 0
159164
if (size / 1e6) > max_size_megabytes:
160165
break
161-
output[blob.name] = blob.download_as_text()
166+
output[blob.name] = blob.download_as_text() # type: ignore
162167
return output
163168

164169

functions/requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
firebase-admin~=6.6.0
22
firebase_functions~=0.4.2
3-
bibx==0.5.0
3+
bibx==0.6.0
44
pydantic~=2.10.6
55
arrow~=1.3.0

package-lock.json

Lines changed: 5 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/components/ui/Button.tsx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ import { cva, type VariantProps } from "class-variance-authority";
44
import * as React from "react";
55

66
const buttonVariants = cva(
7-
"inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-sm font-medium transition-colors ease-in focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-8 [&_svg]:shrink-0",
7+
"inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-sm font-medium transition-colors ease-in focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:shrink-0",
88
{
99
variants: {
1010
variant: {

src/components/upload/FileDropper.tsx

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,14 @@
11
import useError from "../../hooks/useError";
22
import useUpload from "../../hooks/useUpload";
33
import { looksLikeIsi } from "../../utils/isi";
4-
import { looksLikeScopus } from "../../utils/scopus";
4+
import { looksLikeScopusCsv } from "../../utils/scopusCsv";
5+
import { looksLikeScopusRis } from "../../utils/scopusRis";
56
import { FC, useCallback } from "react";
67
import { useDropzone } from "react-dropzone";
78
import { useLocation, useNavigate } from "react-router-dom";
89

910
const FileErrorMap = {
10-
not_supported: "does not look like a valid ISI file",
11+
not_supported: "does not look like a valid CSV, BIB, or ISI file",
1112
max_size: (maxSize: number) => `is too big to process (max. ${maxSize}MB)`,
1213
};
1314

@@ -35,7 +36,11 @@ const FileDropper: FC<FileDropperProps> = ({ maxSize }) => {
3536
.map((file) => file.text().then((text) => ({ text, file }))),
3637
).then((data) => {
3738
data.forEach(({ text, file }) => {
38-
if (looksLikeIsi(text) || looksLikeScopus(text)) {
39+
if (
40+
looksLikeIsi(text) ||
41+
looksLikeScopusRis(text) ||
42+
looksLikeScopusCsv(text)
43+
) {
3944
upload(Object(file).name, file);
4045
} else {
4146
error(Object(file).name, file, FileErrorMap.not_supported);

src/utils/metadata.ts

Lines changed: 18 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
11
import { FileMetadata } from "../types/fileMetadata";
22
import { mostCommon } from "./arrays";
33
import * as isi from "./isi";
4-
import * as scopus from "./scopus";
4+
import * as scopusCsv from "./scopusCsv";
5+
import * as scopusRis from "./scopusRis";
56
import md5 from "md5";
67

78
const metadata = async (name: string, blob: Blob): Promise<FileMetadata> => {
@@ -20,14 +21,26 @@ const metadata = async (name: string, blob: Blob): Promise<FileMetadata> => {
2021
};
2122
}
2223

23-
if (scopus.looksLikeScopus(content)) {
24+
if (scopusRis.looksLikeScopusRis(content)) {
2425
return {
2526
name,
2627
blob,
2728
hash,
28-
keywords: mostCommon(scopus.keywords(content), 3),
29-
articles: scopus.countArticles(content),
30-
citations: scopus.countReferences(content),
29+
keywords: mostCommon(scopusRis.keywords(content), 3),
30+
articles: scopusRis.countArticles(content),
31+
citations: scopusRis.countReferences(content),
32+
valid: true,
33+
};
34+
}
35+
36+
if (scopusCsv.looksLikeScopusCsv(content)) {
37+
return {
38+
name,
39+
blob,
40+
hash,
41+
keywords: mostCommon(scopusCsv.keywords(content), 3),
42+
articles: scopusCsv.countArticles(content),
43+
citations: scopusCsv.countReferences(content),
3144
valid: true,
3245
};
3346
}

src/utils/scopusCsv.ts

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
import Papa from "papaparse";
2+
import { z } from "zod";
3+
4+
/** Columns this module requires on every parsed CSV row; each must be a string. */
const scopusRowSchema = z.object({
  Authors: z.string(),
  "Author Keywords": z.string(),
  "Index Keywords": z.string(),
  References: z.string(),
});

/** A parsed file is validated as an array of such rows. */
const fileSchema = z.array(scopusRowSchema);

/** Row-array type obtained by validating parsed CSV data against `fileSchema`. */
type CsvFileType = z.infer<typeof fileSchema>;
14+
15+
/**
 * Parses CSV text into rows and validates them against `fileSchema`.
 *
 * The first CSV line is used as the header row and empty lines are skipped.
 *
 * @param text - Raw CSV content.
 * @returns The validated rows.
 * @throws Whatever `fileSchema.parse` throws when the rows do not match the schema.
 */
const readCsvText = (text: string): CsvFileType => {
  const parsed = Papa.parse(text, { header: true, skipEmptyLines: true });
  return fileSchema.parse(parsed.data);
};
19+
20+
/**
 * Reports whether `text` can be read by `readCsvText` and contains at least
 * one row.
 *
 * @param text - Raw file content to probe.
 * @returns `true` when parsing and validation succeed with one or more rows;
 *   `false` when there are no rows or when reading throws.
 */
const looksLikeScopusCsv = (text: string): boolean => {
  let rows: CsvFileType;
  try {
    rows = readCsvText(text);
  } catch {
    // Any parse/validation failure simply means "not this format".
    return false;
  }
  return rows.length > 0;
};
28+
29+
/**
 * Collects the distinct keywords found in the CSV text.
 *
 * Both the "Author Keywords" and "Index Keywords" columns are read. Each cell
 * is split on `;`; entries are then trimmed so that `"a; b"` and `"b; a"`
 * yield the same `"b"`, and blank entries (for example from an empty cell)
 * are dropped instead of being reported as an empty-string keyword.
 *
 * @param text - Raw CSV content.
 * @returns Unique, trimmed, non-empty keywords in first-seen order.
 * @throws If `readCsvText` rejects the text.
 */
const keywords = (text: string): string[] => {
  const rows = readCsvText(text);
  const found = rows
    .flatMap((row) => [
      ...row["Author Keywords"].split(";"),
      ...row["Index Keywords"].split(";"),
    ])
    .map((keyword) => keyword.trim())
    .filter((keyword) => keyword !== "");
  // A Set keeps insertion order, so the result stays in first-seen order.
  return Array.from(new Set(found));
};
37+
38+
/**
 * Counts the rows in the CSV text; each validated row counts as one article.
 *
 * @param text - Raw CSV content.
 * @returns The number of rows returned by `readCsvText`.
 * @throws If `readCsvText` rejects the text.
 */
const countArticles = (text: string): number => readCsvText(text).length;
42+
43+
/**
 * Counts the references listed across all rows of the CSV text.
 *
 * Each row's "References" cell is split on `"; "`. Blank entries are ignored:
 * `"".split("; ")` yields `[""]`, so without this filter every row with an
 * empty References cell would be counted as having one reference.
 *
 * @param text - Raw CSV content.
 * @returns Total number of non-blank reference entries over all rows.
 * @throws If `readCsvText` rejects the text.
 */
const countReferences = (text: string): number =>
  readCsvText(text).reduce((total, row) => {
    const references = row.References.split("; ").filter(
      (reference) => reference.trim() !== "",
    );
    return total + references.length;
  }, 0);
48+
49+
export { looksLikeScopusCsv, keywords, countArticles, countReferences };
src/utils/scopusRis.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
const RIS_PATTERN = /^(?<key>[A-Z0-9]{2})[ ]{2}-[ ](?<value>.*)$/;
22

3-
const looksLikeScopus = (content: string): boolean => {
3+
const looksLikeScopusRis = (content: string): boolean => {
44
let currentKey = null;
55
for (const line of content.split("\n")) {
66
if (!line) {
@@ -75,4 +75,4 @@ const countReferences = (content: string): number =>
7575
{ counting: false, count: 0 },
7676
).count;
7777

78-
export { looksLikeScopus, keywords, countArticles, countReferences };
78+
export { looksLikeScopusRis, keywords, countArticles, countReferences };

0 commit comments

Comments
 (0)