|
2 | 2 | "cells": [ |
3 | 3 | { |
4 | 4 | "cell_type": "code", |
5 | | - "execution_count": 1, |
| 5 | + "execution_count": 26, |
6 | 6 | "metadata": {}, |
7 | 7 | "outputs": [], |
8 | 8 | "source": [ |
9 | | - "import pandas as pd" |
| 9 | + "import pandas as pd\n", |
| 10 | + "from uuid import uuid5, NAMESPACE_URL\n" |
10 | 11 | ] |
11 | 12 | }, |
12 | 13 | { |
|
194 | 195 | }, |
195 | 196 | { |
196 | 197 | "cell_type": "code", |
197 | | - "execution_count": null, |
| 198 | + "execution_count": 15, |
198 | 199 | "metadata": {}, |
199 | 200 | "outputs": [], |
200 | 201 | "source": [ |
201 | 202 | "for i, row in dccs.iterrows():\n", |
202 | 203 | "\tmeta = [\"layout: ../../layouts/DCC.astro\"]\n", |
203 | | - "\tdescription = ''\n", |
204 | | - "\tfor k,v in row:\n", |
205 | | - "\t\tif not k == 'description':\n", |
| 204 | + "\tif row['active']:\n", |
| 205 | + "\t\tdescription = ''\n", |
| 206 | + "\t\tfor k,v in row.items():\n", |
| 207 | + "\t\t\tif not k == 'description':\n", |
| 208 | + "\t\t\t\tif not v == '':\n", |
| 209 | + "\t\t\t\t\tif k == 'icon' and v.startswith('/img'):\n", |
| 210 | + "\t\t\t\t\t\tv = \"https://cfde-drc.s3.amazonaws.com/assets\" + v\n", |
| 211 | + "\n", |
| 212 | + "\t\t\t\t\tmeta.append(\"%s: %s\"%(k, v))\n", |
| 213 | + "\t\t\telse:\n", |
| 214 | + "\t\t\t\tdescription = v\n", |
| 215 | + "\t\twith open('out/dccs/%s.md'%row['short_label'], 'w') as o:\n", |
| 216 | + "\t\t\to.write('---\\n')\n", |
| 217 | + "\t\t\to.write(\"\\n\".join(meta))\n", |
| 218 | + "\t\t\to.write('\\n---\\n')\n", |
| 219 | + "\t\t\to.write(description)" |
| 220 | + ] |
| 221 | + }, |
| 222 | + { |
| 223 | + "cell_type": "code", |
| 224 | + "execution_count": 17, |
| 225 | + "metadata": {}, |
| 226 | + "outputs": [ |
| 227 | + { |
| 228 | + "data": { |
| 229 | + "text/html": [ |
| 230 | + "<div>\n", |
| 231 | + "<style scoped>\n", |
| 232 | + " .dataframe tbody tr th:only-of-type {\n", |
| 233 | + " vertical-align: middle;\n", |
| 234 | + " }\n", |
| 235 | + "\n", |
| 236 | + " .dataframe tbody tr th {\n", |
| 237 | + " vertical-align: top;\n", |
| 238 | + " }\n", |
| 239 | + "\n", |
| 240 | + " .dataframe thead th {\n", |
| 241 | + " text-align: right;\n", |
| 242 | + " }\n", |
| 243 | + "</style>\n", |
| 244 | + "<table border=\"1\" class=\"dataframe\">\n", |
| 245 | + " <thead>\n", |
| 246 | + " <tr style=\"text-align: right;\">\n", |
| 247 | + " <th></th>\n", |
| 248 | + " <th>title</th>\n", |
| 249 | + " <th>journal</th>\n", |
| 250 | + " <th>authors</th>\n", |
| 251 | + " <th>year</th>\n", |
| 252 | + " <th>page</th>\n", |
| 253 | + " <th>volume</th>\n", |
| 254 | + " <th>issue</th>\n", |
| 255 | + " <th>pmid</th>\n", |
| 256 | + " <th>pmcid</th>\n", |
| 257 | + " <th>doi</th>\n", |
| 258 | + " <th>...</th>\n", |
| 259 | + " <th>dccs</th>\n", |
| 260 | + " <th>partnerships</th>\n", |
| 261 | + " <th>carousel</th>\n", |
| 262 | + " <th>carousel_title</th>\n", |
| 263 | + " <th>carousel_link</th>\n", |
| 264 | + " <th>carousel_description</th>\n", |
| 265 | + " <th>image</th>\n", |
| 266 | + " <th>featured</th>\n", |
| 267 | + " <th>r03</th>\n", |
| 268 | + " <th>keywords</th>\n", |
| 269 | + " </tr>\n", |
| 270 | + " </thead>\n", |
| 271 | + " <tbody>\n", |
| 272 | + " <tr>\n", |
| 273 | + " <th>0</th>\n", |
| 274 | + " <td>Computational screen to identify potential tar...</td>\n", |
| 275 | + " <td>Aging cell</td>\n", |
| 276 | + " <td>Deng EZ, Fleishman RH, Xie Z, Marino GB, Clark...</td>\n", |
| 277 | + " <td>2023</td>\n", |
| 278 | + " <td>e13809</td>\n", |
| 279 | + " <td>22</td>\n", |
| 280 | + " <td>6</td>\n", |
| 281 | + " <td>37082798.0</td>\n", |
| 282 | + " <td>PMC10265163</td>\n", |
| 283 | + " <td>10.1111/acel.13809</td>\n", |
| 284 | + " <td>...</td>\n", |
| 285 | + " <td>LINCS</td>\n", |
| 286 | + " <td></td>\n", |
| 287 | + " <td>False</td>\n", |
| 288 | + " <td></td>\n", |
| 289 | + " <td></td>\n", |
| 290 | + " <td></td>\n", |
| 291 | + " <td></td>\n", |
| 292 | + " <td>False</td>\n", |
| 293 | + " <td></td>\n", |
| 294 | + " <td>[\"neoantigens\", \"Gene Expression Profiling\", \"...</td>\n", |
| 295 | + " </tr>\n", |
| 296 | + " <tr>\n", |
| 297 | + " <th>1</th>\n", |
| 298 | + " <td>Contribution of Circulating Host and Microbial...</td>\n", |
| 299 | + " <td>International journal of tryptophan research :...</td>\n", |
| 300 | + " <td>Morgan EW, Dong F, Annalora AJ, Murray IA, Wol...</td>\n", |
| 301 | + " <td>2023</td>\n", |
| 302 | + " <td>11786469231182510</td>\n", |
| 303 | + " <td>16</td>\n", |
| 304 | + " <td></td>\n", |
| 305 | + " <td>37441265.0</td>\n", |
| 306 | + " <td>PMC10334013</td>\n", |
| 307 | + " <td>10.1177/11786469231182510</td>\n", |
| 308 | + " <td>...</td>\n", |
| 309 | + " <td>Metabolomics</td>\n", |
| 310 | + " <td></td>\n", |
| 311 | + " <td>False</td>\n", |
| 312 | + " <td></td>\n", |
| 313 | + " <td></td>\n", |
| 314 | + " <td></td>\n", |
| 315 | + " <td></td>\n", |
| 316 | + " <td>False</td>\n", |
| 317 | + " <td></td>\n", |
| 318 | + " <td>[\"microbiome\", \"metabolomics\", \"indole\", \"Aryl...</td>\n", |
| 319 | + " </tr>\n", |
| 320 | + " <tr>\n", |
| 321 | + " <th>2</th>\n", |
| 322 | + " <td>Lactate-dependent transcriptional regulation c...</td>\n", |
| 323 | + " <td>Nature communications</td>\n", |
| 324 | + " <td>Takata N, Miska JM, Morgan MA, Patel P, Billin...</td>\n", |
| 325 | + " <td>2023</td>\n", |
| 326 | + " <td>4129</td>\n", |
| 327 | + " <td>14</td>\n", |
| 328 | + " <td>1</td>\n", |
| 329 | + " <td>37452018.0</td>\n", |
| 330 | + " <td>PMC10349100</td>\n", |
| 331 | + " <td>10.1038/s41467-023-39672-2</td>\n", |
| 332 | + " <td>...</td>\n", |
| 333 | + " <td>Metabolomics</td>\n", |
| 334 | + " <td></td>\n", |
| 335 | + " <td>False</td>\n", |
| 336 | + " <td></td>\n", |
| 337 | + " <td></td>\n", |
| 338 | + " <td></td>\n", |
| 339 | + " <td></td>\n", |
| 340 | + " <td>False</td>\n", |
| 341 | + " <td></td>\n", |
| 342 | + " <td>[\"Eye\", \"Gene Expression Regulation\", \"Mice\", ...</td>\n", |
| 343 | + " </tr>\n", |
| 344 | + " <tr>\n", |
| 345 | + " <th>3</th>\n", |
| 346 | + " <td>Modular and mechanistic changes across stages ...</td>\n", |
| 347 | + " <td>BMC cancer</td>\n", |
| 348 | + " <td>Rahiminejad S, Maurya MR, Mukund K, Subramaniam S</td>\n", |
| 349 | + " <td>2022</td>\n", |
| 350 | + " <td>436</td>\n", |
| 351 | + " <td>22</td>\n", |
| 352 | + " <td>1</td>\n", |
| 353 | + " <td>35448980.0</td>\n", |
| 354 | + " <td>PMC9022252</td>\n", |
| 355 | + " <td>10.1186/s12885-022-09479-3</td>\n", |
| 356 | + " <td>...</td>\n", |
| 357 | + " <td>Metabolomics</td>\n", |
| 358 | + " <td></td>\n", |
| 359 | + " <td>False</td>\n", |
| 360 | + " <td></td>\n", |
| 361 | + " <td></td>\n", |
| 362 | + " <td></td>\n", |
| 363 | + " <td></td>\n", |
| 364 | + " <td>False</td>\n", |
| 365 | + " <td></td>\n", |
| 366 | + " <td>[\"Gene Expression Profiling\", \"Computational B...</td>\n", |
| 367 | + " </tr>\n", |
| 368 | + " <tr>\n", |
| 369 | + " <th>4</th>\n", |
| 370 | + " <td>The stability of the myelinating oligodendrocy...</td>\n", |
| 371 | + " <td>Cell reports</td>\n", |
| 372 | + " <td>Pruvost M, Patzig J, Yattah C, Selcen I, Herna...</td>\n", |
| 373 | + " <td>2023</td>\n", |
| 374 | + " <td>112848</td>\n", |
| 375 | + " <td>42</td>\n", |
| 376 | + " <td>8</td>\n", |
| 377 | + " <td>37515770.0</td>\n", |
| 378 | + " <td>PMC10600948</td>\n", |
| 379 | + " <td>10.1016/j.celrep.2023.112848</td>\n", |
| 380 | + " <td>...</td>\n", |
| 381 | + " <td>Metabolomics</td>\n", |
| 382 | + " <td></td>\n", |
| 383 | + " <td>False</td>\n", |
| 384 | + " <td></td>\n", |
| 385 | + " <td></td>\n", |
| 386 | + " <td></td>\n", |
| 387 | + " <td></td>\n", |
| 388 | + " <td>False</td>\n", |
| 389 | + " <td></td>\n", |
| 390 | + " <td>[\"Chromatin\", \"Oligodendroglia\", \"progenitors\"...</td>\n", |
| 391 | + " </tr>\n", |
| 392 | + " </tbody>\n", |
| 393 | + "</table>\n", |
| 394 | + "<p>5 rows × 22 columns</p>\n", |
| 395 | + "</div>" |
| 396 | + ], |
| 397 | + "text/plain": [ |
| 398 | + " title \\\n", |
| 399 | + "0 Computational screen to identify potential tar... \n", |
| 400 | + "1 Contribution of Circulating Host and Microbial... \n", |
| 401 | + "2 Lactate-dependent transcriptional regulation c... \n", |
| 402 | + "3 Modular and mechanistic changes across stages ... \n", |
| 403 | + "4 The stability of the myelinating oligodendrocy... \n", |
| 404 | + "\n", |
| 405 | + " journal \\\n", |
| 406 | + "0 Aging cell \n", |
| 407 | + "1 International journal of tryptophan research :... \n", |
| 408 | + "2 Nature communications \n", |
| 409 | + "3 BMC cancer \n", |
| 410 | + "4 Cell reports \n", |
| 411 | + "\n", |
| 412 | + " authors year page \\\n", |
| 413 | + "0 Deng EZ, Fleishman RH, Xie Z, Marino GB, Clark... 2023 e13809 \n", |
| 414 | + "1 Morgan EW, Dong F, Annalora AJ, Murray IA, Wol... 2023 11786469231182510 \n", |
| 415 | + "2 Takata N, Miska JM, Morgan MA, Patel P, Billin... 2023 4129 \n", |
| 416 | + "3 Rahiminejad S, Maurya MR, Mukund K, Subramaniam S 2022 436 \n", |
| 417 | + "4 Pruvost M, Patzig J, Yattah C, Selcen I, Herna... 2023 112848 \n", |
| 418 | + "\n", |
| 419 | + " volume issue pmid pmcid doi ... \\\n", |
| 420 | + "0 22 6 37082798.0 PMC10265163 10.1111/acel.13809 ... \n", |
| 421 | + "1 16 37441265.0 PMC10334013 10.1177/11786469231182510 ... \n", |
| 422 | + "2 14 1 37452018.0 PMC10349100 10.1038/s41467-023-39672-2 ... \n", |
| 423 | + "3 22 1 35448980.0 PMC9022252 10.1186/s12885-022-09479-3 ... \n", |
| 424 | + "4 42 8 37515770.0 PMC10600948 10.1016/j.celrep.2023.112848 ... \n", |
| 425 | + "\n", |
| 426 | + " dccs partnerships carousel carousel_title carousel_link \\\n", |
| 427 | + "0 LINCS False \n", |
| 428 | + "1 Metabolomics False \n", |
| 429 | + "2 Metabolomics False \n", |
| 430 | + "3 Metabolomics False \n", |
| 431 | + "4 Metabolomics False \n", |
| 432 | + "\n", |
| 433 | + " carousel_description image featured r03 \\\n", |
| 434 | + "0 False \n", |
| 435 | + "1 False \n", |
| 436 | + "2 False \n", |
| 437 | + "3 False \n", |
| 438 | + "4 False \n", |
| 439 | + "\n", |
| 440 | + " keywords \n", |
| 441 | + "0 [\"neoantigens\", \"Gene Expression Profiling\", \"... \n", |
| 442 | + "1 [\"microbiome\", \"metabolomics\", \"indole\", \"Aryl... \n", |
| 443 | + "2 [\"Eye\", \"Gene Expression Regulation\", \"Mice\", ... \n", |
| 444 | + "3 [\"Gene Expression Profiling\", \"Computational B... \n", |
| 445 | + "4 [\"Chromatin\", \"Oligodendroglia\", \"progenitors\"... \n", |
| 446 | + "\n", |
| 447 | + "[5 rows x 22 columns]" |
| 448 | + ] |
| 449 | + }, |
| 450 | + "execution_count": 17, |
| 451 | + "metadata": {}, |
| 452 | + "output_type": "execute_result" |
| 453 | + } |
| 454 | + ], |
| 455 | + "source": [ |
| 456 | + "publications = pd.read_csv('data/publications.tsv', sep=\"\\t\")\n", |
| 457 | + "publications = publications[[i for i in publications.columns if not i == \"id\"]]\n", |
| 458 | + "publications = publications.fillna('')\n", |
| 459 | + "publications.head()" |
| 460 | + ] |
| 461 | + }, |
| 462 | + { |
| 463 | + "cell_type": "code", |
| 464 | + "execution_count": 22, |
| 465 | + "metadata": {}, |
| 466 | + "outputs": [ |
| 467 | + { |
| 468 | + "data": { |
| 469 | + "text/plain": [ |
| 470 | + "((85, 22), 85)" |
| 471 | + ] |
| 472 | + }, |
| 473 | + "execution_count": 22, |
| 474 | + "metadata": {}, |
| 475 | + "output_type": "execute_result" |
| 476 | + } |
| 477 | + ], |
| 478 | + "source": [ |
| 479 | + "publications.shape, publications.doi.nunique()" |
| 480 | + ] |
| 481 | + }, |
| 482 | + { |
| 483 | + "cell_type": "code", |
| 484 | + "execution_count": 24, |
| 485 | + "metadata": {}, |
| 486 | + "outputs": [ |
| 487 | + { |
| 488 | + "data": { |
| 489 | + "text/plain": [ |
| 490 | + "np.int64(1)" |
| 491 | + ] |
| 492 | + }, |
| 493 | + "execution_count": 24, |
| 494 | + "metadata": {}, |
| 495 | + "output_type": "execute_result" |
| 496 | + } |
| 497 | + ], |
| 498 | + "source": [ |
| 499 | + "(publications.pmid == '').sum()" |
| 500 | + ] |
| 501 | + }, |
| 502 | + { |
| 503 | + "cell_type": "code", |
| 504 | + "execution_count": 28, |
| 505 | + "metadata": {}, |
| 506 | + "outputs": [], |
| 507 | + "source": [ |
| 508 | + "for i, row in publications.iterrows():\n", |
| 509 | + "\tmeta = [\"layout: ../../layouts/Publication.astro\"]\n", |
| 510 | + "\tfor k,v in row.items():\n", |
| 511 | + "\t\tif not v == '':\n", |
| 512 | + "\t\t\tif k == 'dccs':\n", |
| 513 | + "\t\t\t\tv = v.split(\"; \")\n", |
| 514 | + "\n", |
206 | 515 | "\t\t\tmeta.append(\"%s: %s\"%(k, v))\n", |
207 | | - "\t\telse:\n", |
208 | | - "\t\t\tdescription = description\n", |
209 | | - "\twith open('out/dccs/%s.md'%row['short_label'], 'w') as o:\n", |
210 | | - "\t\to.write('---')\n", |
| 516 | + "\tuid = str(uuid5(NAMESPACE_URL, row['title']))\n", |
| 517 | + "\twith open('out/publications/%s.md'%uid, 'w') as o:\n", |
| 518 | + "\t\to.write('---\\n')\n", |
211 | 519 | "\t\to.write(\"\\n\".join(meta))\n", |
212 | | - "\t\to.write('---')\n", |
213 | | - "\t\to.write(description)" |
| 520 | + "\t\to.write('\\n---')" |
214 | 521 | ] |
| 522 | + }, |
| 523 | + { |
| 524 | + "cell_type": "code", |
| 525 | + "execution_count": null, |
| 526 | + "metadata": {}, |
| 527 | + "outputs": [], |
| 528 | + "source": [] |
215 | 529 | } |
216 | 530 | ], |
217 | 531 | "metadata": { |
|
0 commit comments