Skip to content

Commit 30e3b70

Browse files
Merge pull request #82 from paxtonfitzpatrick/master
final edits to paper and supplement
2 parents 117c538 + 30d4b0e commit 30e3b70

File tree

12 files changed

+921
-300
lines changed

12 files changed

+921
-300
lines changed

code/notebooks/README.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ analyses are described or figures appear in the paper.
1515

1616
### `supp/`
1717
**code to reproduce tables and figures from the Supplementary Materials document**
18-
- `1_tables.ipynb` – code to generate Tables S1 & S2
19-
- `2_topic-weight-fig.ipynb` – code to generate Figure S1 and compute related stats
20-
- `3_individual-maps.ipynb` – code to generate Figures S2–S6
18+
- `1_tables.ipynb` – code to generate Supplementary Tables 1 & 2
19+
- `2_topic-word-distributions-fig.ipynb` – code to generate Supplementary Figure 1
20+
- `2_topic-weight-fig.ipynb` – code to generate Supplementary Figure 2 and compute related stats
21+
- `3_individual-maps.ipynb` – code to generate Supplementary Figures 3–7

code/notebooks/supp/1_tables.ipynb

Lines changed: 197 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
}
3434
],
3535
"source": [
36+
"import numpy as np\n",
3637
"import pandas as pd\n",
3738
"\n",
3839
"from khan_helpers import Experiment\n",
@@ -103,16 +104,19 @@
103104
"show_source(get_top_words)"
104105
]
105106
},
107+
{
108+
"cell_type": "markdown",
109+
"id": "c213ed3e",
110+
"metadata": {},
111+
"source": [
112+
"# Load data"
113+
]
114+
},
106115
{
107116
"cell_type": "code",
108117
"execution_count": 3,
109-
"id": "d9e5d7c7",
110-
"metadata": {
111-
"ExecuteTime": {
112-
"end_time": "2023-02-01T02:47:31.840419Z",
113-
"start_time": "2023-02-01T02:47:31.834087Z"
114-
}
115-
},
118+
"id": "b343c783",
119+
"metadata": {},
116120
"outputs": [],
117121
"source": [
118122
"exp = Experiment()\n",
@@ -385,13 +389,8 @@
385389
{
386390
"cell_type": "code",
387391
"execution_count": 6,
388-
"id": "3aa1b25b",
389-
"metadata": {
390-
"ExecuteTime": {
391-
"end_time": "2023-02-01T02:47:31.878945Z",
392-
"start_time": "2023-02-01T02:47:31.865589Z"
393-
}
394-
},
392+
"id": "3ef9a02e",
393+
"metadata": {},
395394
"outputs": [
396395
{
397396
"data": {
@@ -492,24 +491,159 @@
492491
" <td>several</td>\n",
493492
" <td>jupiter</td>\n",
494493
" </tr>\n",
494+
" <tr>\n",
495+
" <th>6</th>\n",
496+
" <td>enough</td>\n",
497+
" <td>ignition</td>\n",
498+
" <td>proton</td>\n",
499+
" <td>force</td>\n",
500+
" <td>get</td>\n",
501+
" <td>close</td>\n",
502+
" <td>nucleus</td>\n",
503+
" <td>coulomb</td>\n",
504+
" <td>fusion</td>\n",
505+
" <td>would</td>\n",
506+
" </tr>\n",
507+
" <tr>\n",
508+
" <th>7</th>\n",
509+
" <td>energy</td>\n",
510+
" <td>pressure</td>\n",
511+
" <td>ignition</td>\n",
512+
" <td>mass</td>\n",
513+
" <td>little</td>\n",
514+
" <td>keep</td>\n",
515+
" <td>provide</td>\n",
516+
" <td>fusion</td>\n",
517+
" <td>get</td>\n",
518+
" <td>hydrogen</td>\n",
519+
" </tr>\n",
520+
" <tr>\n",
521+
" <th>8</th>\n",
522+
" <td>proton</td>\n",
523+
" <td>weak</td>\n",
524+
" <td>neutron</td>\n",
525+
" <td>interaction</td>\n",
526+
" <td>one</td>\n",
527+
" <td>go</td>\n",
528+
" <td>nucleon</td>\n",
529+
" <td>cesium</td>\n",
530+
" <td>extra</td>\n",
531+
" <td>get</td>\n",
532+
" </tr>\n",
533+
" <tr>\n",
534+
" <th>10</th>\n",
535+
" <td>huge</td>\n",
536+
" <td>cloud</td>\n",
537+
" <td>space</td>\n",
538+
" <td>float</td>\n",
539+
" <td>imagine</td>\n",
540+
" <td>hydrogen</td>\n",
541+
" <td>atom</td>\n",
542+
" <td>say</td>\n",
543+
" <td>distance</td>\n",
544+
" <td>combine</td>\n",
545+
" </tr>\n",
546+
" <tr>\n",
547+
" <th>11</th>\n",
548+
" <td>one</td>\n",
549+
" <td>hydrogen</td>\n",
550+
" <td>helium</td>\n",
551+
" <td>go</td>\n",
552+
" <td>proton</td>\n",
553+
" <td>neutron</td>\n",
554+
" <td>keep</td>\n",
555+
" <td>atomic</td>\n",
556+
" <td>detail</td>\n",
557+
" <td>fuse</td>\n",
558+
" </tr>\n",
559+
" <tr>\n",
560+
" <th>12</th>\n",
561+
" <td>gravity</td>\n",
562+
" <td>force</td>\n",
563+
" <td>weak</td>\n",
564+
" <td>interaction</td>\n",
565+
" <td>apply</td>\n",
566+
" <td>strength</td>\n",
567+
" <td>distance</td>\n",
568+
" <td>ten</td>\n",
569+
" <td>relative</td>\n",
570+
" <td>next</td>\n",
571+
" </tr>\n",
572+
" <tr>\n",
573+
" <th>13</th>\n",
574+
" <td>force</td>\n",
575+
" <td>go</td>\n",
576+
" <td>electrostatic</td>\n",
577+
" <td>call</td>\n",
578+
" <td>charge</td>\n",
579+
" <td>magnet</td>\n",
580+
" <td>side</td>\n",
581+
" <td>coulomb</td>\n",
582+
" <td>know</td>\n",
583+
" <td>different</td>\n",
584+
" </tr>\n",
585+
" <tr>\n",
586+
" <th>14</th>\n",
587+
" <td>force</td>\n",
588+
" <td>atom</td>\n",
589+
" <td>nucleus</td>\n",
590+
" <td>electron</td>\n",
591+
" <td>much</td>\n",
592+
" <td>hydrogen</td>\n",
593+
" <td>get</td>\n",
594+
" <td>around</td>\n",
595+
" <td>coulomb</td>\n",
596+
" <td>charge</td>\n",
597+
" </tr>\n",
598+
" <tr>\n",
599+
" <th>15</th>\n",
600+
" <td>force</td>\n",
601+
" <td>scale</td>\n",
602+
" <td>gravity</td>\n",
603+
" <td>start</td>\n",
604+
" <td>weak</td>\n",
605+
" <td>orbit</td>\n",
606+
" <td>keep</td>\n",
607+
" <td>fundamental</td>\n",
608+
" <td>around</td>\n",
609+
" <td>surprise</td>\n",
610+
" </tr>\n",
495611
" </tbody>\n",
496612
"</table>\n",
497613
"</div>"
498614
],
499615
"text/plain": [
500-
"Topic 1 2 3 4 5 6 \\\n",
501-
"1 star helium main mass atomic sequence \n",
502-
"2 charge force mass gravity strong attract \n",
503-
"3 huge force electromagnetic macro way scale \n",
504-
"4 atom dense go hydrogen slow get \n",
505-
"5 fusion get threshold core occur mass \n",
616+
"Topic 1 2 3 4 5 6 \\\n",
617+
"1 star helium main mass atomic sequence \n",
618+
"2 charge force mass gravity strong attract \n",
619+
"3 huge force electromagnetic macro way scale \n",
620+
"4 atom dense go hydrogen slow get \n",
621+
"5 fusion get threshold core occur mass \n",
622+
"6 enough ignition proton force get close \n",
623+
"7 energy pressure ignition mass little keep \n",
624+
"8 proton weak neutron interaction one go \n",
625+
"10 huge cloud space float imagine hydrogen \n",
626+
"11 one hydrogen helium go proton neutron \n",
627+
"12 gravity force weak interaction apply strength \n",
628+
"13 force go electrostatic call charge magnet \n",
629+
"14 force atom nucleus electron much hydrogen \n",
630+
"15 force scale gravity start weak orbit \n",
506631
"\n",
507-
"Topic 7 8 9 10 \n",
508-
"1 get energy fuse hydrogen \n",
509-
"2 large strength distance electromagnetic \n",
510-
"3 concentration apply kind charge \n",
511-
"4 huge condense mass would \n",
512-
"5 something start several jupiter "
632+
"Topic 7 8 9 10 \n",
633+
"1 get energy fuse hydrogen \n",
634+
"2 large strength distance electromagnetic \n",
635+
"3 concentration apply kind charge \n",
636+
"4 huge condense mass would \n",
637+
"5 something start several jupiter \n",
638+
"6 nucleus coulomb fusion would \n",
639+
"7 provide fusion get hydrogen \n",
640+
"8 nucleon cesium extra get \n",
641+
"10 atom say distance combine \n",
642+
"11 keep atomic detail fuse \n",
643+
"12 distance ten relative next \n",
644+
"13 side coulomb know different \n",
645+
"14 get around coulomb charge \n",
646+
"15 keep fundamental around surprise "
513647
]
514648
},
515649
"execution_count": 6,
@@ -522,7 +656,8 @@
522656
"top_words.columns = range(1, len(top_words.columns) + 1)\n",
523657
"top_words.index = range(1, len(top_words.index) + 1)\n",
524658
"top_words.columns.name = 'Topic'\n",
525-
"top_words.head()"
659+
"top_words = top_words.drop(9)\n",
660+
"top_words"
526661
]
527662
},
528663
{
@@ -540,34 +675,51 @@
540675
"name": "stdout",
541676
"output_type": "stream",
542677
"text": [
543-
"\\begin{tabular}{lllllllllll}\n",
678+
"\\begin{tabular}{r|l|l|l|l|l|l|l|l|l|l}\n",
544679
"\\toprule\n",
545-
"Topic & 1 & 2 & 3 & 4 & 5 & 6 & 7 & 8 & 9 & 10 \\\\\\hline\n",
680+
"\\textbf{Topic} & \\textbf{1} & \\textbf{2} & \\textbf{3} & \\textbf{4} & \\textbf{5} & \\textbf{6} & \\textbf{7} & \\textbf{8} & \\textbf{9} & \\textbf{10} \\\\\\hline\n",
546681
"\\midrule\n",
547-
"1 & star & helium & main & mass & atomic & sequence & get & energy & fuse & hydrogen \\\\\\hline\n",
548-
"2 & charge & force & mass & gravity & strong & attract & large & strength & distance & electromagnetic \\\\\\hline\n",
549-
"3 & huge & force & electromagnetic & macro & way & scale & concentration & apply & kind & charge \\\\\\hline\n",
550-
"4 & atom & dense & go & hydrogen & slow & get & huge & condense & mass & would \\\\\\hline\n",
551-
"5 & fusion & get & threshold & core & occur & mass & something & start & several & jupiter \\\\\\hline\n",
552-
"6 & enough & ignition & proton & force & get & close & nucleus & coulomb & fusion & would \\\\\\hline\n",
553-
"7 & energy & pressure & ignition & mass & little & keep & provide & fusion & get & hydrogen \\\\\\hline\n",
554-
"8 & proton & weak & neutron & interaction & one & go & nucleon & cesium & extra & get \\\\\\hline\n",
555-
"9 & run & kind & say & go & want & would & get & well & give & although \\\\\\hline\n",
556-
"10 & huge & cloud & space & float & imagine & hydrogen & atom & say & distance & combine \\\\\\hline\n",
557-
"11 & one & hydrogen & helium & go & proton & neutron & keep & atomic & detail & fuse \\\\\\hline\n",
558-
"12 & gravity & force & weak & interaction & apply & strength & distance & ten & relative & next \\\\\\hline\n",
559-
"13 & force & go & electrostatic & call & charge & magnet & side & coulomb & know & different \\\\\\hline\n",
560-
"14 & force & atom & nucleus & electron & much & hydrogen & get & around & coulomb & charge \\\\\\hline\n",
561-
"15 & force & scale & gravity & start & weak & orbit & keep & fundamental & around & surprise \\\\\\hline\n",
682+
" 1 & star & helium & main & mass & atomic & sequence & get & energy & fuse & hydrogen \\\\\\hline\n",
683+
" 2 & charge & force & mass & gravity & strong & attract & large & strength & distance & electromagnetic \\\\\\hline\n",
684+
" 3 & huge & force & electromagnetic & macro & way & scale & concentration & apply & kind & charge \\\\\\hline\n",
685+
" 4 & atom & dense & go & hydrogen & slow & get & huge & condense & mass & would \\\\\\hline\n",
686+
" 5 & fusion & get & threshold & core & occur & mass & something & start & several & jupiter \\\\\\hline\n",
687+
" 6 & enough & ignition & proton & force & get & close & nucleus & coulomb & fusion & would \\\\\\hline\n",
688+
" 7 & energy & pressure & ignition & mass & little & keep & provide & fusion & get & hydrogen \\\\\\hline\n",
689+
" 8 & proton & weak & neutron & interaction & one & go & nucleon & cesium & extra & get \\\\\\hline\n",
690+
" 10 & huge & cloud & space & float & imagine & hydrogen & atom & say & distance & combine \\\\\\hline\n",
691+
" 11 & one & hydrogen & helium & go & proton & neutron & keep & atomic & detail & fuse \\\\\\hline\n",
692+
" 12 & gravity & force & weak & interaction & apply & strength & distance & ten & relative & next \\\\\\hline\n",
693+
" 13 & force & go & electrostatic & call & charge & magnet & side & coulomb & know & different \\\\\\hline\n",
694+
" 14 & force & atom & nucleus & electron & much & hydrogen & get & around & coulomb & charge \\\\\\hline\n",
695+
" 15 & force & scale & gravity & start & weak & orbit & keep & fundamental & around & surprise \\\\\\hline\n",
562696
"\\bottomrule\n",
563697
"\\end{tabular}\n",
564698
"\n"
565699
]
566700
}
567701
],
568702
"source": [
569-
"print(top_words.to_latex().replace(r'\\\\', r'\\\\\\hline'))"
703+
"top_words.index.name = top_words.columns.name\n",
704+
"top_words.columns.name = None\n",
705+
"top_words = top_words.reset_index()\n",
706+
"colnames_bold = [f'\\\\textbf{{{c}}}' for c in top_words.columns]\n",
707+
"\n",
708+
"latex_code = top_words.to_latex(index=False, \n",
709+
" na_rep='', \n",
710+
" column_format='r|l|l|l|l|l|l|l|l|l|l', \n",
711+
" escape=False, \n",
712+
" header=colnames_bold)\n",
713+
"print(latex_code.replace(r'\\\\', r'\\\\\\hline'))"
570714
]
715+
},
716+
{
717+
"cell_type": "code",
718+
"execution_count": null,
719+
"id": "b32f4f41",
720+
"metadata": {},
721+
"outputs": [],
722+
"source": []
571723
}
572724
],
573725
"metadata": {

0 commit comments

Comments
 (0)