|
62 | 62 | }, |
63 | 63 | { |
64 | 64 | "cell_type": "code", |
65 | | - "execution_count": 1, |
| 65 | + "execution_count": 2, |
66 | 66 | "metadata": { |
67 | 67 | "ExecuteTime": { |
68 | 68 | "end_time": "2018-05-18T09:17:16.202764Z", |
|
77 | 77 | }, |
78 | 78 | { |
79 | 79 | "cell_type": "code", |
80 | | - "execution_count": 2, |
| 80 | + "execution_count": 3, |
81 | 81 | "metadata": { |
82 | 82 | "ExecuteTime": { |
83 | 83 | "end_time": "2018-05-18T09:17:17.537171Z", |
|
120 | 120 | }, |
121 | 121 | { |
122 | 122 | "cell_type": "code", |
123 | | - "execution_count": 3, |
| 123 | + "execution_count": 4, |
124 | 124 | "metadata": { |
125 | 125 | "ExecuteTime": { |
126 | 126 | "end_time": "2018-05-18T09:17:19.878701Z", |
|
132 | 132 | "name": "stdout", |
133 | 133 | "output_type": "stream", |
134 | 134 | "text": [ |
135 | | - "This is Text-Fabric 4.3.1\n", |
| 135 | + "This is Text-Fabric 5.4.2\n", |
136 | 136 | "Api reference : https://dans-labs.github.io/text-fabric/Api/General/\n", |
137 | 137 | "Tutorial : https://github.com/Dans-labs/text-fabric/blob/master/docs/tutorial.ipynb\n", |
138 | 138 | "Example data : https://github.com/Dans-labs/text-fabric-data\n", |
|
189 | 189 | }, |
190 | 190 | { |
191 | 191 | "cell_type": "code", |
192 | | - "execution_count": 4, |
| 192 | + "execution_count": 10, |
193 | 193 | "metadata": { |
194 | 194 | "ExecuteTime": { |
195 | 195 | "end_time": "2018-05-18T09:17:31.204738Z", |
|
203 | 203 | "output_type": "stream", |
204 | 204 | "text": [ |
205 | 205 | " 0.00s loading features ...\n", |
206 | | - " | 0.15s B g_word from /Users/dirk/github/etcbc/bhsa/tf/2017\n", |
207 | | - " | 0.00s B qere from /Users/dirk/github/etcbc/bhsa/tf/2017\n", |
208 | | - " | 0.00s B qere_trailer from /Users/dirk/github/etcbc/bhsa/tf/2017\n", |
209 | | - " | 0.08s B trailer from /Users/dirk/github/etcbc/bhsa/tf/2017\n", |
210 | | - " | 0.13s B sp from /Users/dirk/github/etcbc/bhsa/tf/2017\n", |
211 | | - " | 0.13s B lex from /Users/dirk/github/etcbc/bhsa/tf/2017\n", |
212 | | - " | 0.01s B voc_lex_utf8 from /Users/dirk/github/etcbc/bhsa/tf/2017\n", |
213 | | - " | 0.13s B language from /Users/dirk/github/etcbc/bhsa/tf/2017\n", |
214 | | - " | 0.09s B freq_lex from /Users/dirk/github/etcbc/bhsa/tf/2017\n", |
215 | | - " | 0.00s B gloss from /Users/dirk/github/etcbc/bhsa/tf/2017\n", |
216 | | - " | 0.20s B mother from /Users/dirk/github/etcbc/bhsa/tf/2017\n", |
217 | | - " 5.68s All features loaded/computed - for details use loadLog()\n" |
| 206 | + " 0.67s All features loaded/computed - for details use loadLog()\n" |
218 | 207 | ] |
219 | 208 | } |
220 | 209 | ], |
221 | 210 | "source": [ |
222 | 211 | "api = TF.load('''\n", |
223 | 212 | " sp lex voc_lex_utf8\n", |
224 | 213 | " g_word trailer\n", |
| 214 | + " g_lex_utf8\n", |
225 | 215 | " qere qere_trailer\n", |
226 | 216 | " language freq_lex gloss\n", |
227 | 217 | " mother\n", |
|
256 | 246 | }, |
257 | 247 | { |
258 | 248 | "cell_type": "code", |
259 | | - "execution_count": 5, |
| 249 | + "execution_count": 11, |
260 | 250 | "metadata": { |
261 | 251 | "ExecuteTime": { |
262 | 252 | "end_time": "2018-05-18T09:17:35.677198Z", |
|
267 | 257 | { |
268 | 258 | "data": { |
269 | 259 | "text/markdown": [ |
270 | | - "**Documentation:** <a target=\"_blank\" href=\"https://etcbc.github.io/bhsa\" title=\"{provenance of this corpus}\">BHSA</a> <a target=\"_blank\" href=\"https://etcbc.github.io/bhsa/features/hebrew/2017/0_home.html\" title=\"{CORPUS} feature documentation\">Feature docs</a> <a target=\"_blank\" href=\"https://dans-labs.github.io/text-fabric/Api/Bhsa/\" title=\"BHSA API documentation\">BHSA API</a> <a target=\"_blank\" href=\"https://dans-labs.github.io/text-fabric/Api/General/\" title=\"text-fabric-api\">Text-Fabric API 4.3.1</a> <a target=\"_blank\" href=\"https://dans-labs.github.io/text-fabric/Api/General/#search-templates\" title=\"Search Templates Introduction and Reference\">Search Reference</a>" |
| 260 | + "**Documentation:** <a target=\"_blank\" href=\"https://etcbc.github.io/bhsa\" title=\"{provenance of this corpus}\">BHSA</a> <a target=\"_blank\" href=\"https://etcbc.github.io/bhsa/features/hebrew/2017/0_home.html\" title=\"{CORPUS.upper()} feature documentation\">Feature docs</a> <a target=\"_blank\" href=\"https://dans-labs.github.io/text-fabric/Api/Bhsa/\" title=\"BHSA API documentation\">BHSA API</a> <a target=\"_blank\" href=\"https://dans-labs.github.io/text-fabric/Api/General/\" title=\"text-fabric-api\">Text-Fabric API 5.4.2</a> <a target=\"_blank\" href=\"https://dans-labs.github.io/text-fabric/Api/General/#search-templates\" title=\"Search Templates Introduction and Reference\">Search Reference</a>" |
271 | 261 | ], |
272 | 262 | "text/plain": [ |
273 | 263 | "<IPython.core.display.Markdown object>" |
|
295 | 285 | "data": { |
296 | 286 | "text/html": [ |
297 | 287 | "\n", |
298 | | - "<style>\n", |
| 288 | + "<style type=\"text/css\">\n", |
299 | 289 | ".verse {\n", |
300 | 290 | " display: flex;\n", |
301 | 291 | " flex-flow: row wrap;\n", |
|
304 | 294 | ".vl {\n", |
305 | 295 | " display: flex;\n", |
306 | 296 | " flex-flow: column nowrap;\n", |
| 297 | + " justify-content: flex-end;\n", |
| 298 | + " align-items: flex-end;\n", |
307 | 299 | " direction: ltr;\n", |
| 300 | + " width: 100%;\n", |
| 301 | + "}\n", |
| 302 | + ".outeritem {\n", |
| 303 | + " display: flex;\n", |
| 304 | + " flex-flow: row wrap;\n", |
| 305 | + " direction: rtl;\n", |
308 | 306 | "}\n", |
309 | 307 | ".sentence,.clause,.phrase {\n", |
310 | 308 | " margin-top: -1.2em;\n", |
|
382 | 380 | " direction: rtl;\n", |
383 | 381 | " background-color: #ffffff;\n", |
384 | 382 | "}\n", |
| 383 | + ".occs {\n", |
| 384 | + " font-size: x-small;\n", |
| 385 | + "}\n", |
385 | 386 | ".satom.l,.catom.l,.patom.l {\n", |
386 | 387 | " border-left-style: dotted\n", |
387 | 388 | "}\n", |
|
394 | 395 | ".satom.R,.catom.R,.patom.R {\n", |
395 | 396 | " border-right-style: none\n", |
396 | 397 | "}\n", |
397 | | - ".h,.h :visited,.h :link {\n", |
| 398 | + ".h,.h a:visited,.h a:link {\n", |
398 | 399 | " font-family: \"Ezra SIL\", \"SBL Hebrew\", sans-serif;\n", |
399 | 400 | " font-size: large;\n", |
400 | 401 | " color: #000044;\n", |
401 | 402 | " direction: rtl;\n", |
402 | 403 | " text-decoration: none;\n", |
403 | 404 | "}\n", |
| 405 | + ".hb,.hb a:visited,.hb a:link {\n", |
| 406 | + " font-family: \"Ezra SIL\", \"SBL Hebrew\", sans-serif;\n", |
| 407 | + " font-size: large;\n", |
| 408 | + " direction: rtl;\n", |
| 409 | + " text-decoration: none;\n", |
| 410 | + "}\n", |
404 | 411 | ".rela,.function,.typ {\n", |
405 | 412 | " font-family: monospace;\n", |
406 | 413 | " font-size: small;\n", |
407 | 414 | " color: #0000bb;\n", |
408 | 415 | "}\n", |
409 | | - ".sp,.sp :visited,.sp :link {\n", |
| 416 | + ".pdp,.pdp a:visited,.pdp a:link {\n", |
410 | 417 | " font-family: monospace;\n", |
411 | 418 | " font-size: medium;\n", |
412 | 419 | " color: #0000bb;\n", |
413 | 420 | " text-decoration: none;\n", |
414 | 421 | "}\n", |
415 | | - ".vl {\n", |
| 422 | + ".voc_lex {\n", |
416 | 423 | " font-family: monospace;\n", |
417 | 424 | " font-size: medium;\n", |
418 | 425 | " color: #0000bb;\n", |
419 | 426 | "}\n", |
420 | | - ".vvs {\n", |
| 427 | + ".vs {\n", |
421 | 428 | " font-family: monospace;\n", |
422 | 429 | " font-size: medium;\n", |
423 | 430 | " font-weight: bold;\n", |
424 | 431 | " color: #0000bb;\n", |
425 | 432 | "}\n", |
426 | | - ".vvt {\n", |
| 433 | + ".vt {\n", |
427 | 434 | " font-family: monospace;\n", |
428 | 435 | " font-size: medium;\n", |
| 436 | + " font-weight: bold;\n", |
429 | 437 | " color: #0000bb;\n", |
430 | 438 | "}\n", |
431 | | - ".gl {\n", |
| 439 | + ".gloss {\n", |
432 | 440 | " font-family: sans-serif;\n", |
433 | 441 | " font-size: small;\n", |
| 442 | + " font-weight: normal;\n", |
434 | 443 | " color: #aaaaaa;\n", |
435 | 444 | "}\n", |
436 | | - ".vs {\n", |
| 445 | + ".vrs {\n", |
437 | 446 | " font-family: sans-serif;\n", |
438 | 447 | " font-size: small;\n", |
439 | 448 | " font-weight: bold;\n", |
|
444 | 453 | " font-size: x-small;\n", |
445 | 454 | " color: #999999;\n", |
446 | 455 | "}\n", |
447 | | - ".feat {\n", |
| 456 | + ".features {\n", |
448 | 457 | " font-family: monospace;\n", |
449 | 458 | " font-size: medium;\n", |
450 | 459 | " font-weight: bold;\n", |
451 | 460 | " color: #0a6611;\n", |
| 461 | + " display: flex;\n", |
| 462 | + " flex-flow: column nowrap;\n", |
| 463 | + " padding: 0.1em;\n", |
| 464 | + " margin: 0.1em;\n", |
452 | 465 | "}\n", |
453 | | - ".feat .f {\n", |
| 466 | + ".features .f {\n", |
454 | 467 | " font-family: sans-serif;\n", |
455 | 468 | " font-size: x-small;\n", |
| 469 | + " font-weight: normal;\n", |
456 | 470 | " color: #5555bb;\n", |
457 | 471 | "}\n", |
| 472 | + ".word .features div,.word .features span {\n", |
| 473 | + " padding: 0;\n", |
| 474 | + " margin: -0.1rem 0;\n", |
| 475 | + "}\n", |
| 476 | + "\n", |
458 | 477 | ".hl {\n", |
459 | 478 | " background-color: #ffee66;\n", |
460 | 479 | "}\n", |
|
1844 | 1863 | "A five minute wait is not pleasant in interactive computing!" |
1845 | 1864 | ] |
1846 | 1865 | }, |
| 1866 | + { |
| 1867 | + "cell_type": "markdown", |
| 1868 | + "metadata": {}, |
| 1869 | + "source": [ |
| 1870 | + "### A frequency mapping of lexemes\n", |
| 1871 | + "\n", |
| 1872 | + "We make a mapping between lexeme forms and the number of occurrences of those lexemes." |
| 1873 | + ] |
| 1874 | + }, |
| 1875 | + { |
| 1876 | + "cell_type": "code", |
| 1877 | + "execution_count": 17, |
| 1878 | + "metadata": {}, |
| 1879 | + "outputs": [], |
| 1880 | + "source": [ |
| 1881 | + "lexeme_dict = {\n", |
| 1882 | + " F.g_lex_utf8.v(n): F.freq_lex.v(n) \n", |
| 1883 | + " for n in F.otype.s('word')\n", |
| 1884 | + "}" |
| 1885 | + ] |
| 1886 | + }, |
| 1887 | + { |
| 1888 | + "cell_type": "code", |
| 1889 | + "execution_count": 18, |
| 1890 | + "metadata": {}, |
| 1891 | + "outputs": [ |
| 1892 | + { |
| 1893 | + "data": { |
| 1894 | + "text/plain": [ |
| 1895 | + "[('בְּ', 15542),\n", |
| 1896 | + " ('רֵאשִׁית', 51),\n", |
| 1897 | + " ('בָּרָא', 48),\n", |
| 1898 | + " ('אֱלֹה', 2601),\n", |
| 1899 | + " ('אֵת', 10997),\n", |
| 1900 | + " ('הַ', 30386),\n", |
| 1901 | + " ('שָּׁמַי', 421),\n", |
| 1902 | + " ('וְ', 50272),\n", |
| 1903 | + " ('הָ', 30386),\n", |
| 1904 | + " ('אָרֶץ', 2504)]" |
| 1905 | + ] |
| 1906 | + }, |
| 1907 | + "execution_count": 18, |
| 1908 | + "metadata": {}, |
| 1909 | + "output_type": "execute_result" |
| 1910 | + } |
| 1911 | + ], |
| 1912 | + "source": [ |
| 1913 | + "list(lexeme_dict.items())[0:10]" |
| 1914 | + ] |
| 1915 | + }, |
| 1916 | + { |
| 1917 | + "cell_type": "markdown", |
| 1918 | + "metadata": {}, |
| 1919 | + "source": [ |
| 1920 | + "### Real work\n", |
| 1921 | + "\n", |
| 1922 | + "As a primer of real-world work on lexeme distribution, have a look at James Cuénod's notebook on \n", |
| 1923 | + "[Collocation MI Analysis of the Hebrew Bible](https://nbviewer.jupyter.org/github/jcuenod/hebrewCollocations/blob/master/Collocation%20MI%20Analysis%20of%20the%20Hebrew%20Bible.ipynb)\n", |
| 1924 | + "\n", |
| 1925 | + "It is a nice example of how you collect data with TF API calls, then do research with your own methods and tools, and then use TF for presenting results.\n", |
| 1926 | + "\n", |
| 1927 | + "In case the name has changed, the enclosing repo is\n", |
| 1928 | + "[here](https://nbviewer.jupyter.org/github/jcuenod/hebrewCollocations/tree/master/)." |
| 1929 | + ] |
| 1930 | + }, |
1847 | 1931 | { |
1848 | 1932 | "cell_type": "markdown", |
1849 | 1933 | "metadata": {}, |
|
5085 | 5169 | "name": "python", |
5086 | 5170 | "nbconvert_exporter": "python", |
5087 | 5171 | "pygments_lexer": "ipython3", |
5088 | | - "version": "3.6.5" |
| 5172 | + "version": "3.7.0" |
5089 | 5173 | }, |
5090 | 5174 | "toc": { |
5091 | 5175 | "base_numbering": 1, |
|
0 commit comments