|
18 | 18 | }, |
19 | 19 | { |
20 | 20 | "cell_type": "code", |
21 | | - "execution_count": 1, |
| 21 | + "execution_count": 5, |
22 | 22 | "id": "a3c58dad-95fa-4fe1-8971-521842ea4182", |
23 | 23 | "metadata": {}, |
24 | 24 | "outputs": [], |
|
38 | 38 | }, |
39 | 39 | { |
40 | 40 | "cell_type": "code", |
41 | | - "execution_count": 2, |
| 41 | + "execution_count": 6, |
42 | 42 | "id": "966418dd-9400-405c-8983-a4714ad51704", |
43 | 43 | "metadata": {}, |
44 | 44 | "outputs": [ |
|
4316 | 4316 | { |
4317 | 4317 | "cell_type": "code", |
4318 | 4318 | "execution_count": null, |
| 4319 | + "id": "326f8a0e-61f5-42fb-871f-aaeb60b9b33a", |
| 4320 | + "metadata": {}, |
| 4321 | + "outputs": [], |
| 4322 | + "source": [] |
| 4323 | + }, |
| 4324 | + { |
| 4325 | + "cell_type": "code", |
| 4326 | + "execution_count": null, |
| 4327 | + "id": "53c5c073-a47e-4d08-99e3-91ee91bb5f64", |
| 4328 | + "metadata": {}, |
| 4329 | + "outputs": [], |
| 4330 | + "source": [] |
| 4331 | + }, |
| 4332 | + { |
| 4333 | + "cell_type": "code", |
| 4334 | + "execution_count": null, |
| 4335 | + "id": "007e3796-3095-42b7-b90b-6c536722a719", |
| 4336 | + "metadata": {}, |
| 4337 | + "outputs": [], |
| 4338 | + "source": [] |
| 4339 | + }, |
| 4340 | + { |
| 4341 | + "cell_type": "code", |
| 4342 | + "execution_count": 1, |
| 4343 | + "id": "f1f616b5-1187-40c5-8736-cd1c8f5eb554", |
| 4344 | + "metadata": {}, |
| 4345 | + "outputs": [], |
| 4346 | + "source": [ |
| 4347 | + "import numpy as np" |
| 4348 | + ] |
| 4349 | + }, |
| 4350 | + { |
| 4351 | + "cell_type": "code", |
| 4352 | + "execution_count": 2, |
4319 | 4353 | "id": "d298a22c-d9fe-44d4-897f-e763d35cb7d9", |
4320 | 4354 | "metadata": {}, |
| 4355 | + "outputs": [ |
| 4356 | + { |
| 4357 | + "data": { |
| 4358 | + "text/plain": [ |
| 4359 | + "array([9148, 9149, 9150, ..., 1937, 1938, 1939])" |
| 4360 | + ] |
| 4361 | + }, |
| 4362 | + "execution_count": 2, |
| 4363 | + "metadata": {}, |
| 4364 | + "output_type": "execute_result" |
| 4365 | + } |
| 4366 | + ], |
| 4367 | + "source": [ |
| 4368 | + "seq_len = 37498\n", |
| 4369 | + "n_reads = 9527-12\n", |
| 4370 | + "read_length = 151\n", |
| 4371 | + "\n", |
| 4372 | + "\n", |
| 4373 | + "df_sim = np.array([int(x) for x in np.random.uniform(low=0.0, high=seq_len, size=n_reads)])\n", |
| 4374 | + "pos = []\n", |
| 4375 | + "for s in df_sim:\n", |
| 4376 | + " for i in range(s, s+read_length):\n", |
| 4377 | + " pos.append(i)\n", |
| 4378 | + "pos = np.array(pos)\n", |
| 4379 | + "pos" |
| 4380 | + ] |
| 4381 | + }, |
| 4382 | + { |
| 4383 | + "cell_type": "code", |
| 4384 | + "execution_count": 30, |
| 4385 | + "id": "35643811-795f-4387-9f9c-d99bfc17e3a8", |
| 4386 | + "metadata": {}, |
| 4387 | + "outputs": [ |
| 4388 | + { |
| 4389 | + "name": "stdout", |
| 4390 | + "output_type": "stream", |
| 4391 | + "text": [ |
| 4392 | + "[15.37810676 13.95450312 14.17387291 10.11706523]\n" |
| 4393 | + ] |
| 4394 | + } |
| 4395 | + ], |
| 4396 | + "source": [ |
| 4397 | + "def depth_per_haplotype(rng, mean_depth, std_depth, n_hap):\n", |
| 4398 | + " if isinstance(mean_depth, np.ndarray):\n", |
| 4399 | + " return mean_depth\n", |
| 4400 | + " else:\n", |
| 4401 | + " dp = np.full((n_hap, ), 0.0)\n", |
| 4402 | + " while (dp <= 0).sum():\n", |
| 4403 | + " n = (dp <= 0).sum()\n", |
| 4404 | + " dp[dp <= 0] = rng.normal(loc = mean_depth, scale = std_depth, size=n)\n", |
| 4405 | + " return dp\n", |
| 4406 | + "\n", |
| 4407 | + "gm = np.array([[0, 0, 1, 0], [1, 1, 0, 1]])\n", |
| 4408 | + "mean_depth = 15\n", |
| 4409 | + "e = 0.05\n", |
| 4410 | + "ploidy = 2\n", |
| 4411 | + "seed = 2\n", |
| 4412 | + "std_depth = 2\n", |
| 4413 | + "\n", |
| 4414 | + "err = np.array([[1-e, e/3, e/3, e/3], [e/3, 1-e, e/3, e/3], [e/3, e/3, 1-e, e/3], [e/3, e/3, e/3, 1-e]])\n", |
| 4415 | + "rng = np.random.default_rng(seed)\n", |
| 4416 | + "#1. Depths (DP) per haplotype (h)\n", |
| 4417 | + "DPh = depth_per_haplotype(rng, mean_depth, std_depth, gm.shape[1])\n", |
| 4418 | + "print(DPh)" |
| 4419 | + ] |
| 4420 | + }, |
| 4421 | + { |
| 4422 | + "cell_type": "code", |
| 4423 | + "execution_count": 10, |
| 4424 | + "id": "566fd8ee-ed0c-49c4-b326-83c6cd7e9aa0", |
| 4425 | + "metadata": {}, |
| 4426 | + "outputs": [ |
| 4427 | + { |
| 4428 | + "data": { |
| 4429 | + "text/plain": [ |
| 4430 | + "array([15.37810676, 13.95450312, 14.17387291, 10.11706523])" |
| 4431 | + ] |
| 4432 | + }, |
| 4433 | + "execution_count": 10, |
| 4434 | + "metadata": {}, |
| 4435 | + "output_type": "execute_result" |
| 4436 | + } |
| 4437 | + ], |
| 4438 | + "source": [ |
| 4439 | + "DPh" |
| 4440 | + ] |
| 4441 | + }, |
| 4442 | + { |
| 4443 | + "cell_type": "code", |
| 4444 | + "execution_count": 57, |
| 4445 | + "id": "259f5a19-f129-4251-beb6-3a9eaac155c4", |
| 4446 | + "metadata": {}, |
| 4447 | + "outputs": [ |
| 4448 | + { |
| 4449 | + "data": { |
| 4450 | + "text/plain": [ |
| 4451 | + "(4, 10)" |
| 4452 | + ] |
| 4453 | + }, |
| 4454 | + "execution_count": 57, |
| 4455 | + "metadata": {}, |
| 4456 | + "output_type": "execute_result" |
| 4457 | + } |
| 4458 | + ], |
| 4459 | + "source": [ |
| 4460 | + "def linked_depth(rng, DPh, read_length, sites_n):\n", |
| 4461 | + " '''\n", |
| 4462 | + " Simulates reads in a contiguous genomic region to compute the depth per position.\n", |
| 4463 | + " \n", |
| 4464 | + " Parameters\n", |
| 4465 | + " ----------\n", |
| 4466 | + " rng : `numpy.random._generator.Generator` \n", |
| 4467 | + " random number generation numpy object\n", |
| 4468 | + " DPh : `numpy.ndarray`\n", |
| 4469 | + " Numpy array with the depth per haplotype\n", |
| 4470 | + " read_length : `int`\n", |
| 4471 | + " Read length in base pair units\n", |
| 4472 | + " sites_n : `int`\n", |
| 4473 | + " number of sites that depth has to be simulated for\n", |
| 4474 | + " \n", |
| 4475 | + " Returns \n", |
| 4476 | + " -------\n", |
| 4477 | + " DP : `numpy.ndarray`\n", |
| 4478 | + " Depth per site per haplotype\n", |
| 4479 | + " '''\n", |
| 4480 | + " DP = []\n", |
| 4481 | + " read_n = ((DPh*sites_n)/read_length).astype(\"int\")\n", |
| 4482 | + " for r in read_n:\n", |
| 4483 | + " dp = np.zeros((sites_n,), dtype=int)\n", |
| 4484 | + " for p in rng.integers(low=0, high=sites_n-read_length+1, size=r):\n", |
| 4485 | + " dp[p:p+read_length] += 1\n", |
| 4486 | + " DP.append(dp.tolist())\n", |
| 4487 | + " return np.array(DP)\n", |
| 4488 | + " \n", |
| 4489 | + "linked_depth(rng, DPh, read_length = 2, sites_n = 10)" |
| 4490 | + ] |
| 4491 | + }, |
| 4492 | + { |
| 4493 | + "cell_type": "code", |
| 4494 | + "execution_count": 26, |
| 4495 | + "id": "08b04ef0-d54f-45f8-bff1-640916061ea3", |
| 4496 | + "metadata": {}, |
| 4497 | + "outputs": [ |
| 4498 | + { |
| 4499 | + "data": { |
| 4500 | + "text/plain": [ |
| 4501 | + "array([5, 4, 1, 1, 1, 3, 7, 9, 4, 3])" |
| 4502 | + ] |
| 4503 | + }, |
| 4504 | + "execution_count": 26, |
| 4505 | + "metadata": {}, |
| 4506 | + "output_type": "execute_result" |
| 4507 | + } |
| 4508 | + ], |
| 4509 | + "source": [ |
| 4510 | + "np.random.randint(low = 0, high = 10, size = 10)" |
| 4511 | + ] |
| 4512 | + }, |
| 4513 | + { |
| 4514 | + "cell_type": "code", |
| 4515 | + "execution_count": 38, |
| 4516 | + "id": "a03db2fd-3480-47fd-aa80-6c41a0f41cc6", |
| 4517 | + "metadata": {}, |
| 4518 | + "outputs": [ |
| 4519 | + { |
| 4520 | + "data": { |
| 4521 | + "text/plain": [ |
| 4522 | + "(10,)" |
| 4523 | + ] |
| 4524 | + }, |
| 4525 | + "execution_count": 38, |
| 4526 | + "metadata": {}, |
| 4527 | + "output_type": "execute_result" |
| 4528 | + } |
| 4529 | + ], |
| 4530 | + "source": [ |
| 4531 | + "np.arange(10).shape" |
| 4532 | + ] |
| 4533 | + }, |
| 4534 | + { |
| 4535 | + "cell_type": "code", |
| 4536 | + "execution_count": 58, |
| 4537 | + "id": "bc2d50fd-39e0-48fd-8088-31dd9c42bbb8", |
| 4538 | + "metadata": {}, |
| 4539 | + "outputs": [ |
| 4540 | + { |
| 4541 | + "data": { |
| 4542 | + "text/plain": [ |
| 4543 | + "numpy.random._generator.Generator" |
| 4544 | + ] |
| 4545 | + }, |
| 4546 | + "execution_count": 58, |
| 4547 | + "metadata": {}, |
| 4548 | + "output_type": "execute_result" |
| 4549 | + } |
| 4550 | + ], |
| 4551 | + "source": [ |
| 4552 | + "type(rng)" |
| 4553 | + ] |
| 4554 | + }, |
| 4555 | + { |
| 4556 | + "cell_type": "code", |
| 4557 | + "execution_count": null, |
| 4558 | + "id": "2a474f22-7871-42cd-8459-ea7197b1284a", |
| 4559 | + "metadata": {}, |
4321 | 4560 | "outputs": [], |
4322 | 4561 | "source": [] |
4323 | 4562 | } |
|
0 commit comments