diff --git a/pdbtools/pdb_selaltloc.py b/pdbtools/pdb_selaltloc.py index 7067672..a8ca489 100644 --- a/pdbtools/pdb_selaltloc.py +++ b/pdbtools/pdb_selaltloc.py @@ -307,15 +307,30 @@ def flush_resloc_occ(altloc_lines, **kw): # detects which altloc identifier has the highest occupancy for key, lines2flush in altloc_lines.items(): + if key == ' ': + continue # we check only the first line because all atoms in one identifier # should have the same occupancy value occ = float(lines2flush[0][54:60]) + if occ > highest: altloc = key highest = occ - for line2flush in altloc_lines[altloc]: - yield line2flush[:16] + ' ' + line2flush[17:] + if ' ' in altloc_lines.keys(): + # here we concatenate atoms with no altloc with the + # atoms with the highest altloc + output_lines = {' ': altloc_lines[' '] + altloc_lines[altloc]} + sorted_atoms = _get_sort_atoms(output_lines) + for atom, linet in sorted_atoms: + lines = linet[1] + for line in lines: + yield line[:16] + ' ' + line[17:] + else: + # only alternate locations. Just yield all those lines + output_lines = altloc_lines[altloc] + for line2flush in altloc_lines[altloc]: + yield line2flush[:16] + ' ' + line2flush[17:] def flush_resloc_id_same_residue(selloc, altloc_lines): diff --git a/tests/data/pdb4xoj-small.pdb b/tests/data/pdb4xoj-small.pdb new file mode 100644 index 0000000..340f13a --- /dev/null +++ b/tests/data/pdb4xoj-small.pdb @@ -0,0 +1,120 @@ +ATOM 511 N SER A 81 -2.125 6.104 21.775 1.00 8.48 N +ANISOU 511 N SER A 81 1227 964 1028 -36 224 13 N +ATOM 512 CA SER A 81 -1.772 4.741 22.205 1.00 8.74 C +ANISOU 512 CA SER A 81 1233 1007 1077 11 199 56 C +ATOM 513 C SER A 81 -2.262 3.698 21.215 1.00 8.42 C +ANISOU 513 C SER A 81 1167 989 1041 -75 90 138 C +ATOM 514 O SER A 81 -3.353 3.829 20.638 1.00 9.44 O +ANISOU 514 O SER A 81 1153 1058 1375 -48 41 62 O +ATOM 515 CB SER A 81 -2.424 4.479 23.564 1.00 10.45 C +ANISOU 515 CB SER A 81 1683 1191 1094 -49 249 19 C +ATOM 516 OG SER A 81 -1.889 5.387 24.506 1.00 13.63 O +ANISOU 516 OG SER A 81 2449 1531 1199 -52 258 -6 O +ATOM 517 N AALA A 82 -1.494 2.619 21.079 0.50 8.56 N +ANISOU 517 N AALA A 82 1205 991 1055 55 81 1 N +ATOM 518 N BALA A 82 -1.502 2.619 21.093 0.50 8.57 N +ANISOU 518 N BALA A 82 1221 989 1047 51 73 3 N +ATOM 519 CA AALA A 82 -2.054 1.440 20.442 0.50 8.66 C +ANISOU 519 CA AALA A 82 1292 950 1046 -2 73 22 C +ATOM 520 CA BALA A 82 -2.004 1.455 20.381 0.50 9.05 C +ANISOU 520 CA BALA A 82 1452 961 1024 -102 -44 33 C +ATOM 521 C AALA A 82 -3.189 0.918 21.328 0.50 8.99 C +ANISOU 521 C AALA A 82 1375 993 1045 73 344 138 C +ATOM 522 C BALA A 82 -3.028 0.721 21.197 0.50 9.88 C +ANISOU 522 C BALA A 82 1773 960 1018 -222 -222 -32 C +ATOM 523 O AALA A 82 -3.041 0.736 22.520 0.80 10.64 O +ANISOU 523 O AALA A 82 1631 1389 1020 -117 191 98 O +ATOM 524 O BALA A 82 -2.558 0.035 22.148 0.20 9.90 O +ANISOU 524 O BALA A 82 1963 1076 722 -39 43 189 O +ATOM 525 CB AALA A 82 -0.983 0.385 20.289 0.50 8.83 C +ANISOU 525 CB AALA A 82 1351 1028 974 41 216 30 C +ATOM 526 CB BALA A 82 -0.809 0.503 20.120 0.50 11.15 C +ANISOU 526 CB BALA A 82 1719 1117 1399 107 -281 -110 C +ATOM 527 N SER A 83 -4.261 0.519 20.678 1.00 9.47 N +ANISOU 527 N SER A 83 1432 1102 1061 -125 244 55 N +ATOM 528 CA ASER A 83 -5.308 -0.275 21.345 0.70 10.42 C +ANISOU 528 CA ASER A 83 1524 1180 1256 61 497 220 C +ATOM 529 CA BSER A 83 -5.296 -0.225 21.292 0.30 9.99 C +ANISOU 529 CA BSER A 83 1473 1149 1171 -81 344 -232 C +ATOM 530 C SER A 83 -5.324 -1.728 20.924 1.00 10.39 C +ANISOU 530 C SER A 83 1457 1076 1412 -6 466 118 C +ATOM 531 O SER A 83 -5.811 -2.551 21.683 1.00 12.89 O +ANISOU 531 O SER A 83 2075 1194 1628 -49 830 133 O +ATOM 532 CB ASER A 83 -6.691 0.285 21.121 0.70 11.38 C +ANISOU 532 CB ASER A 83 1538 1167 1615 116 448 25 C +ATOM 533 CB BSER A 83 -6.610 0.424 20.946 0.30 12.23 C +ANISOU 533 CB BSER A 83 1463 1397 1784 -157 274 -151 C +ATOM 534 OG ASER A 83 -6.965 0.397 19.729 0.70 11.09 O +ANISOU 534 OG ASER A 83 1338 1455 1418 106 285 72 O +ATOM 535 OG BSER A 83 -7.621 -0.353 21.495 0.30 20.97 O +ANISOU 535 OG BSER A 83 2290 2191 3486 -53 1535 585 O +ATOM 536 N LYS A 84 -4.827 -2.055 19.735 1.00 9.68 N +ANISOU 536 N LYS A 84 1326 1010 1339 101 429 46 N +ATOM 537 CA ALYS A 84 -4.644 -3.431 19.279 0.70 10.05 C +ANISOU 537 CA ALYS A 84 1522 1007 1287 42 336 144 C +ATOM 538 CA BLYS A 84 -4.643 -3.451 19.329 0.30 10.14 C +ANISOU 538 CA BLYS A 84 1491 993 1368 139 550 68 C +ATOM 539 C LYS A 84 -3.278 -3.508 18.640 1.00 9.20 C +ANISOU 539 C LYS A 84 1375 1055 1064 95 218 67 C +ATOM 540 O LYS A 84 -2.951 -2.592 17.884 1.00 9.59 O +ANISOU 540 O LYS A 84 1298 1105 1241 165 244 147 O +ATOM 541 CB ALYS A 84 -5.744 -3.907 18.283 0.70 10.32 C +ANISOU 541 CB ALYS A 84 1408 1097 1416 -68 49 -77 C +ATOM 542 CB BLYS A 84 -5.751 -3.940 18.359 0.30 11.80 C +ANISOU 542 CB BLYS A 84 1533 1237 1712 95 111 84 C +ATOM 543 CG ALYS A 84 -7.150 -3.728 18.755 0.70 12.75 C +ANISOU 543 CG ALYS A 84 1483 1417 1942 -138 412 254 C +ATOM 544 CG BLYS A 84 -7.189 -3.845 18.791 0.30 12.58 C +ANISOU 544 CG BLYS A 84 1623 1474 1683 204 124 -240 C +ATOM 545 CD ALYS A 84 -8.204 -4.287 17.849 0.70 14.48 C +ANISOU 545 CD ALYS A 84 1679 1886 1936 -324 273 150 C +ATOM 546 CD BLYS A 84 -8.139 -4.069 17.666 0.30 12.72 C +ANISOU 546 CD BLYS A 84 1610 888 2335 -136 -285 -181 C +ATOM 547 CE ALYS A 84 -9.569 -4.039 18.445 0.70 21.19 C +ANISOU 547 CE ALYS A 84 1246 3570 3235 -471 223 759 C +ATOM 548 CE BLYS A 84 -9.604 -4.051 18.097 0.30 16.57 C +ANISOU 548 CE BLYS A 84 2201 1973 2122 -543 402 -985 C +ATOM 549 NZ ALYS A 84 -10.614 -4.841 17.764 0.70 22.64 N +ANISOU 549 NZ ALYS A 84 2289 3604 2707 -1098 686 -21 N +ATOM 550 NZ BLYS A 84 -9.825 -4.761 19.395 0.30 16.91 N +ANISOU 550 NZ BLYS A 84 1927 1989 2508 -957 607 -278 N +ATOM 551 N ASER A 85 -2.563 -4.588 18.881 0.50 9.32 N +ANISOU 551 N ASER A 85 1536 976 1028 171 216 66 N +ATOM 552 N BSER A 85 -2.513 -4.566 18.859 0.50 9.48 N +ANISOU 552 N BSER A 85 1610 922 1068 188 342 63 N +ATOM 553 CA ASER A 85 -1.332 -4.882 18.202 0.50 8.74 C +ANISOU 553 CA ASER A 85 1455 1080 786 116 49 -76 C +ATOM 554 CA BSER A 85 -1.229 -4.752 18.163 0.50 9.42 C +ANISOU 554 CA BSER A 85 1467 1050 1060 398 267 26 C +ATOM 555 C ASER A 85 -1.483 -6.317 17.695 0.50 9.92 C +ANISOU 555 C ASER A 85 1752 836 1179 282 208 -106 C +ATOM 556 C BSER A 85 -1.244 -6.199 17.691 0.50 8.70 C +ANISOU 556 C BSER A 85 1254 1236 813 315 160 171 C +ATOM 557 O ASER A 85 -1.652 -7.279 18.484 0.50 11.61 O +ANISOU 557 O ASER A 85 2551 941 916 222 215 86 O +ATOM 558 O BSER A 85 -1.048 -7.069 18.528 0.50 10.44 O +ANISOU 558 O BSER A 85 1973 1010 981 389 -113 130 O +ATOM 559 CB ASER A 85 -0.160 -4.700 19.129 0.50 12.48 C +ANISOU 559 CB ASER A 85 1350 1725 1666 185 -278 -2 C +ATOM 560 CB BSER A 85 -0.049 -4.451 19.125 0.50 12.99 C +ANISOU 560 CB BSER A 85 2150 1600 1183 804 -217 -517 C +ATOM 561 OG ASER A 85 -0.049 -3.356 19.526 0.50 16.00 O +ANISOU 561 OG ASER A 85 1752 2069 2256 354 -901 -970 O +ATOM 562 OG BSER A 85 -0.099 -3.127 19.634 0.50 16.22 O +ANISOU 562 OG BSER A 85 2356 1937 1869 867 -163 -876 O +ATOM 563 N ILE A 86 -1.504 -6.433 16.390 1.00 7.88 N +ANISOU 563 N ILE A 86 1184 936 872 61 178 65 N +ATOM 564 CA ILE A 86 -1.872 -7.694 15.783 1.00 7.95 C +ANISOU 564 CA ILE A 86 1192 863 963 -21 192 154 C +ATOM 565 C ILE A 86 -0.722 -8.196 14.932 1.00 7.58 C +ANISOU 565 C ILE A 86 1181 748 949 -103 205 87 C +ATOM 566 O ILE A 86 -0.535 -7.767 13.777 1.00 8.17 O +ANISOU 566 O ILE A 86 1249 887 967 -55 242 135 O +ATOM 567 CB ILE A 86 -3.164 -7.571 14.969 1.00 8.30 C +ANISOU 567 CB ILE A 86 1190 920 1043 -32 175 224 C +ATOM 568 CG1 ILE A 86 -4.302 -7.000 15.799 1.00 9.14 C +ANISOU 568 CG1 ILE A 86 1182 1078 1211 15 159 194 C +ATOM 569 CG2 ILE A 86 -3.524 -8.934 14.351 1.00 8.71 C +ANISOU 569 CG2 ILE A 86 1228 984 1097 -91 80 176 C +ATOM 570 CD1 ILE A 86 -5.630 -6.861 15.071 1.00 10.85 C +ANISOU 570 CD1 ILE A 86 1307 1331 1482 31 162 271 C \ No newline at end of file diff --git a/tests/test_pdb_selaltloc.py b/tests/test_pdb_selaltloc.py index 085376a..59bab97 100644 --- a/tests/test_pdb_selaltloc.py +++ b/tests/test_pdb_selaltloc.py @@ -869,6 +869,105 @@ def test_captures_previous_residue_maxocc_B(self): "ATOM 201 CB GLY A 22 -7.405 -25.428 33.847 0.60 0.00 C ", ] ) + + def test_handle_multiple_residues(self): + """ + pdb_selaltloc data/pdb4xoj-small.pdb. + + The test checks that the SER85 residue is not removed by pdb_selaltloc + as it happened until version 2.5.0. + """ + sys.argv = ['', os.path.join(data_dir, 'pdb4xoj-small.pdb')] + print("executing") + self.exec_module() + self.assertEqual(self.retcode, 0) + self.assertEqual(len(self.stdout), 80) + self.assertEqual(len(self.stderr), 0) + self.assertEqual( + self.stdout, + [ + "ATOM 511 N SER A 81 -2.125 6.104 21.775 1.00 8.48 N ", + "ANISOU 511 N SER A 81 1227 964 1028 -36 224 13 N ", + "ATOM 512 CA SER A 81 -1.772 4.741 22.205 1.00 8.74 C ", + "ANISOU 512 CA SER A 81 1233 1007 1077 11 199 56 C ", + "ATOM 513 C SER A 81 -2.262 3.698 21.215 1.00 8.42 C ", + "ANISOU 513 C SER A 81 1167 989 1041 -75 90 138 C ", + "ATOM 514 O SER A 81 -3.353 3.829 20.638 1.00 9.44 O ", + "ANISOU 514 O SER A 81 1153 1058 1375 -48 41 62 O ", + "ATOM 515 CB SER A 81 -2.424 4.479 23.564 1.00 10.45 C ", + "ANISOU 515 CB SER A 81 1683 1191 1094 -49 249 19 C ", + "ATOM 516 OG SER A 81 -1.889 5.387 24.506 1.00 13.63 O ", + "ANISOU 516 OG SER A 81 2449 1531 1199 -52 258 -6 O ", + "ATOM 517 N ALA A 82 -1.494 2.619 21.079 0.50 8.56 N ", + "ANISOU 517 N ALA A 82 1205 991 1055 55 81 1 N ", + "ATOM 519 CA ALA A 82 -2.054 1.440 20.442 0.50 8.66 C ", + "ANISOU 519 CA ALA A 82 1292 950 1046 -2 73 22 C ", + "ATOM 521 C ALA A 82 -3.189 0.918 21.328 0.50 8.99 C ", + "ANISOU 521 C ALA A 82 1375 993 1045 73 344 138 C ", + "ATOM 523 O ALA A 82 -3.041 0.736 22.520 0.80 10.64 O ", + "ANISOU 523 O ALA A 82 1631 1389 1020 -117 191 98 O ", + "ATOM 525 CB ALA A 82 -0.983 0.385 20.289 0.50 8.83 C ", + "ANISOU 525 CB ALA A 82 1351 1028 974 41 216 30 C ", + "ATOM 527 N SER A 83 -4.261 0.519 20.678 1.00 9.47 N ", + "ANISOU 527 N SER A 83 1432 1102 1061 -125 244 55 N ", + "ATOM 528 CA SER A 83 -5.308 -0.275 21.345 0.70 10.42 C ", + "ANISOU 528 CA SER A 83 1524 1180 1256 61 497 220 C ", + "ATOM 530 C SER A 83 -5.324 -1.728 20.924 1.00 10.39 C ", + "ANISOU 530 C SER A 83 1457 1076 1412 -6 466 118 C ", + "ATOM 531 O SER A 83 -5.811 -2.551 21.683 1.00 12.89 O ", + "ANISOU 531 O SER A 83 2075 1194 1628 -49 830 133 O ", + "ATOM 532 CB SER A 83 -6.691 0.285 21.121 0.70 11.38 C ", + "ANISOU 532 CB SER A 83 1538 1167 1615 116 448 25 C ", + "ATOM 534 OG SER A 83 -6.965 0.397 19.729 0.70 11.09 O ", + "ANISOU 534 OG SER A 83 1338 1455 1418 106 285 72 O ", + "ATOM 536 N LYS A 84 -4.827 -2.055 19.735 1.00 9.68 N ", + "ANISOU 536 N LYS A 84 1326 1010 1339 101 429 46 N ", + "ATOM 537 CA LYS A 84 -4.644 -3.431 19.279 0.70 10.05 C ", + "ANISOU 537 CA LYS A 84 1522 1007 1287 42 336 144 C ", + "ATOM 539 C LYS A 84 -3.278 -3.508 18.640 1.00 9.20 C ", + "ANISOU 539 C LYS A 84 1375 1055 1064 95 218 67 C ", + "ATOM 540 O LYS A 84 -2.951 -2.592 17.884 1.00 9.59 O ", + "ANISOU 540 O LYS A 84 1298 1105 1241 165 244 147 O ", + "ATOM 541 CB LYS A 84 -5.744 -3.907 18.283 0.70 10.32 C ", + "ANISOU 541 CB LYS A 84 1408 1097 1416 -68 49 -77 C ", + "ATOM 543 CG LYS A 84 -7.150 -3.728 18.755 0.70 12.75 C ", + "ANISOU 543 CG LYS A 84 1483 1417 1942 -138 412 254 C ", + "ATOM 545 CD LYS A 84 -8.204 -4.287 17.849 0.70 14.48 C ", + "ANISOU 545 CD LYS A 84 1679 1886 1936 -324 273 150 C ", + "ATOM 547 CE LYS A 84 -9.569 -4.039 18.445 0.70 21.19 C ", + "ANISOU 547 CE LYS A 84 1246 3570 3235 -471 223 759 C ", + "ATOM 549 NZ LYS A 84 -10.614 -4.841 17.764 0.70 22.64 N ", + "ANISOU 549 NZ LYS A 84 2289 3604 2707 -1098 686 -21 N ", + "ATOM 551 N SER A 85 -2.563 -4.588 18.881 0.50 9.32 N ", + "ANISOU 551 N SER A 85 1536 976 1028 171 216 66 N ", + "ATOM 553 CA SER A 85 -1.332 -4.882 18.202 0.50 8.74 C ", + "ANISOU 553 CA SER A 85 1455 1080 786 116 49 -76 C ", + "ATOM 555 C SER A 85 -1.483 -6.317 17.695 0.50 9.92 C ", + "ANISOU 555 C SER A 85 1752 836 1179 282 208 -106 C ", + "ATOM 557 O SER A 85 -1.652 -7.279 18.484 0.50 11.61 O ", + "ANISOU 557 O SER A 85 2551 941 916 222 215 86 O ", + "ATOM 559 CB SER A 85 -0.160 -4.700 19.129 0.50 12.48 C ", + "ANISOU 559 CB SER A 85 1350 1725 1666 185 -278 -2 C ", + "ATOM 561 OG SER A 85 -0.049 -3.356 19.526 0.50 16.00 O ", + "ANISOU 561 OG SER A 85 1752 2069 2256 354 -901 -970 O ", + "ATOM 563 N ILE A 86 -1.504 -6.433 16.390 1.00 7.88 N ", + "ANISOU 563 N ILE A 86 1184 936 872 61 178 65 N ", + "ATOM 564 CA ILE A 86 -1.872 -7.694 15.783 1.00 7.95 C ", + "ANISOU 564 CA ILE A 86 1192 863 963 -21 192 154 C ", + "ATOM 565 C ILE A 86 -0.722 -8.196 14.932 1.00 7.58 C ", + "ANISOU 565 C ILE A 86 1181 748 949 -103 205 87 C ", + "ATOM 566 O ILE A 86 -0.535 -7.767 13.777 1.00 8.17 O ", + "ANISOU 566 O ILE A 86 1249 887 967 -55 242 135 O ", + "ATOM 567 CB ILE A 86 -3.164 -7.571 14.969 1.00 8.30 C ", + "ANISOU 567 CB ILE A 86 1190 920 1043 -32 175 224 C ", + "ATOM 568 CG1 ILE A 86 -4.302 -7.000 15.799 1.00 9.14 C ", + "ANISOU 568 CG1 ILE A 86 1182 1078 1211 15 159 194 C ", + "ATOM 569 CG2 ILE A 86 -3.524 -8.934 14.351 1.00 8.71 C ", + "ANISOU 569 CG2 ILE A 86 1228 984 1097 -91 80 176 C ", + "ATOM 570 CD1 ILE A 86 -5.630 -6.861 15.071 1.00 10.85 C ", + "ANISOU 570 CD1 ILE A 86 1307 1331 1482 31 162 271 C " + ] + ) def test_file_not_found(self):