Skip to content

Commit 0102417

Browse files
committed
Add tests for executable IO
1 parent f41d9e6 commit 0102417

File tree

2 files changed

+102
-0
lines changed

2 files changed

+102
-0
lines changed
75.6 KB
Binary file not shown.
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
import re
2+
import pytest
3+
from pathlib import Path
4+
from helpers import parse_inchi_from_executable_output
5+
from sdf_pipeline.utils import select_records_from_gzipped_sdf
6+
7+
8+
# Captures the text between a "> <ID>" header line and the "$$$$" record
# terminator; DOTALL lets the captured group span line breaks.
ID_PATTERN = re.compile(r"> <ID>\n(.*?)\n\$\$\$\$", flags=re.DOTALL)
9+
10+
11+
def get_molfile_id(molfile: str) -> str:
    """Extract the ID of an SDF record via ``ID_PATTERN``.

    Returns the captured ID with surrounding whitespace stripped, or an empty
    string when the pattern does not match ``molfile``.
    """
    found = ID_PATTERN.search(molfile)
    if found is None:
        return ""

    return found.group(1).strip()
15+
16+
17+
def get_record(sdf_path: Path, molfile_id: str) -> str:
    """Return the molfile of the first record selected for ``molfile_id``.

    Args:
        sdf_path: Path of the gzipped SDF that is searched.
        molfile_id: ID of the wanted record; records are identified with
            ``get_molfile_id``.

    Returns:
        The molfile of the first selected record.

    Raises:
        LookupError: If no record is selected for ``molfile_id``.
    """
    records = select_records_from_gzipped_sdf(sdf_path, {molfile_id}, get_molfile_id)
    try:
        # Each selected item unpacks into a pair; only the molfile is needed.
        _, molfile = next(records)
    except StopIteration:
        # A bare StopIteration would not say which ID or file was involved.
        raise LookupError(
            f"No record with ID {molfile_id!r} found in {sdf_path}"
        ) from None

    return molfile
23+
24+
25+
@pytest.fixture
def sdf_path():
    """Absolute path of ``data/test_io.sdf.gz`` next to this test module."""
    test_dir = Path(__file__).parent.absolute()

    return test_dir.joinpath("data/test_io.sdf.gz")
28+
29+
30+
@pytest.fixture
def molfile_v3000_scsr_extension(sdf_path):
    """Molfile of the record with ID "001" in the test SDF."""
    record_id = "001"

    return get_record(sdf_path, record_id)
33+
34+
35+
@pytest.fixture
def molfile_v3000_more_than_999_atoms_and_bonds(sdf_path):
    """Molfile of the record with ID "002" in the test SDF."""
    record_id = "002"

    return get_record(sdf_path, record_id)
38+
39+
40+
@pytest.fixture
def molfile_v3000_more_than_999_bonds(sdf_path):
    """Molfile of the record with ID "003" in the test SDF."""
    record_id = "003"

    return get_record(sdf_path, record_id)
43+
44+
45+
@pytest.fixture
def molfile_v3000_with_999_atoms(sdf_path):
    """Molfile of the record with ID "004" in the test SDF."""
    record_id = "004"

    return get_record(sdf_path, record_id)
48+
49+
50+
def test_executable_rejects_scsr_extension(molfile_v3000_scsr_extension, run_inchi_exe):
    """The executable's stderr must contain the error 190 message for this molfile."""
    expected_error = "Error 190 (no InChI; Unknown element(s): Thr) inp"

    stderr = run_inchi_exe(molfile_v3000_scsr_extension).stderr

    assert expected_error in stderr
54+
55+
56+
def test_executable_rejects_more_than_999_atoms(
    molfile_v3000_more_than_999_atoms_and_bonds, run_inchi_exe
):
    """Run without extra switches: stderr must contain the error 170 message."""
    too_many_atoms_error = "Error 170 (no InChI; V3000 enhanced stereo read/stored but ignored; Too many atoms [did you forget 'LargeMolecules' switch?])"

    stderr = run_inchi_exe(molfile_v3000_more_than_999_atoms_and_bonds).stderr

    assert too_many_atoms_error in stderr
65+
66+
67+
def test_executable_accepts_more_than_999_atoms_with_flag(
    molfile_v3000_more_than_999_atoms_and_bonds, run_inchi_exe
):
    """Run with ``-LargeMolecules``: no error 170, and the experimental-mode notice appears."""
    too_many_atoms_error = "Error 170 (no InChI; V3000 enhanced stereo read/stored but ignored; Too many atoms [did you forget 'LargeMolecules' switch?])"
    large_molecules_notice = "Experimental mode: Up to 32766 atoms per structure"

    stderr = run_inchi_exe(
        molfile_v3000_more_than_999_atoms_and_bonds, "-LargeMolecules"
    ).stderr

    assert too_many_atoms_error not in stderr
    assert large_molecules_notice in stderr
79+
80+
81+
@pytest.mark.xfail(strict=True, raises=AssertionError)
def test_executable_writes_v3000(molfile_v3000_more_than_999_bonds, run_inchi_exe):
    """Round-trip a molfile through InChI and check the regenerated structure output.

    The molfile is converted to an InChI, the InChI is converted back with
    ``-InChI2Struct -OutputSDF``, and two lines of that output are inspected.

    Marked as a strict expected failure: the test is expected to fail with an
    ``AssertionError``, and an unexpected pass is reported as a failure.
    """
    result = run_inchi_exe(molfile_v3000_more_than_999_bonds)
    inchi = parse_inchi_from_executable_output(result.output)
    re_result = run_inchi_exe(inchi, "-InChI2Struct -OutputSDF")

    molfile = re_result.output.split("\n")
    version_line = molfile[3]
    counts_line = molfile[5]

    # CTfile writes the version tag in upper case ("V3000"); compare
    # case-insensitively so the check can succeed once V3000 output exists.
    assert "V3000" in version_line.upper()
    assert "COUNTS 989 1014" in counts_line
93+
94+
95+
def test_executable_accepts_999_atoms(molfile_v3000_with_999_atoms, run_inchi_exe):
    """The InChI parsed from the executable's output must equal the expected string."""
    result = run_inchi_exe(molfile_v3000_with_999_atoms)
    inchi = parse_inchi_from_executable_output(result.output)

    # The expected value is one string; adjacent literals are concatenated by
    # Python at compile time, so no characters are added at the split points.
    expected_inchi = (
        "InChI=1S/C611H958N170O180S9Se3/c1-74-287(28)460(579(933)677-317(58)492(846)705-378(216-282(18)19)524(878)708-379(217-283(20)21)525(879)709-382(221-333-153-155-344(805)156-154-333)538(892)769-476(320(61)794)594(948)697-359(144-108-118-195-652-575(929)457-308(49)183-208-643-457)502(856)733-407(256-784)549(903)692-366(158-168-437(817)818)508(862)688-372(164-174-443(829)830)515(869)753-462(289(30)76-3)582(936)722-383(220-332-129-93-90-94-130-332)539(893)770-479(323(64)797)599(953)726-389(227-339-248-632-276-665-339)527(881)690-373(165-175-444(831)832)516(870)754-463(290(31)77-4)584(938)730-399(237-432(618)811)534(888)746-417(266-964)559(913)713-392(230-342-251-635-279-668-342)543(897)762-473(300(41)87-14)592(946)775-481(325(66)799)598(952)698-360(145-109-119-196-653-576(930)458-309(50)184-209-644-458)503(857)735-410(259-787)553(907)771-478(322(63)796)596(950)700-364(150-124-201-658-611(628)629)506(860)750-424(273-973)565(919)747-421(270-968)562(916)774-480(324(65)798)604(958)748-418(267-965)557(911)687-356(141-105-115-192-649-572(926)454-305(46)180-205-640-454)499(853)715-400(238-433(619)812)545(899)752-450(285(24)25)578(932)701-370(162-172-441(825)826)509(863)683-363(149-123-200-657-610(626)627)504(858)734-409(258-786)551(905)764-461(288(29)75-2)580(934)696-358(143-107-117-194-651-574(928)456-307(48)182-207-642-456)500(854)714-395(233-428(614)807)531(885)711-384(222-334-243-659-348-134-98-95-131-345(334)348)526(880)685-354(139-103-113-190-647-570(924)452-303(44)178-203-638-452)496(850)682-362(148-122-199-656-609(624)625)497(851)681-353(137-101-111-188-612)501(855)732-405(254-782)547(901)675-315(56)491(845)707-394(232-427(613)806)530(884)718-402(240-446(835)836)519(873)670-286(26)27)766-561(915)420(269-967)744-513(867)369(161-171-440(823)824)695-567(921)426-152-126-212-781(426)607(961)414(263-791)741-512(866)368(160-170-439(821)822)693-550(904)408(257-785)737-535(889)403(241-447(837)838)719-532(886)396(234-429(615)808)716-563(917)422(271-971)749-505(859)357(142-106-"
        "116-193-650-573(927)455-306(47)181-206-641-455)694-566(920)425-151-125-211-780(425)606(960)376(187-214-970-73)703-498(852)355(140-104-114-191-648-571(925)453-304(45)179-204-639-453)686-556(910)415(264-962)743-507(861)365(157-167-436(815)816)678-435(814)253-662-494(848)351(147-121-198-655-608(622)623)679-487(841)311(52)672-520(874)377(215-281(16)17)704-489(843)313(54)674-522(876)388(226-338-247-631-275-664-338)725-601(955)483(327(68)801)777-591(945)472(299(40)86-13)761-541(895)386(224-336-245-661-350-136-100-97-133-347(336)350)723-583(937)467(294(35)81-8)758-542(896)391(229-341-250-634-278-667-341)712-558(912)416(265-963)745-533(887)398(236-431(617)810)729-585(939)464(291(32)78-5)755-517(871)374(166-176-445(833)834)691-528(882)390(228-340-249-633-277-666-340)727-602(956)484(328(69)802)779-605(959)486(330(71)804)773-554(908)411(260-788)736-511(865)371(163-173-442(827)828)702-597(951)475(319(60)793)768-514(868)361(146-110-120-197-654-577(931)459-310(51)185-210-645-459)699-595(949)477(321(62)795)772-555(909)413(262-790)740-603(957)485(329(70)803)778-593(947)474(301(42)88-15)763-546(900)401(239-434(620)813)717-564(918)423(272-972)751-536(890)404(242-448(839)840)731-587(941)468(295(36)82-9)767-560(914)419(268-966)742-493(847)316(57)671-495(849)352(138-102-112-189-646-569(923)451-302(43)177-202-637-451)684-529(883)397(235-430(616)809)728-586(940)465(292(33)79-6)756-518(872)375(186-213-969-72)680-488(842)312(53)673-521(875)387(225-337-246-630-274-663-337)724-600(954)482(326(67)800)776-590(944)471(298(39)85-12)760-540(894)385(223-335-244-660-349-135-99-96-132-346(335)349)710-510(864)367(159-169-438(819)820)689-523(877)380(218-284(22)23)721-581(935)466(293(34)80-7)757-537(891)381(219-331-127-91-89-92-128-331)706-490(844)314(55)676-548(902)406(255-783)738-589(943)470(297(38)84-11)765-552(906)412(261-789)739-588(942)469(296(37)83-10)759-544(898)393(231-343-252-636-280-669-343)720-568(922)449(621)318(59)792/h89-100,127-136,153-156,202-210,243-252,274-330,351-426,449-486,659-661,"
        "782-805,962-968,971-973H,74-88,101-126,137-152,157-201,211-242,253-273,612,621H2,1-73H3,(H2,613,806)(H2,614,807)(H2,615,808)(H2,616,809)(H2,617,810)(H2,618,811)(H2,619,812)(H2,620,813)(H,630,663)(H,631,664)(H,632,665)(H,633,666)(H,634,667)(H,635,668)(H,636,669)(H,646,923)(H,647,924)(H,648,925)(H,649,926)(H,650,927)(H,651,928)(H,652,929)(H,653,930)(H,654,931)(H,662,848)(H,670,873)(H,671,849)(H,672,874)(H,673,875)(H,674,876)(H,675,901)(H,676,902)(H,677,933)(H,678,814)(H,679,841)(H,680,842)(H,681,851)(H,682,850)(H,683,863)(H,684,883)(H,685,880)(H,686,910)(H,687,911)(H,688,862)(H,689,877)(H,690,881)(H,691,882)(H,692,903)(H,693,904)(H,694,920)(H,695,921)(H,696,934)(H,697,948)(H,698,952)(H,699,949)(H,700,950)(H,701,932)(H,702,951)(H,703,852)(H,704,843)(H,705,846)(H,706,844)(H,707,845)(H,708,878)(H,709,879)(H,710,864)(H,711,885)(H,712,912)(H,713,913)(H,714,854)(H,715,853)(H,716,917)(H,717,918)(H,718,884)(H,719,886)(H,720,922)(H,721,935)(H,722,936)(H,723,937)(H,724,954)(H,725,955)(H,726,953)(H,727,956)(H,728,940)(H,729,939)(H,730,938)(H,731,941)(H,732,855)(H,733,856)(H,734,858)(H,735,857)(H,736,865)(H,737,889)(H,738,943)(H,739,942)(H,740,957)(H,741,866)(H,742,847)(H,743,861)(H,744,867)(H,745,887)(H,746,888)(H,747,919)(H,748,958)(H,749,859)(H,750,860)(H,751,890)(H,752,899)(H,753,869)(H,754,870)(H,755,871)(H,756,872)(H,757,891)(H,758,896)(H,759,898)(H,760,894)(H,761,895)(H,762,897)(H,763,900)(H,764,905)(H,765,906)(H,766,915)(H,767,914)(H,768,868)(H,769,892)(H,770,893)(H,771,907)(H,772,909)(H,773,908)(H,774,916)(H,775,946)(H,776,944)(H,777,945)(H,778,947)(H,779,959)(H,815,816)(H,817,818)(H,819,820)(H,821,822)(H,823,824)(H,825,826)(H,827,828)(H,829,830)(H,831,832)(H,833,834)(H,835,836)(H,837,838)(H,839,840)(H4,622,623,655)(H4,624,625,656)(H4,626,627,657)(H4,628,629,658)/t287-,288-,289-,290-,291-,292-,293-,294-,295-,296-,297-,298-,299-,300-,301-,302?,303?,304?,305?,306?,307?,308?,309?,310?,311-,312-,313-,314-,315-,316-,317-,318+,319+,320+,321+,322+,323+,324+,325+,326+,327+,328+,"
        "329+,330+,351-,352-,353-,354-,355-,356-,357-,358-,359-,360-,361-,362-,363-,364-,365-,366-,367-,368-,369-,370-,371-,372-,373-,374-,375-,376-,377-,378-,379-,380-,381-,382-,383-,384-,385-,386-,387-,388-,389-,390-,391-,392-,393-,394-,395-,396-,397-,398-,399-,400-,401-,402-,403-,404-,405-,406-,407-,408-,409-,410-,411-,412-,413-,414-,415-,416-,417-,418-,419-,420-,421-,422-,423-,424-,425-,426-,449-,450-,451+,452+,453+,454+,455+,456+,457+,458+,459+,460-,461-,462-,463-,464-,465-,466-,467-,468-,469-,470-,471-,472-,473-,474-,475-,476-,477-,478-,479-,480-,481-,482-,483-,484-,485-,486-/m0/s1"
    )

    assert inchi == expected_inchi

0 commit comments

Comments
 (0)