22# This file is a part of MolecularGraph.jl
33# Licensed under the MIT License http://opensource.org/licenses/MIT
44#
5+ const INCHI_StereoType_None = 0
6+ const INCHI_StereoType_DoubleBond = 1
7+ const INCHI_StereoType_Tetrahedral= 2
8+ const INCHI_StereoType_Allene = 3
9+
10+ const INCHI_PARITY_NONE = 0
11+ const INCHI_PARITY_ODD = 1 # 'o'
12+ const INCHI_PARITY_EVEN = 2 # 'e'
13+ const INCHI_PARITY_UNKNOWN = 3 # 'u'
14+ const INCHI_PARITY_UNDEFINED = 4 # '?', rarely used
15+
16+ const NO_ATOM = - 1 # from header: NO_ATOM == -1
17+
18+ const MAXVAL = 20
19+ const ATOM_EL_LEN = 6
20+ const NUM_H_ISOTOPES = 3
21+ const AT_NUM = Int16
22+ const S_CHAR = Int8
23+ struct inchi_Atom
24+ x:: Cdouble
25+ y:: Cdouble
26+ z:: Cdouble
27+ neighbor:: NTuple{MAXVAL,AT_NUM}
28+ bond_type:: NTuple{MAXVAL,S_CHAR}
29+ bond_stereo:: NTuple{MAXVAL,S_CHAR}
30+ elname:: NTuple{ATOM_EL_LEN,Cchar}
31+ num_bonds:: AT_NUM
32+ num_iso_H:: NTuple{NUM_H_ISOTOPES+1,S_CHAR}
33+ isotopic_mass:: AT_NUM
34+ radical:: S_CHAR
35+ charge:: S_CHAR
36+ end
537
638mutable struct inchi_Output
739 szInChI:: Cstring
@@ -18,10 +50,17 @@ mutable struct inchi_InputINCHI
1850 # '/' or '-' depending on OS and compiler
1951end
2052
53+ struct inchi_Stereo0D
54+ neighbor:: NTuple{4, AT_NUM} # always 4 atoms
55+ central_atom:: AT_NUM # central atom index (or NO_ATOM)
56+ type:: S_CHAR # inchi_StereoType0D
57+ parity:: S_CHAR # inchi_StereoParity0D (possibly combined)
58+ end
59+
2160mutable struct inchi_Input
2261 # the caller is responsible for the data allocation and deallocation
23- atom:: Ptr{Cvoid } # array of num_atoms elements
24- stereo0D:: Ptr{Cvoid } # array of num_stereo0D 0D stereo elements or NULL
62+ atom:: Ptr{inchi_Atom } # array of num_atoms elements
63+ stereo0D:: Ptr{inchi_Stereo0D } # array of num_stereo0D 0D stereo elements or NULL
2564 szOptions:: Cstring # InChI options: space-delimited; each is preceded by
2665 # '/' or '-' depending on OS and compiler
2766 num_atoms:: Cshort # number of atoms in the structure < MAX_ATOMS
3069
3170mutable struct inchi_InputEx
3271 # the caller is responsible for the data allocation and deallocation
33- atom:: Ptr{Cvoid} # array of num_atoms elements
34- stereo0D:: Ptr{Cvoid} # array of num_stereo0D 0D stereo elements or NULL
35- szOptions:: Cstring # InChI options: space-delimited; each is preceded by
36- # '/' or '-' depending on OS and compiler
37- num_atoms:: Cshort # number of atoms in the structure < MAX_ATOMS
38- num_stereo0D:: Cshort # number of 0D stereo elements
72+ atom:: Ptr{inchi_Atom} # array of num_atoms elements
73+ stereo0D:: Ptr{inchi_Stereo0D} # array of num_stereo0D 0D stereo elements or NULL
74+ szOptions:: Cstring # InChI options: space-delimited; each is preceded by
75+ # '/' or '-' depending on OS and compiler
76+ num_atoms:: Cshort # number of atoms in the structure < MAX_ATOMS
77+ num_stereo0D:: Cshort # number of 0D stereo elements
3978 polymer:: Ptr{Cvoid}
4079 v3000:: Ptr{Cvoid}
4180end
4281
4382mutable struct inchi_OutputStruct
4483 # the caller is responsible for the data allocation and deallocation
45- atom:: Ptr{Cvoid } # array of num_atoms elements
46- stereo0D:: Ptr{Cvoid } # array of num_stereo0D 0D stereo elements or NULL
84+ atom:: Ptr{inchi_Atom } # array of num_atoms elements
85+ stereo0D:: Ptr{inchi_Stereo0D } # array of num_stereo0D 0D stereo elements or NULL
4786 num_atoms:: Cshort ; # number of atoms in the structure < MAX_ATOMS
4887 num_stereo0D:: Cshort # number of 0D stereo elements
4988 szMessage:: Cstring # Error/warning ASCIIZ message
62101
63102mutable struct inchi_OutputStructEx
64103 # the caller is responsible for the data allocation and deallocation
65- atom:: Ptr{Cvoid} # array of num_atoms elements
66- stereo0D:: Ptr{Cvoid} # array of num_stereo0D 0D stereo elements or NULL
67- num_atoms:: Cshort ; # number of atoms in the structure < MAX_ATOMS
68- num_stereo0D:: Cshort # number of 0D stereo elements
69- szMessage:: Cstring # Error/warning ASCIIZ message
70- szLog:: Cstring # log-file ASCIIZ string, contains a human-readable list
71- # of recognized options and possibly an Error/warn message
104+ atom:: Ptr{inchi_Atom} # array of num_atoms elements
105+ stereo0D:: Ptr{inchi_Stereo0D} # array of num_stereo0D 0D stereo elements or NULL
106+ num_atoms:: Cshort ; # number of atoms in the structure < MAX_ATOMS
107+ num_stereo0D:: Cshort # number of 0D stereo elements
108+ szMessage:: Cstring # Error/warning ASCIIZ message
109+ szLog:: Cstring # log-file ASCIIZ string, contains a human-readable list
110+ # of recognized options and possibly an Error/warn message
72111 warningflags:: NTuple{2, NTuple{2, Clong}} # warnings, see INCHIDIFF in inchicmp.h */
73112 # [x][y]:
74113 # x=0 => Reconnected if present in InChI
@@ -160,52 +199,235 @@ end
160199inchikey(mol:: SimpleMolGraph ) = inchikey(inchi(mol))
161200
162201"""
163- inchitosdf(inchi::String ; options::String = "") -> Union{String, Nothing}
202+ inchitosdf(inchi::AbstractString ; options::String = "", config:: Union{Nothing,Dict{Symbol,Any}})
164203
165- Generate sdf string from inchi string, `options` are specified in https://github.com/mojaie/libinchi/blob/master/INCHI_BASE/src/inchi_api.h
204+ Generate sdf string from inchi string.
205+ This new version goes via parsing to a MolGraph and exporting as sdf.
206+ Parsing `options` are specified in https://github.com/mojaie/libinchi/blob/master/INCHI_BASE/src/inchi_api.h
207+ Coordinates are generated via coordgen (Schrödinger coordgenlibs).
166208"""
167- function inchitosdf(inchi:: String ; options:: String = " " , verbose:: Bool = false )
168- # support the correct options format depending on OS
169- opts = opt_array(options)
170- # add a timeout of 60s per molecule
171- any(occursin.(r" ^Wm?\d +$" , opts)) || push!(opts, " W60" )
172- # switch output to sdf format
173- " OutputSDF" ∈ opts || push!(opts, " OutputSDF" )
174- options = opt_string(opts)
175-
176- structure = inchi_OutputStructEx()
177- inchi_input = inchi_InputINCHI(Base. unsafe_convert(Cstring, inchi), Base. unsafe_convert(Cstring, options))
178- @ccall libinchi. GetStructFromINCHIEx(
179- inchi_input:: Ref{inchi_InputINCHI} , structure:: Ref{inchi_OutputStructEx} ):: Int32
180-
181- input = inchi_InputEx(
182- structure. atom,
183- structure. stereo0D,
184- Base. unsafe_convert(Cstring, options),
185- structure. num_atoms,
186- structure. num_stereo0D,
187- structure. polymer,
188- structure. v3000
189- )
190- output = inchi_Output()
191- @ccall libinchi. GetINCHIEx(
192- input:: Ref{inchi_InputEx} , output:: Ref{inchi_Output} ):: Cint
193- report_output(output, verbose)
209+ function inchitosdf(inchi:: AbstractString ; options:: String = " " , config:: Union{Nothing,Dict{Symbol,Any}} = nothing )
210+ printv2mol(inchitomol(inchi; options, config))
211+ end
194212
195- res = output. szInChI == C_NULL ? nothing : unsafe_string(output. szInChI)
213+ # decode parity byte (connected in low 3 bits, disconnected in bits 3..5)
214+ # returns the parity value to use (0..4)
215+ decode_parity(p:: Integer ) = begin
216+ p_raw = UInt8(p) # make sure unsigned before bit ops
217+ p_conn = Int(p_raw & 0x07 ) # low 3 bits
218+ p_disc = Int((p_raw >> 3 ) & 0x07 ) # next 3 bits
219+ p_conn != 0 ? p_conn : p_disc # prefer connected parity if present
220+ end
196221
197- # Free buffers allocated by GetStructFromINCHIEx and GetINCHI
198- @ccall libinchi. FreeStructFromINCHIEx(structure:: Ref{inchi_OutputStructEx} ):: Cvoid
199- @ccall libinchi. FreeINCHI(output:: Ref{inchi_Output} ):: Cvoid
222+ function process_inchi_stereo!(g:: T , structure:: inchi_OutputStructEx ) where T <: MolGraph
223+ ET = edgetype(T)
224+
225+ stereocenters = Dict{Int64, Tuple{Int64,Int64,Int64,Bool}}() # center => (looking_from,v1,v2,is_clockwise)
226+ stereobonds = Dict{Edge{Int64}, Tuple{Int64,Int64,Bool}}() # edge => (a1,a2,is_cis)
227+
228+ atoms = unsafe_wrap(Vector{inchi_Atom}, structure. atom, structure. num_atoms)
229+ stereos = unsafe_wrap(Vector{inchi_Stereo0D}, structure. stereo0D, structure. num_stereo0D)
230+ n_heavy = length(atoms) # number of heavy atoms (without isotopic H/D/T)
231+ is_heavy(idx) = 0 < idx <= n_heavy # for 1-based indices
232+
233+ for s in stereos
234+ parity = decode_parity(s. parity)
235+ parity == INCHI_PARITY_NONE || parity == INCHI_PARITY_UNKNOWN && continue # nothing to set
236+ parity == INCHI_PARITY_UNDEFINED && continue # explicit undefined -> skip
237+
238+ # convert to 1-based indexing
239+ nbrs = s. neighbor .+ 1
240+ center = s. central_atom + 1
241+
242+ if s. type == INCHI_StereoType_Tetrahedral
243+ center == 0 && continue # sanity check
244+
245+ # InChI neighbors order corresponds to W,X,Y,Z (see InChI comments).
246+ w, x, y, z = nbrs # these are already expanded indices (or NO_ATOM)
247+ is_clockwise = (parity == INCHI_PARITY_EVEN) # 'e' == clockwise per docs
248+ stereocenters[center] = (w, x, y, is_clockwise)
249+ elseif s. type == INCHI_StereoType_DoubleBond
250+ all(is_heavy.(nbrs)) || continue # skip disconnected or missing substituents
251+
252+ # the double bond is between the middle two atoms
253+ # by InChI convention, neighbor[1,2] on one end, neighbor[3,4] on the other
254+ # determine the actual bond by looking up the shared bond in the graph
255+ bond_candidates = [u_edge(g, nbrs[i], nbrs[j]) for i in 1 : 2 , j in 3 : 4 ]
256+ # pick the one that actually exists in your bond list
257+ edges = collect(g. eprops) # materialize iterator
258+ bond_key_index = findfirst(e -> e[1 ] in bond_candidates, edges)
259+ bond_key_index === nothing && continue
260+ bond_key = edges[bond_key_index][1 ]
261+
262+ is_trans = if parity == INCHI_PARITY_ODD
263+ true # InChI uses ODD -> trans
264+ elseif parity == INCHI_PARITY_EVEN
265+ false # EVEN -> cis
266+ else
267+ # we currently don't get here, because we filtered out NONE/UNKNOWN/UNDEFINED above
268+ # once we can handle undetermined stereo, we need to implement that logic here
269+ continue # unknown
270+ end
271+
272+ # store stereobond
273+ stereobonds[bond_key] = (nbrs[1 ], nbrs[3 ], is_trans)
274+ elseif s. type == INCHI_StereoType_Allene
275+ @info " Allene stereo, not yet tested"
276+ # Tried testing with `mol = smilestomol("C[C@]=C=C(C)F")`,
277+ # but the InChI generated from it does not contain stereo information.
278+ # Allene (axial) handling
279+ # InChI gives neighbors as W,X,Y,Z where W/X are substituents on one end
280+ # and Y/Z on the other. central_atom is the central cumulated C.
281+ # We need to pick a viewing atom and two reference atoms to encode axial chirality.
282+ center == 0 && continue
283+
284+ # For an allene to be chiral, both ends must have two substituents (i.e. W/X and Y/Z present)
285+ all(is_heavy.(nbrs)) || continue
286+ w, x, y, z = nbrs
287+
288+ is_clockwise = (parity == INCHI_PARITY_EVEN)
289+
290+ # store into same stereocenters dict so coordgen!/rendering code can use it
291+ stereocenters[center] = (w, x, y, is_clockwise)
292+ else
293+ @warn " Unknown stereo type $(s. type) , skipping"
294+ end
295+ end
200296
201- return res
297+ # merge into mol.gprops the same way you already did:
298+ merge!(g. gprops. stereocenter, stereocenters)
299+ merge!(g. gprops. stereobond, stereobonds)
300+ return g
202301end
203302
204303"""
205- function inchitomol(inchi::String; options = "", verbose = false )
304+ function inchitomol(inchi::String; options = "", config::Union{Nothing,Dict{Symbol,Any}} = nothing )
206305
207306Generate molecule from inchi string, `options` are specified in https://github.com/mojaie/libinchi/blob/master/INCHI_BASE/src/inchi_api.h
307+ `config` is for internal or advanced use only. Maybe removed in a future release.
208308"""
209- function inchitomol(inchi:: String ; options = " " , verbose = false )
210- inchitosdf(inchi; options, verbose) |> IOBuffer |> sdftomol
309+ function inchitomol(:: Type{T} , inchi:: AbstractString ;
310+ options:: String = " " ,
311+ config:: Union{Nothing,Dict{Symbol,Any}} = nothing
312+ ) where T <: MolGraph
313+ inchi isa String || (inchi = " $inchi " )
314+
315+ structure = inchi_OutputStructEx()
316+ inchi_input = inchi_InputINCHI(Base. unsafe_convert(Cstring, inchi),
317+ Base. unsafe_convert(Cstring, options))
318+
319+ ret = @ccall libinchi. GetStructFromINCHIEx(
320+ inchi_input:: Ref{inchi_InputINCHI} ,
321+ structure:: Ref{inchi_OutputStructEx}
322+ ):: Int32
323+
324+ if ret < 0
325+ error(" libinchi failed with code $ret " )
326+ end
327+
328+ config = if config === nothing && T === SMILESMolGraph
329+ Dict{Symbol, Any}(:on_init => smiles_on_init!, :on_update => smiles_on_update!)
330+ elseif config === nothing && T === SDFMolGraph
331+ Dict{Symbol, Any}(:on_init => sdf_on_init!, :on_update => sdf_on_update!)
332+ else
333+ config
334+ end
335+
336+ # determine the MolecularGraph atom/bond types and constructors
337+ V = vproptype(T) === AbstractAtom ? SDFAtom : vproptype(T)
338+ E = eproptype(T) === AbstractBond ? SDFBond : eproptype(T)
339+ ET = edgetype(T)
340+
341+ atoms = unsafe_wrap(Vector{inchi_Atom}, structure. atom, structure. num_atoms)
342+
343+ # heavy atoms
344+ expanded_syms = String[
345+ String(reinterpret(UInt8, collect(Iterators. takewhile(!= (0x00 ), a. elname))))
346+ for a in atoms]
347+
348+ # isotopic H/D/T
349+ for a in atoms
350+ # a.num_iso_H is a NTuple{4,S_CHAR}, where index 1 = non-specified, 2 = H, 3 = D, 4 = T
351+ # let's add symbols only for explicit isotopes [1H], [D] and [T]
352+ # as NUM_H_ISOTOPES = 3, we only need to check indices 2, 3, and 4
353+ for iso_index in 1 : NUM_H_ISOTOPES
354+ n = a. num_iso_H[iso_index + 1 ]
355+ iso_sym = [" H" , " D" , " T" ][iso_index]
356+ for _ in 1 : n
357+ push!(expanded_syms, iso_sym)
358+ end
359+ end
360+ end
361+
362+ # build bonds to isotopic H/D/T
363+ bonds = Tuple{Int,Int,Int}[]
364+ first_isotope_index = length(atoms) + 1
365+ next_iso_idx = first_isotope_index
366+ for (parent_idx, a) in enumerate(atoms)
367+ for iso_index in 1 : NUM_H_ISOTOPES
368+ n = a. num_iso_H[iso_index + 1 ]
369+ for _ in 1 : n
370+ push!(bonds, (parent_idx, next_iso_idx, 1 ))
371+ next_iso_idx += 1
372+ end
373+ end
374+ end
375+
376+ # heavy atom bonds
377+ for (i, a) in enumerate(atoms)
378+ for k in 1 : a. num_bonds
379+ j = a. neighbor[k] + 1
380+ j > i && push!(bonds, (i, j, Int(a. bond_type[k])))
381+ end
382+ end
383+
384+ N = length(expanded_syms)
385+ vprops = V[]
386+ edges = ET[]
387+ eprops = E[]
388+
389+ for idx in 1 : N
390+ if idx <= length(atoms)
391+ a = atoms[idx]
392+ symbol = expanded_syms[idx]
393+ d = Dict{String,Any}(
394+ " symbol" => symbol,
395+ " charge" => Int(a. charge),
396+ " multiplicity" => (Int(a. radical) == 0 ? 1 : Int(a. radical)),
397+ " isotope" => Int(a. isotopic_mass),
398+ )
399+ :coords in fieldnames(V) && push!(d, " coords" => [Float64(a. x), Float64(a. y), Float64(a. z)])
400+ push!(vprops, V(d))
401+ else
402+ symbol = expanded_syms[idx]
403+ d = Dict(
404+ " symbol" => symbol,
405+ " charge" => 0 ,
406+ " multiplicity" => 1 ,
407+ " isotope" => 0
408+ )
409+ :coords in fieldnames(V) && push!(d, " coords" => [0.0 , 0.0 , 0.0 ])
410+ push!(vprops, V(d))
411+ end
412+ end
413+
414+ for (i, j, order) in bonds
415+ ekey = i < j ? ET(i, j) : ET(j, i)
416+ push!(edges, ekey)
417+ push!(eprops, E(; order))
418+ end
419+
420+ g = T(edges, vprops, eprops; NamedTuple((k, v) for (k, v) in config). .. )
421+
422+ process_inchi_stereo!(g, structure)
423+
424+ @ccall libinchi. FreeStructFromINCHIEx(structure:: Ref{inchi_OutputStructEx} ):: Cvoid
425+
426+ coordgen!(g)
427+ remove_hydrogens!(g)
428+ return g
429+ end
430+
431+ function inchitomol(inchi:: String ; options:: String = " " , config:: Union{Nothing,Dict{Symbol,Any}} = nothing )
432+ inchitomol(SDFMolGraph, inchi; options, config)
211433end
0 commit comments