@@ -16763,3 +16763,245 @@ @Article{Zhang_PhysFluids_2024_v36
1676316763 simulations, addressing various and complex scenarios based on
1676416764 detailed chemistry, while significantly reducing computational costs.},
1676516765}
16766+ @Article{Wang_NatCommun_2024_v15_p1904,
16767+ author = {Jingqi Wang and Jiapeng Liu and Hongshuai Wang and Musen Zhou and
16768+ Guolin Ke and Linfeng Zhang and Jianzhong Wu and Zhifeng Gao and
16769+ Diannan Lu},
16770+ title = {{A comprehensive transformer-based approach for high-accuracy gas
16771+ adsorption predictions in metal-organic frameworks}},
16772+ journal = {Nat. Commun.},
16773+ year = 2024,
16774+ volume = 15,
16775+ number = 1,
16776+ pages = 1904,
16777+ doi = {10.1038/s41467-024-46276-x},
16778+ abstract = {Gas separation is crucial for industrial production and environmental
16779+ protection, with metal-organic frameworks (MOFs) offering a promising
16780+ solution due to their tunable structural properties and chemical
16781+ compositions. Traditional simulation approaches, such as molecular
16782+ dynamics, are complex and computationally demanding. Although feature
16783+ engineering-based machine learning methods perform better, they are
16784+ susceptible to overfitting because of limited labeled data.
16785+ Furthermore, these methods are typically designed for single tasks,
16786+ such as predicting gas adsorption capacity under specific conditions,
16787+ which restricts the utilization of comprehensive datasets including
16788+ all adsorption capacities. To address these challenges, we propose
16789+ Uni-MOF, an innovative framework for large-scale, three-dimensional
16790+ MOF representation learning, designed for multi-purpose gas
16791+ prediction. Specifically, Uni-MOF serves as a versatile gas adsorption
16792+ estimator for MOF materials, employing pure three-dimensional
16793+ representations learned from over 631,000 collected MOF and COF
16794+ structures. Our experimental results show that Uni-MOF can
16795+ automatically extract structural representations and predict
16796+ adsorption capacities under various operating conditions using a
16797+ single model. For simulated data, Uni-MOF exhibits remarkably high
16798+ predictive accuracy across all datasets. Additionally, the values
16799+ predicted by Uni-MOF correspond with the outcomes of adsorption
16800+ experiments. Furthermore, Uni-MOF demonstrates considerable potential
16801+ for broad applicability in predicting a wide array of other
16802+ properties.},
16803+ }
16804+
16805+
16806+ @Article{Luo_JacsAu_2024_v4_p3451,
16807+ author = {Weiliang Luo and Gengmo Zhou and Zhengdan Zhu and Yannan Yuan and
16808+ Guolin Ke and Zhewei Wei and Zhifeng Gao and Hang Zheng},
16809+ title = {{Bridging Machine Learning and Thermodynamics for Accurate
16810+ p$K_{\mathrm{a}}$ Prediction}},
16811+ journal = {JACS Au},
16812+ year = 2024,
16813+ volume = 4,
16814+ number = 9,
16815+ pages = {3451--3465},
16816+ doi = {10.1021/jacsau.4c00271},
16817+ abstract = {Integrating scientific principles into machine learning models to
16818+ enhance their predictive performance and generalizability is a central
16819+ challenge in the development of AI for Science. Herein, we introduce
16820+ Uni-p$K_{\mathrm{a}}$, a novel framework that successfully incorporates
16821+ thermodynamic principles into machine learning modeling, achieving
16822+ high-precision predictions of acid dissociation constants (p$K_{\mathrm{a}}$), a
16823+ crucial task in the rational design of drugs and catalysts, as well as
16824+ a modeling challenge in computational physical chemistry for small
16825+ organic molecules. Uni-p$K_{\mathrm{a}}$ utilizes a comprehensive free energy model
16826+ to represent molecular protonation equilibria accurately. It features
16827+ a structure enumerator that reconstructs molecular configurations from
16828+ p$K_{\mathrm{a}}$ data, coupled with a neural network that functions as a free
16829+ energy predictor, ensuring high-throughput, data-driven prediction
16830+ while preserving thermodynamic consistency. Employing a
16831+ pretraining-finetuning strategy with both predicted and experimental p$K_{\mathrm{a}}$ data,
16832+ Uni-p$K_{\mathrm{a}}$ not only achieves state-of-the-art accuracy in
16833+ chemoinformatics but also shows comparable precision to quantum
16834+ mechanics-based methods.},
16835+ }
16836+
16837+ @Article{Fan_JChemInfModel_2024_v64_p8414,
16838+ author = {Jiahao Fan and Ziyao Li and Eric Alcaide and Guolin Ke and Huaqing
16839+ Huang and Weinan E},
16840+ title = {{Accurate Conformation Sampling via Protein Structural Diffusion}},
16841+ journal = {J. Chem. Inf. Model.},
16842+ year = 2024,
16843+ volume = 64,
16844+ number = 22,
16845+ pages = {8414--8426},
16846+ doi = {10.1021/acs.jcim.4c00928},
16847+ abstract = {Accurate sampling of protein conformations is pivotal for advances in
16848+ biology and medicine. Although there has been tremendous progress in
16849+ protein structure prediction in recent years due to deep learning,
16850+ models that can predict the different stable conformations of proteins
16851+ with high accuracy and structural validity are still lacking. Here, we
16852+ introduce UFConf, a cutting-edge approach designed for robust sampling
16853+ of diverse protein conformations based solely on amino acid sequences.
16854+ This method transforms AlphaFold2 into a diffusion model by
16855+ implementing a conformation-based diffusion process and adapting the
16856+ architecture to process diffused inputs effectively. To counteract the
16857+ inherent conformational bias in the Protein Data Bank, we developed a
16858+ novel hierarchical reweighting protocol based on structural
16859+ clustering. Our evaluations demonstrate that UFConf outperforms
16860+ existing methods in terms of successful sampling and structural
16861+ validity. The comparisons with long-time molecular dynamics show that
16862+ UFConf can overcome the energy barrier existing in molecular dynamics
16863+ simulations and perform more efficient sampling. Furthermore, we
16864+ showcase UFConf's utility in drug discovery through its application in
16865+ neural protein-ligand docking. In a blind test, it accurately
16866+ predicted a novel protein-ligand complex, underscoring its potential
16867+ to impact real-world biological research. Additionally, we present
16868+ other modes of sampling using UFConf, including partial sampling with
16869+ fixed motif, Langevin dynamics, and structural interpolation.},
16870+ }
16871+
16872+ @Article{He_NatCommun_2024_v15_p5163,
16873+ author = {Xinheng He and Lifen Zhao and Yinping Tian and Rui Li and Qinyu Chu
16874+ and Zhiyong Gu and Mingyue Zheng and Yusong Wang and Shaoning Li and
16875+ Hualiang Jiang and Yi Jiang and Liuqing Wen and Dingyan Wang and Xi
16876+ Cheng},
16877+ title = {{Highly accurate carbohydrate-binding site prediction with
16878+ DeepGlycanSite}},
16879+ journal = {Nat. Commun.},
16880+ year = 2024,
16881+ volume = 15,
16882+ number = 1,
16883+ pages = 5163,
16884+ doi = {10.1038/s41467-024-49516-2},
16885+ abstract = {As the most abundant organic substances in nature, carbohydrates are
16886+ essential for life. Understanding how carbohydrates regulate proteins
16887+ in the physiological and pathological processes presents opportunities
16888+ to address crucial biological problems and develop new therapeutics.
16889+ However, the diversity and complexity of carbohydrates pose a
16890+ challenge in experimentally identifying the sites where carbohydrates
16891+ bind to and act on proteins. Here, we introduce a deep learning model,
16892+ DeepGlycanSite, capable of accurately predicting carbohydrate-binding
16893+ sites on a given protein structure. Incorporating geometric and
16894+ evolutionary features of proteins into a deep equivariant graph neural
16895+ network with the transformer architecture, DeepGlycanSite remarkably
16896+ outperforms previous state-of-the-art methods and effectively predicts
16897+ binding sites for diverse carbohydrates. Integrating with a
16898+ mutagenesis study, DeepGlycanSite reveals the
16899+ guanosine-5'-diphosphate-sugar-recognition site of an important
16900+ G-protein coupled receptor. These findings demonstrate DeepGlycanSite
16901+ is invaluable for carbohydrate-binding site prediction and could
16902+ provide insights into molecular mechanisms underlying
16903+ carbohydrate-regulation of therapeutically important proteins.},
16904+ }
16905+
16906+ @Article{Comajuncosa-Creus_JCheminformatics_2024_v16_p70,
16907+ author = {Arnau Comajuncosa-Creus and Aksel Lenes and Miguel
16908+ S{\'a}nchez-Palomino and Dylan Dalton and Patrick Aloy},
16909+ title = {{Stereochemically-aware bioactivity descriptors for uncharacterized
16910+ chemical compounds}},
16911+ journal = {J. Cheminformatics},
16912+ year = 2024,
16913+ volume = 16,
16914+ number = 1,
16915+ pages = 70,
16916+ doi = {10.1186/s13321-024-00867-4},
16917+ abstract = {Stereochemistry plays a fundamental role in pharmacology. Here, we
16918+ systematically investigate the relationship between stereoisomerism
16919+ and bioactivity on over 1{~}M compounds, finding that a very
16920+ significant fraction ({\textasciitilde}{\,}40{\%}) of spatial isomer
16921+ pairs show, to some extent, distinct bioactivities. We then use the 3D
16922+ representation of these molecules to train a collection of deep neural
16923+ networks (Signaturizers3D) to generate bioactivity descriptors
16924+ associated to small molecules, that capture their effects at
16925+ increasing levels of biological complexity (i.e. from protein targets
16926+ to clinical outcomes). Further, we assess the ability of the
16927+ descriptors to distinguish between stereoisomers and to recapitulate
16928+ their different target binding profiles. Overall, we show how these
16929+ new stereochemically-aware descriptors provide an even more faithful
16930+ description of complex small molecule bioactivity properties,
16931+ capturing key differences in the activity of stereoisomers. Scientific
16932+ contribution: We systematically assess the relationship between
16933+ stereoisomerism and bioactivity on a large scale, focusing on
16934+ compound-target binding events, and use our findings to train novel
16935+ deep learning models to generate stereochemically-aware bioactivity
16936+ signatures for any compound of interest.},
16937+ }
16938+
16939+ @Article{Lu_NatCommun_2024_v15_p7104,
16940+ author = {Shuqi Lu and Zhifeng Gao and Di He and Linfeng Zhang and Guolin Ke},
16941+ title = {{Data-driven quantum chemical property prediction leveraging 3D
16942+ conformations with Uni-Mol}},
16943+ journal = {Nat. Commun.},
16944+ year = 2024,
16945+ volume = 15,
16946+ number = 1,
16947+ pages = 7104,
16948+ doi = {10.1038/s41467-024-51321-w},
16949+ abstract = {Quantum chemical (QC) property prediction is crucial for computational
16950+ materials and drug design, but relies on expensive electronic
16951+ structure calculations like density functional theory (DFT). Recent
16952+ deep learning methods accelerate this process using 1D SMILES or 2D
16953+ graphs as inputs but struggle to achieve high accuracy as most QC
16954+ properties depend on refined 3D molecular equilibrium conformations.
16955+ We introduce Uni-Mol+, a deep learning approach that leverages 3D
16956+ conformations for accurate QC property prediction. Uni-Mol+ first
16957+ generates a raw 3D conformation using RDKit then iteratively refines
16958+ it towards DFT equilibrium conformation using neural networks, which
16959+ is finally used to predict the QC properties. To effectively learn
16960+ this conformation update process, we introduce a two-track Transformer
16961+ model backbone and a novel training approach. Our benchmarking results
16962+ demonstrate that the proposed Uni-Mol+ significantly improves the
16963+ accuracy of QC property prediction in various datasets.},
16964+ }
16965+
16966+
16967+ @Article{Ding_JChemInfModel_2024_v64_p2955,
16968+ author = {Yuheng Ding and Bo Qiang and Qixuan Chen and Yiqiao Liu and Liangren
16969+ Zhang and Zhenming Liu},
16970+ title = {{Exploring Chemical Reaction Space with Machine Learning Models:
16971+ Representation and Feature Perspective}},
16972+ journal = {J. Chem. Inf. Model.},
16973+ year = 2024,
16974+ volume = 64,
16975+ number = 8,
16976+ pages = {2955--2970},
16977+ doi = {10.1021/acs.jcim.4c00004},
16978+ abstract = {Chemical reactions serve as foundational building blocks for organic
16979+ chemistry and drug design. In the era of large AI models, data-driven
16980+ approaches have emerged to innovate the design of novel reactions,
16981+ optimize existing ones for higher yields, and discover new pathways
16982+ for synthesizing chemical structures comprehensively. To effectively
16983+ address these challenges with machine learning models, it is
16984+ imperative to derive robust and informative representations or engage
16985+ in feature engineering using extensive data sets of reactions. This
16986+ work aims to provide a comprehensive review of established reaction
16987+ featurization approaches, offering insights into the selection of
16988+ representations and the design of features for a wide array of tasks.
16989+ The advantages and limitations of employing SMILES, molecular
16990+ fingerprints, molecular graphs, and physics-based properties are
16991+ meticulously elaborated. Solutions to bridge the gap between different
16992+ representations will also be critically evaluated. Additionally, we
16993+ introduce a new frontier in chemical reaction pretraining, holding
16994+ promise as an innovative yet unexplored avenue.},
16995+ }
16996+
16997+ @Article{Cui_NatMachIntell_2024_v6_p428,
16998+ author = {Taoyong Cui and Chenyu Tang and Mao Su and Shufei Zhang and Yuqiang Li
16999+ and Lei Bai and Yuhan Dong and Xingao Gong and Wanli Ouyang},
17000+ title = {{Geometry-enhanced pretraining on interatomic potentials}},
17001+ journal = {Nat. Mach. Intell.},
17002+ year = 2024,
17003+ volume = 6,
17004+ number = 4,
17005+ pages = {428--436},
17006+ doi = {10.1038/s42256-024-00818-6},
17007+ }
0 commit comments