@@ -13078,3 +13078,154 @@ @Article{Xiao_arXiv_2024_p2411.10821
1307813078 collected dataset are available at
1307913079 {\textbackslash}url{\{}https://github.com/xiaocui3737/GeomCLIP{\}}},
1308013080}
13081+ @Article{Cheng_AdvOptMater_2023_v11,
13082+ author = {Zheng Cheng and Jiapeng Liu and Tong Jiang and Mohan Chen and Fuzhi
13083+ Dai and Zhifeng Gao and Guolin Ke and Zifeng Zhao and Qi Ou},
13084+ title = {{Automatic Screen{-}out of Ir(III) Complex Emitters by Combined Machine
13085+ Learning and Computational Analysis}},
13086+ journal = {Adv. Opt. Mater.},
13087+ year = 2023,
13088+ volume = 11,
13089+ number = 18,
13090+ doi = {10.1002/adom.202301093},
13091+ abstract = {The organic light{-}emitting diode (OLED) has gained
13092+ widespread commercial use, yet there is a continuous need to identify
13093+ innovative emitters that offer higher efficiency and a broader color
13094+ gamut. To effectively screen out promising OLED molecules that are yet
13095+ to be synthesized, representation learning aided high throughput
13096+ virtual screening (HTVS) over millions of Ir(III) complexes, which are
13097+ prototypical types of phosphorescent OLED material constructed via a
13098+ random combination of 278 reported ligands. This study successfully
13099+ screens out a decent amount of promising candidates for both display
13100+ and lighting purposes, which are worth further experimental
13101+ investigation. The high efficiency and accuracy of this model are
13102+ largely attributed to the pioneering attempt of using representation
13103+ learning to organic luminescent molecules, which is initiated by a
13104+ pre{-}training procedure with over 1.6 million 3D molecular structures
13105+ and frontier orbital energies predicted via semi{-}empirical methods,
13106+ followed by a fine{-}tuning scheme via the quantum mechanical computed
13107+ properties over around 1500 candidates. Such workflow enables an
13108+ effective model construction process that is otherwise hindered by the
13109+ scarcity of labeled data and can be straightforwardly extended to the
13110+ discovery of other novel materials.},
13111+ }
13112+ @Article{Yao_JacsAu_2024_v4_p992,
13113+ author = {Lin Yao and Wentao Guo and Zhen Wang and Shang Xiang and Wentan Liu
13114+ and Guolin Ke},
13115+ title = {{Node-Aligned Graph-to-Graph: Elevating Template-free Deep Learning
13116+ Approaches in Single-Step Retrosynthesis}},
13117+ journal = {JACS Au},
13118+ year = 2024,
13119+ volume = 4,
13120+ number = 3,
13121+ pages = {992--1003},
13122+ doi = {10.1021/jacsau.3c00737},
13123+ abstract = {Single-step retrosynthesis in organic chemistry increasingly benefits
13124+ from deep learning (DL) techniques in computer-aided synthesis design.
13125+ While template-free DL models are flexible and promising for
13126+ retrosynthesis prediction, they often ignore vital 2D molecular
13127+ information and struggle with atom alignment for node generation,
13128+ resulting in lower performance compared to the template-based and
13129+ semi-template-based methods. To address these issues, we introduce
13130+ node-aligned graph-to-graph (NAG2G), a transformer-based template-free
13131+ DL model. NAG2G combines 2D molecular graphs and 3D conformations to
13132+ retain comprehensive molecular details and incorporates
13133+ product-reactant atom mapping through node alignment, which determines the
13134+ order of the node-by-node graph outputs process in an autoregressive
13135+ manner. Through rigorous benchmarking and detailed case studies, we
13136+ have demonstrated that NAG2G stands out with its remarkable predictive
13137+ accuracy on the expansive data sets of USPTO-50k and USPTO-FULL.
13138+ Moreover, the model's practical utility is underscored by its
13139+ successful prediction of synthesis pathways for multiple drug
13140+ candidate molecules. This proves not only NAG2G's robustness but also
13141+ its potential to revolutionize the prediction of complex chemical
13142+ synthesis processes for future synthetic route design tasks.},
13143+ }
13144+ @Article{Lu_arXiv_2023_p2303.16982,
13145+ author = {Shuqi Lu and Zhifeng Gao and Di He and Linfeng Zhang and Guolin Ke},
13146+ title = {{Highly Accurate Quantum Chemical Property Prediction with Uni-Mol+}},
13147+ journal = {arXiv},
13148+ year = 2023,
13149+ pages = {2303.16982},
13150+ doi = {10.48550/arXiv.2303.16982},
13151+ abstract = {Recent developments in deep learning have made remarkable progress in
13152+ speeding up the prediction of quantum chemical (QC) properties by
13153+ removing the need for expensive electronic structure calculations like
13154+ density functional theory. However, previous methods learned from 1D
13155+ SMILES sequences or 2D molecular graphs failed to achieve high
13156+ accuracy as QC properties primarily depend on the 3D equilibrium
13157+ conformations optimized by electronic structure methods, far different
13158+ from the sequence-type and graph-type data. In this paper, we propose
13159+ a novel approach called Uni-Mol+ to tackle this challenge. Uni-Mol+
13160+ first generates a raw 3D molecule conformation from inexpensive
13161+ methods such as RDKit. Then, the raw conformation is iteratively
13162+ updated to its target DFT equilibrium conformation using neural
13163+ networks, and the learned conformation will be used to predict the QC
13164+ properties. To effectively learn this update process towards the
13165+ equilibrium conformation, we introduce a two-track Transformer model
13166+ backbone and train it with the QC property prediction task. We also
13167+ design a novel approach to guide the model's training process. Our
13168+ extensive benchmarking results demonstrate that the proposed Uni-Mol+
13169+ significantly improves the accuracy of QC property prediction in
13170+ various datasets. We have made the code and model publicly available
13171+ at https://github.com/dptech-corp/Uni-Mol.},
13172+ }
13173+ @Article{Gao_arXiv_2023_p2304.12239,
13174+ author = {Zhifeng Gao and Xiaohong Ji and Guojiang Zhao and Hongshuai Wang and
13175+ Hang Zheng and Guolin Ke and Linfeng Zhang},
13176+ title = {{Uni-QSAR: an Auto-ML Tool for Molecular Property Prediction}},
13177+ journal = {arXiv},
13178+ year = 2023,
13179+ pages = {2304.12239},
13180+ doi = {10.48550/arXiv.2304.12239},
13181+ abstract = {Recently deep learning based quantitative structure-activity
13182+ relationship (QSAR) models has shown surpassing performance than
13183+ traditional methods for property prediction tasks in drug discovery.
13184+ However, most DL based QSAR models are restricted to limited labeled
13185+ data to achieve better performance, and also are sensitive to model
13186+ scale and hyper-parameters. In this paper, we propose Uni-QSAR, a
13187+ powerful Auto-ML tool for molecule property prediction tasks. Uni-QSAR
13188+ combines molecular representation learning (MRL) of 1D sequential
13189+ tokens, 2D topology graphs, and 3D conformers with pretraining models
13190+ to leverage rich representation from large-scale unlabeled data.
13191+ Without any manual fine-tuning or model selection, Uni-QSAR
13192+ outperforms SOTA in 21/22 tasks of the Therapeutic Data Commons (TDC)
13193+ benchmark under designed parallel workflow, with an average
13194+ performance improvement of 6.09{\%}. Furthermore, we
13195+ demonstrate the practical usefulness of Uni-QSAR in drug discovery
13196+ domains.},
13197+ }
13198+ @Article{Wang_arXiv_2024_p2406.04727,
13199+ author = {Fanmeng Wang and Wentao Guo and Minjie Cheng and Shen Yuan and
13200+ Hongteng Xu and Zhifeng Gao},
13201+ title = {{MMPolymer: A Multimodal Multitask Pretraining Framework for Polymer
13202+ Property Prediction}},
13203+ journal = {arXiv},
13204+ year = 2024,
13205+ pages = {2406.04727},
13206+ doi = {10.48550/arXiv.2406.04727},
13207+ abstract = {Polymers are high-molecular-weight compounds constructed by the
13208+ covalent bonding of numerous identical or similar monomers so that
13209+ their 3D structures are complex yet exhibit unignorable regularity.
13210+ Typically, the properties of a polymer, such as plasticity,
13211+ conductivity, bio-compatibility, and so on, are highly correlated with
13212+ its 3D structure. However, existing polymer property prediction
13213+ methods heavily rely on the information learned from polymer SMILES
13214+ sequences (P-SMILES strings) while ignoring crucial 3D structural
13215+ information, resulting in sub-optimal performance. In this work, we
13216+ propose MMPolymer, a novel multimodal multitask pretraining framework
13217+ incorporating polymer 1D sequential and 3D structural information to
13218+ encourage downstream polymer property prediction tasks. Besides,
13219+ considering the scarcity of polymer 3D data, we further introduce the
13220+ {``}Star Substitution{''} strategy to extract 3D structural
13221+ information effectively. During pretraining, in addition to predicting
13222+ masked tokens and recovering clear 3D coordinates, MMPolymer achieves
13223+ the cross-modal alignment of latent representations. Then we further
13224+ fine-tune the pretrained MMPolymer for downstream polymer property
13225+ prediction tasks in the supervised learning paradigm. Experiments show
13226+ that MMPolymer achieves state-of-the-art performance in downstream
13227+ property prediction tasks. Moreover, given the pretrained MMPolymer,
13228+ utilizing merely a single modality in the fine-tuning phase can also
13229+ outperform existing methods, showcasing the exceptional capability of
13230+ MMPolymer in polymer feature extraction and utilization.},
13231+ }
0 commit comments