Skip to content

Commit bee3dbf

Browse files
authored
Update pub.bib (#139)
1 parent 3457832 commit bee3dbf

File tree

1 file changed

+95
-1
lines changed

1 file changed

+95
-1
lines changed

source/_data/pub.bib

Lines changed: 95 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12983,4 +12983,98 @@ @Article{Chang_JChemInfModel_2024_v64_p3149
1298312983
generalizability on novel compounds and different categories of human
1298412984
CYPs, which could greatly facilitate early stage drug design by
1298512985
avoiding CYP-reactive compounds.},
12986-
}
12986+
}
12987+
@Article{Luo_JacsAu_2024_v4_p3451,
12988+
author = {Weiliang Luo and Gengmo Zhou and Zhengdan Zhu and Yannan Yuan and
12989+
Guolin Ke and Zhewei Wei and Zhifeng Gao and Hang Zheng},
12990+
title = {{Bridging Machine Learning and Thermodynamics for Accurate pKa
12991+
Prediction}},
12992+
journal = {JACS Au},
12993+
year = 2024,
12994+
volume = 4,
12995+
number = 9,
12996+
pages = {3451--3465},
12997+
doi = {10.1021/jacsau.4c00271},
12998+
abstract = {Integrating scientific principles into machine learning models to
12999+
enhance their predictive performance and generalizability is a central
13000+
challenge in the development of AI for Science. Herein, we introduce
13001+
Uni-pKa, a novel framework that successfully incorporates
13002+
thermodynamic principles into machine learning modeling, achieving
13003+
high-precision predictions of acid dissociation constants (pKa), a
13004+
crucial task in the rational design of drugs and catalysts, as well as
13005+
a modeling challenge in computational physical chemistry for small
13006+
organic molecules. Uni-pKa utilizes a comprehensive free energy model
13007+
to represent molecular protonation equilibria accurately. It features
13008+
a structure enumerator that reconstructs molecular configurations from
13009+
pKa data, coupled with a neural network that functions as a free
13010+
energy predictor, ensuring high-throughput, data-driven prediction
13011+
while preserving thermodynamic consistency. Employing a
13012+
pretraining-finetuning strategy with both predicted and experimental pKa data,
13013+
Uni-pKa not only achieves state-of-the-art accuracy in
13014+
chemoinformatics but also shows comparable precision to quantum
13015+
mechanics-based methods.},
13016+
}
13017+
@Article{Wang_NatCommun_2024_v15_p1904,
13018+
author = {Jingqi Wang and Jiapeng Liu and Hongshuai Wang and Musen Zhou and
13019+
Guolin Ke and Linfeng Zhang and Jianzhong Wu and Zhifeng Gao and
13020+
Diannan Lu},
13021+
title = {{A comprehensive transformer-based approach for high-accuracy gas
13022+
adsorption predictions in metal-organic frameworks}},
13023+
journal = {Nat. Commun.},
13024+
year = 2024,
13025+
volume = 15,
13026+
number = 1,
13027+
pages = 1904,
13028+
doi = {10.1038/s41467-024-46276-x},
13029+
abstract = {Gas separation is crucial for industrial production and environmental
13030+
protection, with metal-organic frameworks (MOFs) offering a promising
13031+
solution due to their tunable structural properties and chemical
13032+
compositions. Traditional simulation approaches, such as molecular
13033+
dynamics, are complex and computationally demanding. Although feature
13034+
engineering-based machine learning methods perform better, they are
13035+
susceptible to overfitting because of limited labeled data.
13036+
Furthermore, these methods are typically designed for single tasks,
13037+
such as predicting gas adsorption capacity under specific conditions,
13038+
which restricts the utilization of comprehensive datasets including
13039+
all adsorption capacities. To address these challenges, we propose
13040+
Uni-MOF, an innovative framework for large-scale, three-dimensional
13041+
MOF representation learning, designed for multi-purpose gas
13042+
prediction. Specifically, Uni-MOF serves as a versatile gas adsorption
13043+
estimator for MOF materials, employing pure three-dimensional
13044+
representations learned from over 631,000 collected MOF and COF
13045+
structures. Our experimental results show that Uni-MOF can
13046+
automatically extract structural representations and predict
13047+
adsorption capacities under various operating conditions using a
13048+
single model. For simulated data, Uni-MOF exhibits remarkably high
13049+
predictive accuracy across all datasets. Additionally, the values
13050+
predicted by Uni-MOF correspond with the outcomes of adsorption
13051+
experiments. Furthermore, Uni-MOF demonstrates considerable potential
13052+
for broad applicability in predicting a wide array of other
13053+
properties.},
13054+
}
13055+
@Article{Xiao_arXiv_2024_p2411.10821,
13056+
author = {Teng Xiao and Chao Cui and Huaisheng Zhu and Vasant G. Honavar},
13057+
title = {{GeomCLIP: Contrastive Geometry-Text Pre-training for Molecules}},
13058+
journal = {arXiv},
13059+
year = 2024,
13060+
pages = {2411.10821},
13061+
doi = {10.48550/arXiv.2411.10821},
13062+
abstract = {Pretraining molecular representations is crucial for drug and material
13063+
discovery. Recent methods focus on learning representations from
13064+
geometric structures, effectively capturing 3D position information.
13065+
Yet, they overlook the rich information in biomedical texts, which
13066+
detail molecules' properties and substructures. With this in mind, we
13067+
set up a data collection effort for 200K pairs of ground-state
13068+
geometric structures and biomedical texts, resulting in a PubChem3D
13069+
dataset. Based on this dataset, we propose the GeomCLIP framework to
13070+
enhance for multi-modal representation learning from molecular
13071+
structures and biomedical text. During pre-training, we design two
13072+
types of tasks, i.e., multimodal representation alignment and unimodal
13073+
denoising pretraining, to align the 3D geometric encoder with textual
13074+
information and, at the same time, preserve its original
13075+
representation power. Experimental results show the effectiveness of
13076+
GeomCLIP in various tasks such as molecular property prediction,
13077+
zero-shot text-molecule retrieval, and 3D molecule captioning. Our code and
13078+
collected dataset are available at
13079+
https://github.com/xiaocui3737/GeomCLIP},
13080+
}

0 commit comments

Comments
 (0)