From 7088cb537c49062b1efb059abc0bcf99f605fecf Mon Sep 17 00:00:00 2001 From: Aleksandr Shoiko Date: Sun, 9 Nov 2025 20:32:57 +0700 Subject: [PATCH] Apply mdformat autoformatting from pre-commit --- README.md | 2 +- examples/llm/README.md | 18 +++++++++--------- examples/llm/molecule_gpt.py | 2 +- torch_geometric/llm/models/molecule_gpt.py | 2 +- torch_geometric/llm/models/protein_mpnn.py | 2 +- torch_geometric/nn/conv/fused_gat_conv.py | 4 ++-- 6 files changed, 15 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index df53e36d5ead..686888946919 100644 --- a/README.md +++ b/README.md @@ -217,7 +217,7 @@ These GNN layers can be stacked together to create Graph Neural Network models. - **[HGTConv](https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.nn.conv.HGTConv.html)** from Hu *et al.*: [Heterogeneous Graph Transformer](https://arxiv.org/abs/2003.01332) (WWW 2020) \[[**Example**](https://github.com/pyg-team/pytorch_geometric/blob/master/examples/hetero/hgt_dblp.py)\] - **[HEATConv](https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.nn.conv.HEATonv.html)** from Mo *et al.*: [Heterogeneous Edge-Enhanced Graph Attention Network For Multi-Agent Trajectory Prediction](https://arxiv.org/abs/2106.07161) (CoRR 2021) - **[SSGConv](https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.nn.conv.SSGConv.html)** from Zhu *et al.*: [Simple Spectral Graph Convolution](https://openreview.net/forum?id=CYO5T-YjWZV) (ICLR 2021) -- **[FusedGATConv](https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.nn.conv.FusedGATConv.html)** from Zhang *et al.*: [Understanding GNN Computational Graph: A Coordinated Computation, IO, and Memory Perspective](https://proceedings.mlsys.org/paper/2022/file/9a1158154dfa42caddbd0694a4e9bdc8-Paper.pdf) (MLSys 2022) +- **[FusedGATConv](https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.nn.conv.FusedGATConv.html)** from 
Zhang *et al.*: [Understanding GNN Computational Graph: A Coordinated Computation, IO, and Memory Perspective](https://proceedings.mlsys.org/paper_files/paper/2022/file/b559156047e50cf316207249d0b5a6c5-Paper.pdf) (MLSys 2022) - **[GPSConv](https://pytorch-geometric.readthedocs.io/en/latest/generated/torch_geometric.nn.conv.GPSConv.html)** from Rampášek *et al.*: [Recipe for a General, Powerful, Scalable Graph Transformer](https://arxiv.org/abs/2205.12454) (NeurIPS 2022) \[[**Example**](https://github.com/pyg-team/pytorch_geometric/blob/master/examples/graph_gps.py)\] diff --git a/examples/llm/README.md b/examples/llm/README.md index a6dc96eb93b5..40a1e902f2e0 100644 --- a/examples/llm/README.md +++ b/examples/llm/README.md @@ -1,11 +1,11 @@ # Examples for Co-training LLMs and GNNs -| Example | Description | -| -------------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| [`g_retriever.py`](./g_retriever.py) | Example helper functions for how to use the [G-retriever](https://arxiv.org/abs/2402.07630) GNN+LLM module in PyG. 
We also have an [example repo](https://github.com/neo4j-product-examples/neo4j-gnn-llm-example) for integration with [Neo4j Graph DBs][neo4j.com] along with an associated [blog](https://developer.nvidia.com/blog/boosting-qa-accuracy-with-graphrag-using-pyg-and-graph-databases/) showing 2x accuracy gains over LLMs on real medical data. See examples/llm/txt2kg_rag.py for e2e pipeline in PyG including: KG Creation, Subgraph Retrieval, GNN+LLM Finetuning, Testing, LLM Judge Eval. | -| [`nvtx_examples/`](./nvtx_examples/) | Contains examples of how to wrap functions using the NVTX profiler for CUDA runtime analysis. | -| [`molecule_gpt.py`](./molecule_gpt.py) | Example for MoleculeGPT: Instruction Following Large Language Models for Molecular Property Prediction. Supports MoleculeGPT and InstructMol dataset | -| [`glem.py`](./glem.py) | Example for [GLEM](https://arxiv.org/abs/2210.14709), a GNN+LLM co-training model via variational Expectation-Maximization (EM) framework on node classification tasks to achieve SOTA results | -| [`git_mol.py`](./git_mol.py) | Example for GIT-Mol: A Multi-modal Large Language Model for Molecular Science with Graph, Image, and Text | -| [`protein_mpnn.py`](./protein_mpnn.py) | Example for [Robust deep learning--based protein sequence design using ProteinMPNN](https://www.biorxiv.org/content/10.1101/2022.06.03.494563v1) | -| [`txt2kg_rag.py`](./txt2kg_rag.py) | Full end 2 end RAG pipeline using TXT2KG and Vector and Graph RAG with a GNN to achieve state of the art results. Uses the [techQA dataset](https://paperswithcode.com/dataset/techqa) but can be extended to handle any RAG dataset with a corpus of documents and an associated set of Q+A pairs to be split for train/eval/test. See [Stanford GNN+LLM Talk](https://www.nvidia.com/en-us/on-demand/session/other25-nv-0003/) for more details. Note that the TechQA data requires only a single document to answer each question so it can be viewed as a toy example. 
To see significant accuracy boosts from GNN+LLM TXT2KG based RAG, use data that requires multiple text chunks to answer a question. In cases where single document can answer, basic RAG should be sufficient. | +| Example | Description | +| -------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| [`g_retriever.py`](./g_retriever.py) | Example helper functions for how to use the [G-retriever](https://arxiv.org/abs/2402.07630) GNN+LLM module in PyG. We also have an [example repo](https://github.com/neo4j-product-examples/neo4j-gnn-llm-example) for integration with [Neo4j Graph DBs][neo4j.com] along with an associated [blog](https://developer.nvidia.com/blog/boosting-qa-accuracy-with-graphrag-using-pyg-and-graph-databases/) showing 2x accuracy gains over LLMs on real medical data. See examples/llm/txt2kg_rag.py for e2e pipeline in PyG including: KG Creation, Subgraph Retrieval, GNN+LLM Finetuning, Testing, LLM Judge Eval. | +| [`nvtx_examples/`](./nvtx_examples/) | Contains examples of how to wrap functions using the NVTX profiler for CUDA runtime analysis. 
| +| [`molecule_gpt.py`](./molecule_gpt.py) | Example for [MoleculeGPT: Instruction Following Large Language Models for Molecular Property Prediction](https://ai4d3.github.io/2023/papers/34.pdf). Supports MoleculeGPT and InstructMol dataset | +| [`glem.py`](./glem.py) | Example for [GLEM](https://arxiv.org/abs/2210.14709), a GNN+LLM co-training model via variational Expectation-Maximization (EM) framework on node classification tasks to achieve SOTA results | +| [`git_mol.py`](./git_mol.py) | Example for [GIT-Mol: A Multi-modal Large Language Model for Molecular Science with Graph, Image, and Text](https://arxiv.org/abs/2308.06911) | +| [`protein_mpnn.py`](./protein_mpnn.py) | Example for [Robust deep learning based protein sequence design using ProteinMPNN](https://www.biorxiv.org/content/10.1101/2022.06.03.494563v1) | +| [`txt2kg_rag.py`](./txt2kg_rag.py) | Full end 2 end RAG pipeline using TXT2KG and Vector and Graph RAG with a GNN to achieve state of the art results. Uses the [techQA dataset](https://arxiv.org/abs/1911.02984) but can be extended to handle any RAG dataset with a corpus of documents and an associated set of Q+A pairs to be split for train/eval/test. See [Stanford GNN+LLM Talk](https://www.nvidia.com/en-us/on-demand/session/other25-nv-0003/) for more details. Note that the TechQA data requires only a single document to answer each question so it can be viewed as a toy example. To see significant accuracy boosts from GNN+LLM TXT2KG based RAG, use data that requires multiple text chunks to answer a question. In cases where single document can answer, basic RAG should be sufficient. | diff --git a/examples/llm/molecule_gpt.py b/examples/llm/molecule_gpt.py index 8d3d027c7525..70bc78235eaa 100644 --- a/examples/llm/molecule_gpt.py +++ b/examples/llm/molecule_gpt.py @@ -1,5 +1,5 @@ """This example implements the MoleculeGPT model -(https://ai4d3.github.io/papers/34.pdf) using PyG. +(https://ai4d3.github.io/2023/papers/34.pdf) using PyG. 
""" import argparse import math diff --git a/torch_geometric/llm/models/molecule_gpt.py b/torch_geometric/llm/models/molecule_gpt.py index 85576098e0b4..a410ab11dbe4 100644 --- a/torch_geometric/llm/models/molecule_gpt.py +++ b/torch_geometric/llm/models/molecule_gpt.py @@ -26,7 +26,7 @@ def pad_or_truncate(embeddings: Tensor, max_seq_len: int, class MoleculeGPT(torch.nn.Module): r"""The MoleculeGPT model from the `"MoleculeGPT: Instruction Following Large Language Models for Molecular Property Prediction" - <https://ai4d3.github.io/papers/34.pdf>`_ paper. + <https://ai4d3.github.io/2023/papers/34.pdf>`_ paper. Args: llm (LLM): The LLM to use. diff --git a/torch_geometric/llm/models/protein_mpnn.py b/torch_geometric/llm/models/protein_mpnn.py index d21ba3b88a1c..225974d0b121 100644 --- a/torch_geometric/llm/models/protein_mpnn.py +++ b/torch_geometric/llm/models/protein_mpnn.py @@ -156,7 +156,7 @@ def message(self, x_i: torch.Tensor, x_j: torch.Tensor, class ProteinMPNN(torch.nn.Module): - r"""The ProteinMPNN model from the `"Robust deep learning--based + r"""The ProteinMPNN model from the `"Robust deep learning based protein sequence design using ProteinMPNN" <https://www.biorxiv.org/content/10.1101/2022.06.03.494563v1>`_ paper. diff --git a/torch_geometric/nn/conv/fused_gat_conv.py b/torch_geometric/nn/conv/fused_gat_conv.py index a12fc31a0777..bb1419ea39bb 100644 --- a/torch_geometric/nn/conv/fused_gat_conv.py +++ b/torch_geometric/nn/conv/fused_gat_conv.py @@ -12,8 +12,8 @@ class FusedGATConv(GATConv): # pragma: no cover r"""The fused graph attention operator from the `"Understanding GNN Computational Graph: A Coordinated Computation, IO, and Memory Perspective" - <https://proceedings.mlsys.org/paper/2022/file/ - 9a1158154dfa42caddbd0694a4e9bdc8-Paper.pdf>`_ paper. + <https://proceedings.mlsys.org/paper_files/paper/2022/file/ + b559156047e50cf316207249d0b5a6c5-Paper.pdf>`_ paper. :class:`FusedGATConv` is an optimized version of :class:`~torch_geometric.nn.conv.GATConv` based on the :obj:`dgNN` package