Skip to content

Commit ace4968

Browse files
committed
Add an LRU cache to the parsing of species IDs from MSA descriptions
1 parent 2535483 commit ace4968

File tree

2 files changed

+8
-0
lines changed

2 files changed

+8
-0
lines changed

alphafold3_pytorch/data/msa_parsing.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,9 @@
77
import re
88
import string
99

10+
import hashlib
11+
from cachetools import cached, LRUCache
12+
1013
from beartype.typing import Literal, Optional, Sequence, Tuple
1114

1215
from alphafold3_pytorch.tensor_typing import typecheck
@@ -117,7 +120,11 @@ def _extract_sequence_identifier(description: str) -> Optional[str]:
117120
else:
118121
return None
119122

123+
def _get_identifiers_make_key(description, tab_separated_alignment_headers):
124+
md5_digest = hashlib.md5(description.encode()).hexdigest()
125+
return f'{md5_digest}:{tab_separated_alignment_headers}'
120126

127+
@cached(cache = LRUCache(maxsize = 512), key = _get_identifiers_make_key)
121128
@typecheck
122129
def get_identifiers(
123130
description: str, tab_separated_alignment_headers: bool = False

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ dependencies = [
2828
"awscliv2>=2.3.1",
2929
"beartype",
3030
"biopython>=1.83",
31+
"cachetools",
3132
"click>=8.1",
3233
"CoLT5-attention>=0.11.0",
3334
"einops>=0.8.0",

0 commit comments

Comments
 (0)