Skip to content

Commit d96d9d4

Browse files
author
dmoi
committed
add cnn decoder
1 parent 5343e3e commit d96d9d4

File tree

9 files changed

+3944
-4194
lines changed

9 files changed

+3944
-4194
lines changed

foldtree2/encode_pdbs.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,31 +22,42 @@
2222
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
2323

2424
parser = argparse.ArgumentParser(description='Encode pdbs')
25-
parser.add_argument('input_dir', type=str, help='Input directory with pdbs')
26-
parser.add_argument('input_glob', type=str, help='Glob pattern for input pdbs')
25+
parser.add_argument('input_path', type=str, help='Input directory with pdbs or glob pattern (e.g., /path/to/pdbs or "/path/**/*.pdb")')
2726
parser.add_argument('output_h5', type=str, help='Output file with pytorch geometric graphs of pdbs')
2827
parser.add_argument('foldxdir', type=str, nargs='?', default=None, help='foldx directory with foldx output for all pdbs')
2928
parser.add_argument('--distance', type=float, default=15, help='Distance threshold for contact map (default: 15)')
3029
parser.add_argument('--add-prody', action='store_true', default=True, help='Add ProDy features (default: True)')
3130
parser.add_argument('--verbose', action='store_true', default=False, help='Verbose output')
3231
parser.add_argument('--multiprocessing', action='store_true', default=False, help='Use multiprocessing for parallel processing')
3332
parser.add_argument('--ncpu', type=int, default=25, help='Number of CPUs for multiprocessing (default: 25)')
33+
parser.add_argument('--nstructs', type=int, default=None, help='Number of structures to use (random subsample if specified)')
3434

3535
# Set the description and example-usage epilog shown in the --help output
3636
parser.description = "Encode PDB files into PyTorch geometric graphs with optional FoldX data integration."
3737
parser.epilog = ("Example usage:\n"
38-
" python encode_pdbs.py /path/to/pdbs '*.pdb' output.h5 /path/to/foldx")
38+
" python encode_pdbs.py /path/to/pdbs output.h5\n"
39+
" python encode_pdbs.py '/path/**/*.pdb' output.h5 /path/to/foldx")
3940

4041
args = parser.parse_args()
4142

42-
if args.input_glob:
43-
files = glob.glob(args.input_glob)
43+
# Handle input path - can be directory or glob pattern
44+
if os.path.isdir(args.input_path):
45+
# It's a directory, find all PDB files in it
46+
files = glob.glob(os.path.join(args.input_path, '*.pdb'))
47+
input_source = args.input_path
4448
else:
45-
files = glob.glob(os.path.join(args.input_dir, '*.pdb'))
49+
# Not a directory: treat input_path as a glob pattern ('**' matches recursively)
50+
files = glob.glob(args.input_path, recursive=True)
51+
input_source = args.input_path
4652

53+
print(f"Found {len(files)} PDB files from {input_source}")
4754
# Shuffle the file list in place so the processing order is random
4855
np.random.shuffle(files)
4956

57+
# If nstructs is given, keep only the first nstructs files of the shuffled list
58+
if args.nstructs is not None:
59+
files = files[:args.nstructs]
60+
5061
output_h5 = args.output_h5
5162
foldx = args.foldxdir
5263

0 commit comments

Comments
 (0)