@@ -5668,3 +5668,245 @@ def test_different_node_flags(self):
5668
5668
result = ts .sample_nodes_by_ploidy (2 )
5669
5669
assert result .shape == (1 , 2 )
5670
5670
assert_array_equal (result , np .array ([[0 , 2 ]]))
5671
+
5672
+
5673
class TestMapToVcfModel:
    """Tests for ``TreeSequence.map_to_vcf_model``.

    Each test builds a small tree sequence (or bare table collection) and
    checks the ``individuals_nodes`` / ``individuals_name`` arrays of the
    returned ``tskit.VcfModelMapping``, or the error/warning that is raised.
    In the expected arrays below, ``-1`` marks an unused slot in a row.
    """

    def test_no_individuals_default_ploidy(self):
        """With no individuals and no ploidy argument, each row holds one node."""
        ts = tskit.Tree.generate_balanced(4).tree_sequence
        assert ts.num_individuals == 0

        # Default ploidy should be 1
        result = ts.map_to_vcf_model()
        assert isinstance(result, tskit.VcfModelMapping)
        assert result.individuals_nodes.shape == (4, 1)
        assert_array_equal(result.individuals_nodes, np.arange(4).reshape(4, 1))
        assert result.individuals_name.shape == (4,)
        assert list(result.individuals_name) == [f"tsk_{i}" for i in range(4)]

        with pytest.raises(
            ValueError,
            match="Cannot include non-sample nodes when individuals are not present",
        ):
            ts.map_to_vcf_model(include_non_sample_nodes=True)

    def test_no_individuals_custom_ploidy(self):
        """With no individuals, ``ploidy=2`` pairs up consecutive node IDs."""
        ts = tskit.Tree.generate_balanced(6).tree_sequence
        assert ts.num_individuals == 0

        # Use ploidy = 2
        result = ts.map_to_vcf_model(ploidy=2)
        assert isinstance(result, tskit.VcfModelMapping)
        assert result.individuals_nodes.shape == (3, 2)
        # Row i is expected to be [2 * i, 2 * i + 1].
        assert_array_equal(result.individuals_nodes, np.arange(6).reshape(3, 2))
        assert result.individuals_name.shape == (3,)
        assert list(result.individuals_name) == [f"tsk_{i}" for i in range(3)]

    def test_no_individuals_uneven_ploidy(self):
        """A sample count that is not a multiple of the ploidy is rejected."""
        ts = tskit.Tree.generate_balanced(5).tree_sequence
        # This tree sequence has no individuals
        assert ts.num_individuals == 0

        # 5 samples cannot be evenly divided into ploidy=2
        with pytest.raises(ValueError, match="not a multiple"):
            ts.map_to_vcf_model(ploidy=2)

    def test_with_individuals(self):
        """A simulated tree sequence with 5 individuals maps to 5 rows of 2 nodes."""
        ts = msprime.sim_ancestry(
            5,
            random_seed=42,
        )
        result = ts.map_to_vcf_model()
        assert isinstance(result, tskit.VcfModelMapping)
        assert result.individuals_nodes.shape == (5, 2)
        assert_array_equal(
            result.individuals_nodes,
            np.array([[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]),
        )
        assert result.individuals_name.shape == (5,)
        assert list(result.individuals_name) == [f"tsk_{i}" for i in range(5)]

    def test_with_individuals_and_ploidy_error(self):
        """Passing ``ploidy`` when individuals are present raises ValueError."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        ts = tables.tree_sequence()

        with pytest.raises(ValueError, match="Cannot specify ploidy when individuals"):
            ts.map_to_vcf_model(ploidy=2)

    def test_specific_individuals(self):
        """Selecting a subset of individuals returns only their rows and names."""
        tables = tskit.TableCollection(1.0)
        # Create 5 individuals with varying ploidy
        for i in range(5):
            tables.individuals.add_row()
            # Individuals have ploidy i+1
            for _ in range(i + 1):
                tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=i)
        ts = tables.tree_sequence()

        result = ts.map_to_vcf_model(individuals=[1, 3])
        assert isinstance(result, tskit.VcfModelMapping)
        # Individual 1 has ploidy 2, individual 3 has ploidy 4; the expected
        # width of 5 matches the largest ploidy in the tables (individual 4).
        assert result.individuals_nodes.shape == (2, 5)
        assert_array_equal(result.individuals_nodes[0], [1, 2, -1, -1, -1])
        assert_array_equal(result.individuals_nodes[1], [6, 7, 8, 9, -1])

        assert result.individuals_name.shape == (2,)
        assert result.individuals_name[0] == "tsk_1"
        assert result.individuals_name[1] == "tsk_3"

    def test_individual_with_no_nodes(self):
        """An individual without nodes still gets a row, filled with -1."""
        tables = tskit.TableCollection(1.0)
        # Individual with no nodes
        tables.individuals.add_row()
        # Individual with nodes
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        result = ts.map_to_vcf_model()
        assert result.individuals_nodes.shape == (2, 1)
        assert_array_equal(result.individuals_nodes, [[-1], [0]])

    def test_individual_with_no_nodes_only(self):
        """Selecting only the node-less individual yields a single -1 row."""
        tables = tskit.TableCollection(1.0)
        # Individual with no nodes
        tables.individuals.add_row()
        # Individual with nodes
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        result = ts.map_to_vcf_model(individuals=[0])
        assert result.individuals_nodes.shape == (1, 1)
        assert_array_equal(result.individuals_nodes, [[-1]])

    def test_invalid_individual_id(self):
        """Negative and out-of-range individual IDs both raise ValueError."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        ts = tables.tree_sequence()

        # -1 is below the valid range; 1 is past the single individual (ID 0).
        for bad_id in (-1, 1):
            with pytest.raises(ValueError, match="Invalid individual ID"):
                ts.map_to_vcf_model(individuals=[bad_id])

    def test_mixed_sample_non_sample_ordering(self):
        """Non-sample nodes are omitted by default and included on request."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=0, time=0, individual=0)  # Non-sample node
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=0, time=0, individual=0)  # Non-sample node
        tables.individuals.add_row()
        tables.nodes.add_row(flags=0, time=0, individual=1)  # Non-sample node
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        # Default: only sample nodes appear, packed to the left of each row.
        result = ts.map_to_vcf_model()
        assert result.individuals_nodes.shape == (2, 4)
        assert_array_equal(
            result.individuals_nodes,
            np.array([[0, 2, -1, -1], [5, -1, -1, -1]]),
        )

        # With non-sample nodes included, every node of each individual appears.
        result = ts.map_to_vcf_model(include_non_sample_nodes=True)
        assert result.individuals_nodes.shape == (2, 4)
        assert_array_equal(
            result.individuals_nodes,
            np.array([[0, 1, 2, 3], [4, 5, -1, -1]]),
        )

    def test_samples_without_individuals_warning(self):
        """A sample node lacking an individual triggers exactly one warning."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        # Node with individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        # Node without individual
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=tskit.NULL)
        ts = tables.tree_sequence()

        with warnings.catch_warnings(record=True) as w:
            # Record every warning regardless of the ambient filters, so the
            # count below does not depend on how the test run was configured.
            warnings.simplefilter("always")
            ts.map_to_vcf_model()
            assert len(w) == 1
            assert "At least one sample node does not have an individual ID" in str(
                w[0].message
            )

    def test_metadata_key_for_names(self):
        """Names are read from individual metadata when a key is supplied."""
        tables = tskit.TableCollection(1.0)

        # Add individuals with metadata
        tables.individuals.metadata_schema = tskit.MetadataSchema(
            {
                "codec": "json",
                "type": "object",
                "properties": {"name": {"type": "string"}},
            }
        )
        tables.individuals.add_row(metadata={"name": "ind1"})
        tables.individuals.add_row(metadata={"name": "ind2"})

        # Add nodes
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        result = ts.map_to_vcf_model(name_metadata_key="name")
        assert result.individuals_name.shape == (2,)
        assert list(result.individuals_name) == ["ind1", "ind2"]

    def test_custom_individual_names(self):
        """Explicitly supplied names are returned unchanged and in order."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        custom_names = ["individual_A", "individual_B"]
        result = ts.map_to_vcf_model(individual_names=custom_names)
        assert result.individuals_name.shape == (2,)
        assert list(result.individuals_name) == custom_names

    def test_name_conflict_error(self):
        """Supplying both a metadata key and explicit names raises ValueError."""
        tables = tskit.TableCollection(1.0)
        ts = tables.tree_sequence()
        with pytest.raises(
            ValueError,
            match="Cannot specify both name_metadata_key and individual_names",
        ):
            ts.map_to_vcf_model(
                name_metadata_key="name", individual_names=["custom_name"]
            )

    def test_name_count_mismatch_error(self):
        """A names list whose length differs from the individuals is rejected."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.individuals.add_row()
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=0)
        tables.nodes.add_row(flags=tskit.NODE_IS_SAMPLE, time=0, individual=1)
        ts = tables.tree_sequence()

        with pytest.raises(
            ValueError, match="number of individuals does not match the number of names"
        ):
            ts.map_to_vcf_model(individual_names=["only_one_name"])

    def test_all_individuals_no_nodes(self):
        """When no individual has nodes, the mapping has zero columns."""
        tables = tskit.TableCollection(1.0)
        tables.individuals.add_row()
        tables.individuals.add_row()
        ts = tables.tree_sequence()
        result = ts.map_to_vcf_model()
        assert result.individuals_nodes.shape == (2, 0)
0 commit comments