Skip to content

Commit a7bf7c3

Browse files
committed
DataFrame Accessor improvements in selecting genotype columns
1 parent ae0e589 commit a7bf7c3

File tree

2 files changed

+14
-16
lines changed

2 files changed

+14
-16
lines changed

pandas_genomics/accessors/dataframe_accessor.py

Lines changed: 13 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -42,45 +42,41 @@ def __init__(self, pandas_obj):
4242
######################
4343
# Variant Properties #
4444
######################
45+
# These methods generally only return a result for each GenotypeArray column, ignoring other columns
46+
4547
@property
4648
def variant_info(self) -> pd.DataFrame:
4749
"""Return a DataFrame with variant info indexed by the column name (one row per GenotypeArray)"""
50+
genotypes = self._obj.select_dtypes([GenotypeDtype])
4851
return pd.DataFrame.from_dict(
4952
{
5053
colname: series.genomics.variant_info
51-
if GenotypeDtype.is_dtype(series.dtype)
52-
else dict()
53-
for colname, series in self._obj.iteritems()
54+
for colname, series in genotypes.iteritems()
5455
},
5556
orient="index",
5657
)
5758

58-
#########################
59-
# Calculated Properties #
60-
#########################
6159
@property
6260
def maf(self):
63-
"""Return the minor allele frequency
61+
"""Return the minor allele frequency of each variant
6462
6563
See :py:attr:`GenotypeArray.maf`"""
66-
return self._obj.apply(
67-
lambda col: col.genomics.maf if GenotypeDtype.is_dtype(col.dtype) else pd.NA
68-
)
64+
genotypes = self._obj.select_dtypes([GenotypeDtype])
65+
return genotypes.apply(lambda col: col.genomics.maf)
6966

7067
@property
7168
def hwe_pval(self):
7269
"""Return the probability that the samples are in HWE
7370
7471
See :py:attr:`GenotypeArray.hwe_pval`"""
75-
return self._obj.apply(
76-
lambda col: col.genomics.hwe_pval
77-
if GenotypeDtype.is_dtype(col.dtype)
78-
else pd.NA
79-
)
72+
genotypes = self._obj.select_dtypes([GenotypeDtype])
73+
return genotypes.apply(lambda col: col.genomics.hwe_pval)
8074

8175
############
8276
# Encoding #
8377
############
78+
# These methods generally return encoded values for any GenotypeArray columns without modifying other columns
79+
8480
def encode_additive(self) -> pd.DataFrame:
8581
"""Additive encoding of genotypes.
8682
@@ -283,6 +279,8 @@ def generate_weighted_encodings(
283279
###########
284280
# Filters #
285281
###########
282+
# These methods drop genotypes that fail the filter, ignoring other columns
283+
286284
def filter_variants_maf(self, keep_min_freq: float = 0.01) -> pd.DataFrame:
287285
"""
288286
Drop variants with a MAF less than the specified value (0.01 by default)

tests/genotype_array/test_GenotypeArrayAccessors.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ def test_maf(data):
2929
for colname in "ABC":
3030
df[colname].genomics.variant.id = colname
3131
df["D"] = np.ones(len(data))
32-
expected = pd.Series({"A": data.maf, "B": data.maf, "C": data.maf, "D": pd.NA})
32+
expected = pd.Series({"A": data.maf, "B": data.maf, "C": data.maf})
3333
assert_series_equal(df.genomics.maf, expected)
3434

3535

0 commit comments

Comments
 (0)