|
24 | 24 |
|
25 | 25 | namespace ublas = boost::numeric::ublas; |
26 | 26 | bool VERBOSE=false; |
| 27 | +double DIGBY_EXPONENT=0.7853981633974483; // pi/4 |
27 | 28 | /* Matrix inversion routine. |
28 | 29 | Uses lu_factorize and lu_substitute in uBLAS to invert a matrix */ |
29 | 30 | template<class T> |
@@ -189,7 +190,7 @@ inline long double produit_reciproque_asymetrique(boost::numeric::ublas::matrix< |
189 | 190 | ret+= addendum; |
190 | 191 | } |
191 | 192 | } |
192 | | - return ret; |
| 193 | + return 2*ret; |
193 | 194 | } |
194 | 195 |
|
195 | 196 | // inline std::vector<long double> produit(std::vector<long double>& u, std::vector<long double>& v) { |
@@ -665,7 +666,7 @@ class studies_correlation { |
665 | 666 | for(unsigned int j=i+1;j<nstudies;j++){ |
666 | 667 | long double alpha=m(i,j).get_alpha(); |
667 | 668 | if(VERBOSE){info("between study ",i,"and",j, " the alpha is ",alpha);} |
668 | | - correlations[mask](i,j)=(pow(alpha, 0.75)-1)/(pow(alpha, 0.75)+1); |
| 669 | + correlations[mask](i,j)=(pow(alpha, DIGBY_EXPONENT)-1)/(pow(alpha, DIGBY_EXPONENT)+1); |
669 | 670 | if(correlations[mask](i,j)==-1){correlations[mask](i,j)=0;} |
670 | 671 | } |
671 | 672 | } |
@@ -970,6 +971,7 @@ NB:\tMETACARPA currently supports only one header line in input files, which is |
970 | 971 | ("stop,x", "Stop METACARPA after generating the matrix.") |
971 | 972 | ("debug,d", "Toggles an extremely verbose output, for debugging purposes only.") |
972 | 973 | ("use-beta-sign", "Use sign of beta for dichotomization in correlation matrix calculation (as described in Southam et al. 2017), instead of the default p-value transform.") |
| 974 | + ("digby-exponent", po::value<double>(), "Exponent e used in Digby's tetrachoric correlation approximation: rho = (alpha^e - 1)/(alpha^e + 1). Default: pi/4 ≈ 0.7854.") |
973 | 975 |
|
974 | 976 |
|
975 | 977 | // ("ss1,1", po::value<int>(), "Sample size for study 1 (in order of join).") |
@@ -1069,6 +1071,11 @@ if(vm.count("use-beta-sign")){ |
1069 | 1071 | info("Using sign of beta for dichotomization (Southam et al. 2017 method)."); |
1070 | 1072 | } |
1071 | 1073 |
|
| 1074 | +if(vm.count("digby-exponent")){ |
| 1075 | + DIGBY_EXPONENT=vm["digby-exponent"].as<double>(); |
| 1076 | + info("Using Digby exponent = "+to_string(DIGBY_EXPONENT)); |
| 1077 | +} |
| 1078 | + |
1072 | 1079 | if (vm.count("input") && vm.count("output")) { |
1073 | 1080 | ifiles=vm["input"].as<vector<string>>(); |
1074 | 1081 | if(ifiles.size()==1){error("Only one input file. Nothing to do.");} |
@@ -1600,307 +1607,3 @@ info("Goodbye."); |
1600 | 1607 | return 0; |
1601 | 1608 |
|
1602 | 1609 | } |
1603 | | - |
1604 | | - |
1605 | | - |
1606 | | -// DEFUNCT CODE |
1607 | | -// OLD BODY OF META_ANALYSE (WITH COMMENTS) |
1608 | | - |
1609 | | - |
1610 | | - // ord.chrpos=working_rs[0]; |
1611 | | - // ord.rsid=working_id[0]; |
1612 | | - // ord.a1=working_a1[0]; |
1613 | | - // ord.a2=working_a2[0]; |
1614 | | - |
1615 | | - // unsigned int j=0; |
1616 | | - // for(unsigned short int i=0; i<ifiles.size();i++){ |
1617 | | - // if((working_mask & (1<<i)) == 0){ |
1618 | | - // ord.status.push_back('?'); |
1619 | | - // }else{ |
1620 | | - // //if((mask & mask-1)==0){error("mask is a power of two: "+to_string(mask)+" whereas i="+to_string(i)+" and maskcheck is "+to_string(1<<i));} |
1621 | | - // char effect_dir=working_betas[j] > 0? '+' : '-'; |
1622 | | - // j++; |
1623 | | - // ord.status.push_back(effect_dir); |
1624 | | - // } |
1625 | | - // } |
1626 | | - |
1627 | | - // if(bit_count(working_mask)>1){ |
1628 | | - // // compute weights |
1629 | | - // int sum_ss=somme(working_weights); |
1630 | | - // i=0; |
1631 | | - // //for(long double l : working_weights){working_weights[i]/=sum_ss;i++;} |
1632 | | - |
1633 | | - // for(long double l : working_weights){working_weights[i]=sqrt(working_weights[i]);working_weights[i]/=sqrt(sum_ss);i++;} |
1634 | | - |
1635 | | - |
1636 | | - // // compute transforms and sum thereof |
1637 | | - // i=0; |
1638 | | - // vector<cpp_dec_float_1000> zs(working_weights.size()); |
1639 | | - // vector<cpp_dec_float_1000> zs_fess(working_weights.size()); |
1640 | | - // for(long double l : working_ps){ |
1641 | | - // zs[i]=z_transform_fess(working_ps[i], working_betas[i],working_rs[i]); |
1642 | | - // zs_fess[i]=z_transform_fess(working_ps[i], working_betas[i],working_rs[i]); |
1643 | | - // i++; |
1644 | | - // } |
1645 | | - // ord.z=somme(produit(zs, working_weights)); |
1646 | | - |
1647 | | - |
1648 | | - // ord.z_fess=somme(produit(zs_fess, working_weights)); |
1649 | | - // // compute p-value SD |
1650 | | - // // the first product in the line below sums to 1 by definition |
1651 | | - // //ord.zse=sqrt(somme(produit(working_weights, working_weights))+produit_reciproque_asymetrique(correlations.getmat(working_mask), working_weights)); |
1652 | | - // ord.zse=sqrt(1+produit_reciproque_asymetrique(correlations.getmat(working_mask), working_weights)); |
1653 | | - |
1654 | | - |
1655 | | - // //compute matrix for beta weights |
1656 | | - // //info("Working mask ", working_mask); |
1657 | | - // ublas::matrix<long double> c=correlations.getmat(working_mask); |
1658 | | - // for(i=0;i<c.size1();i++){ |
1659 | | - // for(unsigned short int j=0;j<c.size1();j++){ |
1660 | | - // if(i>j){c(i,j)=c(j,i);}else if (i==j){c(i,j)=1;} |
1661 | | - // c(i,j)=c(i,j)*working_betase[i]*working_betase[j]; |
1662 | | - // } |
1663 | | - // } |
1664 | | - // // calculate weights |
1665 | | - // // * sum of columns/total sum of matrix |
1666 | | - // ublas::matrix<long double> inverse(c.size1(),c.size1()); |
1667 | | - |
1668 | | - // if(!InvertMatrix(c,inverse)){correlations.print(c);error("Could not invert variance/covariance matrix.");} |
1669 | | - |
1670 | | - // working_weights=colsum(inverse); |
1671 | | - // long double totsum=somme(working_weights); |
1672 | | - // for(i=0;i<working_weights.size();i++) working_weights[i]/=totsum; |
1673 | | - |
1674 | | - // // calculate betase |
1675 | | - // std::vector<long double> working_var=produit(working_betase, working_betase); |
1676 | | - // ord.betase=sqrt(somme(produit(produit(working_weights, working_weights), working_var)) + produit_reciproque_asymetrique(c, working_weights)); |
1677 | | - |
1678 | | - // // meta-beta |
1679 | | - // ord.beta=somme(produit(working_weights, working_betas)); |
1680 | | - // // compute meta-analysis p-value |
1681 | | - // boost::math::normal_distribution<long double> correctednormal(0.0, ord.zse); |
1682 | | - // boost::math::normal_distribution<cpp_dec_float_1000> correctednormal_p(0.0, ord.zse); |
1683 | | - // boost::math::normal_distribution<long double> wald(0.0, 1); |
1684 | | - // boost::math::normal_distribution<cpp_dec_float_1000> wald_p(0.0, 1); |
1685 | | - |
1686 | | - // try{ |
1687 | | - // ord.p=2*(cdf(correctednormal, -1*abs(static_cast<long double>(ord.z)))); |
1688 | | - // //ord.p_uncorrected=2*(1-cdf(wald, abs(static_cast<long double>(ord.z)))); |
1689 | | - // //ord.p=1-cdf(correctednormal, -1*abs(static_cast<long double>(ord.z))); |
1690 | | - // // ord.p_uncorrected=1-cdf(wald, -1*abs(static_cast<long double>(ord.z))); |
1691 | | - // ord.p_fess=2*cdf(wald, -1*abs(static_cast<long double>(ord.z_fess))); |
1692 | | - // //ord.p=1-cdf(correctednormal, static_cast<long double>(ord.z)); |
1693 | | - // // ord.p_uncorrected=1-cdf(wald, static_cast<long double>(ord.z)); |
1694 | | - // ord.p_wald=2*cdf(wald, -1*abs(ord.beta/ord.betase)); |
1695 | | - // if(ord.p==0){ |
1696 | | - |
1697 | | - // //info("Warning, position "+minimum+" meta-analyses below long double precision. Analysing with multiprecision..."); |
1698 | | - // //ord.p =1-cdf(correctednormal_p, -1*abs(ord.z)); |
1699 | | - // //ord.p =1-cdf(correctednormal_p, ord.z); |
1700 | | - // // ord.p_uncorrected=1-cdf(wald_p, abs(ord.z)); |
1701 | | - // ord.p_wald=2*cdf(wald_p, -1*abs(ord.beta/ord.betase)); |
1702 | | - // ord.p=2*(cdf(correctednormal_p, -1*abs(ord.z))); |
1703 | | - // ord.p_fess=2*cdf(wald_p, -1*abs(ord.z_fess)); |
1704 | | - // //ord.p_uncorrected=2*(1-cdf(correctednormal_p, abs(ord.z))); |
1705 | | - |
1706 | | - // } |
1707 | | - // }catch(exception e){ |
1708 | | - // // ord.p=1-cdf(correctednormal_p, -1*abs(ord.z)); |
1709 | | - // //ord.p=1-cdf(correctednormal_p, ord.z); |
1710 | | - // // ord.p_uncorrected=1-cdf(wald_p, -1*abs(ord.z)); |
1711 | | - // ord.p=2*(cdf(correctednormal, -1*abs(ord.z))); |
1712 | | - // //ord.p_uncorrected=2*(1-cdf(wald_p, abs(ord.z))); |
1713 | | - // ord.p_wald=2*cdf(wald_p, -1*abs(ord.beta/ord.betase)); |
1714 | | - // ord.p_fess=2*cdf(wald_p, -1*abs(ord.z_fess)); |
1715 | | - // } |
1716 | | - |
1717 | | - // }else{ |
1718 | | - // // SNP present in only 1 study, skip all calculations |
1719 | | - // ord.rsid=working_id[0]; |
1720 | | - // ord.beta=working_betas[0]; |
1721 | | - // ord.betase=working_betase[0]; |
1722 | | - // ord.p=-1; |
1723 | | - // ord.p_wald=-1; |
1724 | | - // ord.p_fess=-1; |
1725 | | - // ord.z=-1; |
1726 | | - // ord.zse=-1; |
1727 | | - // ord.p_uncorrected=-1; |
1728 | | - // ord.a1=working_a1[0]; |
1729 | | - // ord.a2=working_a2[0]; |
1730 | | - // } |
1731 | | - // ord.print(ofs); |
1732 | | - /* |
1733 | | - map <string, position_info, cmp_rsid> thisstudy; |
1734 | | - bool firstline=true; |
1735 | | - info("Calculating z-scores and binomial transformation for file "+filename+"..."); |
1736 | | - string tempLine; |
1737 | | - boost::math::normal_distribution<long double> standardnormal; |
1738 | | - boost::math::normal_distribution<cpp_dec_float_1000> standardnormal_p; |
1739 | | - means[i]=0; |
1740 | | - while ( getline(inputFile, tempLine, '\n') ) { |
1741 | | - if(firstline==true){firstline=false;continue;} |
1742 | | - vector <string> line; |
1743 | | - stringstream ss(tempLine); |
1744 | | - string temp; |
1745 | | - while (getline(ss, temp, '\t')) { |
1746 | | - line.push_back(temp); |
1747 | | - } |
1748 | | - string chrpos=line[0]+":"+line[2]; |
1749 | | - position_info thisposition; |
1750 | | - //thisposition.beta=stold(line[7]); |
1751 | | - //means[i]=means[i]+thisposition.beta; |
1752 | | - //meancount[i]++; |
1753 | | - //thisposition.sdbeta=stold(line[8]); |
1754 | | - thisposition.pvalue=stold(line[13]); |
1755 | | - thisposition.rsid=line[1]; |
1756 | | - try |
1757 | | - { |
1758 | | - thisposition.zf=(cpp_dec_float_1000)(quantile(standardnormal, 1-thisposition.pvalue/2)*sign(thisposition.beta)); |
1759 | | - thisposition.bpval=(thisposition.zf<=0); |
1760 | | - thisposition.need_precision=false; |
1761 | | - }catch(exception &ex){ |
1762 | | - thisposition.need_precision=true; |
1763 | | - info("\tPosition "+chrpos+" could not be transformed with normal machine precision. Using arbitrary precision (p="+line[13]+")"); |
1764 | | - cpp_dec_float_1000 p1=1-(cpp_dec_float_1000)thisposition.pvalue/2; |
1765 | | - thisposition.zf=quantile(standardnormal_p, p1)*sign(thisposition.beta); |
1766 | | - thisposition.bpval=(thisposition.zf<=0); |
1767 | | - } |
1768 | | -
|
1769 | | - thisstudy[chrpos]=thisposition; |
1770 | | - if(where_present.count(chrpos)==0){ |
1771 | | - where_present[chrpos]=current_mask; |
1772 | | - }else{ |
1773 | | - where_present[chrpos]=where_present[chrpos] | current_mask; |
1774 | | - } |
1775 | | -
|
1776 | | - } |
1777 | | - studydata.push_back(thisstudy); |
1778 | | - means[i]=means[i]/meancount[i]; |
1779 | | - // This is the mean beta for the whole of the file |
1780 | | - // It can be quite different from the mean calculated on overlapping positions only |
1781 | | - info("Mean effect for file "+s+" is "+to_string(means[i])+" ("+to_string(meancount[i])+" rows)"); |
1782 | | - i++; |
1783 | | -}); |
1784 | | -int removed=0; |
1785 | | -
|
1786 | | -
|
1787 | | -
|
1788 | | - // All studies have been read. Do we have positions only present in one study? |
1789 | | -// typedef map <string, unsigned short int, cmp_rsid>::iterator it_type; |
1790 | | -// for(it_type it =where_present.begin(); it != where_present.end();){ |
1791 | | -// unsigned short int mask=it->second; |
1792 | | -// if((mask & mask-1)==0){removed++;} |
1793 | | -// ++it; |
1794 | | -// } |
1795 | | -// info(to_string(removed)+" positions were only genotyped in one study. Sadly, they will be lost."); |
1796 | | -
|
1797 | | -
|
1798 | | -// Time to compute relatedness matrix. |
1799 | | -// This calculates the correlations using the binomially tansformed betas. |
1800 | | -// It is the main innovation introduced by Province and Borecki |
1801 | | -info("Calculating p-value correlation matrix..."); |
1802 | | -vector<vector<long double>> cormat(ifiles.size(), vector<long double>(ifiles.size(), 0)); |
1803 | | -i=0; |
1804 | | -vector<long double> sds(ifiles.size(), 0); |
1805 | | -vector<bool> done(ifiles.size(), false); |
1806 | | -vector<int> counts(ifiles.size(), 0); |
1807 | | -for_each(ifiles.begin(), ifiles.end(), [&](string s)-> void { |
1808 | | - unsigned short int j=i+1; |
1809 | | - unsigned short int expj=1<<j; |
1810 | | - unsigned short int expi=1<<i; |
1811 | | - for_each(next(ifiles.begin(),i+1), ifiles.end(), [&](string t)-> void { |
1812 | | - // get all positions that have at least these two studies in common |
1813 | | - info("between file "+s+" and file "+t); |
1814 | | - if (i==j){cormat[i][j]=1;}else{ |
1815 | | - typedef map <string, unsigned short int, cmp_rsid>::iterator it_type; |
1816 | | - vector<bool> a; |
1817 | | - vector<bool> b; |
1818 | | - for(it_type it =where_present.begin(); it != where_present.end();){ |
1819 | | - unsigned short int mask=it->second; |
1820 | | - string chrpos=it->first; |
1821 | | - // Here we calculate SDs for entire studies... |
1822 | | - // info("Treating position "+chrpos+" with mask "+to_string(mask)); |
1823 | | - if(!done[j] && (mask & expj)==expj){sds[j]=sds[j]+(studydata[j][chrpos].beta-means[j])*(studydata[j][chrpos].beta-means[j]);counts[j]++;} |
1824 | | - if(!done[i] && (mask & expi)==expi){sds[i]=sds[i]+(studydata[i][chrpos].beta-means[i])*(studydata[i][chrpos].beta-means[i]);counts[i]++;} |
1825 | | - if((mask & expj)==expj && (mask & expi)==expi){ |
1826 | | - // info("Position "+chrpos+" is present in both studies."); |
1827 | | - a.push_back(studydata[i][chrpos].bpval); |
1828 | | - b.push_back(studydata[j][chrpos].bpval); |
1829 | | -
|
1830 | | - } |
1831 | | - ++it; |
1832 | | - } |
1833 | | - done[j]=true; |
1834 | | - done[i]=true; |
1835 | | -
|
1836 | | - cormat[i][j]=tetrachoric(a,b); |
1837 | | -
|
1838 | | - } |
1839 | | - info("\t...is "+to_string(cormat[i][j])); |
1840 | | - j++; |
1841 | | - }); |
1842 | | -i++; |
1843 | | -}); |
1844 | | -studies_correlation sc=studies_correlation(); |
1845 | | -sc.add_mask((unsigned short int)3); |
1846 | | -bool v[]={true, true}; |
1847 | | -sc.update_mask((unsigned short int)3, v); |
1848 | | -v[1]=false; |
1849 | | -sc.update_mask((unsigned short int)3, v); |
1850 | | -v[0]=false; |
1851 | | -sc.update_mask((unsigned short int)3, v); |
1852 | | -v[0]=true; |
1853 | | -sc.update_mask((unsigned short int)3, v); |
1854 | | -v[1]=true; |
1855 | | -sc.update_mask((unsigned short int)3, v); |
1856 | | -v[0]=false; |
1857 | | -sc.update_mask((unsigned short int)3, v); |
1858 | | -v[1]=false; |
1859 | | -sc.update_mask((unsigned short int)3, v); |
1860 | | -sc.update_mask((unsigned short int)3, v); |
1861 | | -v[0]=true; |
1862 | | -v[1]=true; |
1863 | | -sc.update_mask((unsigned short int)3, v); |
1864 | | -v[0]=false; |
1865 | | -sc.update_mask((unsigned short int)3, v); |
1866 | | -v[1]=true; |
1867 | | -v[0]=true; |
1868 | | -sc.update_mask((unsigned short int)3, v); |
1869 | | -v[0]=false; |
1870 | | -sc.update_mask((unsigned short int)3, v); |
1871 | | -v[1]=false; |
1872 | | -sc.update_mask((unsigned short int)3, v); |
1873 | | -sc.update_mask((unsigned short int)3, v); |
1874 | | -
|
1875 | | -
|
1876 | | -
|
1877 | | -
|
1878 | | -sc.compute_correlations(); |
1879 | | -
|
1880 | | -sc.write("test_serial"); |
1881 | | -
|
1882 | | -studies_correlation scr=studies_correlation(); |
1883 | | -scr.read("test_serial"); |
1884 | | -info(scr.getmat((unsigned short int)3)); |
1885 | | -*/ |
1886 | | - |
1887 | | - |
1888 | | - |
1889 | | - |
1890 | | - |
1891 | | - |
1892 | | - |
1893 | | - |
1894 | | - |
1895 | | - |
1896 | | - |
1897 | | - |
1898 | | - |
1899 | | - |
1900 | | - |
1901 | | - |
1902 | | - |
1903 | | - |
1904 | | - |
1905 | | - |
1906 | | - |
0 commit comments