-
Notifications
You must be signed in to change notification settings - Fork 222
Implement Principal Component Analysis (PCA) module #1086
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from 73 commits
dc4aaac
27599e1
d77fb0e
24358d1
1dd44ad
7f79ef6
0d2738c
20b0e98
654edba
b7c2be1
63a0a1f
cfbcdee
db19731
d9ba548
11902b6
d7f8790
f8bbd27
75db887
d72f72c
44ee2e7
6d2a4fd
b3ea627
0e94be3
05d4968
d3d1c71
7b49baa
0659b39
ac3b0e9
4751866
cc21db0
4ac725c
7348faf
c58f515
c776e8d
c47e2b6
436a526
143c211
17cf473
6d0506d
67c7ddf
720298c
1c2fc75
c43704c
9509dca
36fc211
19f55b6
8c4dcd8
1c97f51
2e87b76
e665dce
1e6cef7
f5f0c60
57b3cc5
f014baf
9dd3212
202e656
c61eb79
6daccc2
41a3690
074d34e
bcabe8f
a769f25
587abf7
83fe1d0
5d0c88e
9979449
b23a670
6dd0b39
ecbccd1
cc10e95
496d744
6c48366
f1d5182
a837b6b
baf8ff5
f931908
53bb939
3c98dee
cb9a5ea
8c83389
3b9b085
25e4eab
9604ccb
a66aee6
25bf66f
641243f
ef2f624
33c0270
23257e9
cb908bf
e2885ae
775ccc3
1ac24a0
3b179e8
4d2d7a1
f4c8245
b267c33
8d4cd08
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,247 @@ | ||
| #:include "common.fypp" | ||
| #! PCA supports all real kinds available in stdlib's internal BLAS/LAPACK. | ||
| #! When WITH_XDP or WITH_QP are enabled, extended precision is also supported. | ||
| #:set PCA_KINDS_TYPES = [("sp", "real(sp)"), ("dp", "real(dp)")] | ||
| #:if WITH_XDP | ||
| #:set PCA_KINDS_TYPES = PCA_KINDS_TYPES + [("xdp", "real(xdp)")] | ||
| #:endif | ||
| #:if WITH_QP | ||
| #:set PCA_KINDS_TYPES = PCA_KINDS_TYPES + [("qp", "real(qp)")] | ||
| #:endif | ||
JAi-SATHVIK marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| submodule (stdlib_stats) stdlib_stats_pca | ||
| use stdlib_kinds, only: sp, dp, xdp, qp | ||
| use stdlib_optval, only: optval | ||
| use stdlib_linalg, only: svd, eigh | ||
| use stdlib_linalg_constants, only: ilp | ||
| use stdlib_linalg_blas, only: gemm, syrk | ||
| use stdlib_linalg_state, only: linalg_state_type, LINALG_ERROR, LINALG_VALUE_ERROR | ||
| use stdlib_sorting, only: sort_index | ||
| implicit none | ||
|
|
||
| contains | ||
|
|
||
| ! Helper subroutine: Centers data in-place by subtracting the mean from each column | ||
| #:for k1, t1 in PCA_KINDS_TYPES | ||
| subroutine center_data_${k1}$(x, mu) | ||
| ${t1}$, intent(inout) :: x(:,:) | ||
| ${t1}$, intent(in) :: mu(:) | ||
| integer(ilp) :: i, j, m, n | ||
| m = size(x, 1, kind=ilp) | ||
| n = size(x, 2, kind=ilp) | ||
| do j = 1, n | ||
| do i = 1, m | ||
| x(i, j) = x(i, j) - mu(j) | ||
| end do | ||
| end do | ||
JAi-SATHVIK marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| end subroutine center_data_${k1}$ | ||
| #:endfor | ||
|
|
||
| ! SVD-based PCA driver: computes principal components via SVD of centered data | ||
| #:for k1, t1 in PCA_KINDS_TYPES | ||
| subroutine pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err) | ||
| use stdlib_blas_constants_${k1}$, only: zero | ||
| ${t1}$, intent(inout) :: x_centered(:,:) | ||
| integer(ilp), intent(in) :: n, p | ||
| ${t1}$, intent(out) :: components(:,:) | ||
| real(${k1}$), intent(out) :: singular_values(:) | ||
JAi-SATHVIK marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| type(linalg_state_type), intent(out) :: err | ||
|
|
||
| integer(ilp) :: n_s, m | ||
| ${t1}$, allocatable :: s_tmp(:), vt_tmp(:,:) | ||
|
|
||
| ! Initialize outputs to zero to prevent uninitialized memory access | ||
| components = zero | ||
| singular_values = zero | ||
|
|
||
| n_s = min(n, p) | ||
| allocate(s_tmp(n_s)) | ||
| allocate(vt_tmp(n_s, p)) | ||
|
|
||
| call svd(x_centered, s_tmp, vt=vt_tmp, overwrite_a=.true., full_matrices=.false., err=err) | ||
|
|
||
| if (err%ok()) then | ||
| m = min(size(components, 1, kind=ilp), n_s) | ||
| components(1:m, :) = vt_tmp(1:m, :) | ||
| m = min(size(singular_values, 1, kind=ilp), n_s) | ||
| singular_values(1:m) = s_tmp(1:m) | ||
| end if | ||
| end subroutine pca_svd_driver_${k1}$ | ||
| #:endfor | ||
|
|
||
| ! Eigendecomposition-based PCA driver: computes principal components via covariance matrix | ||
| #:for k1, t1 in PCA_KINDS_TYPES | ||
| subroutine pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err) | ||
| use stdlib_blas_constants_${k1}$, only: zero | ||
| ${t1}$, intent(in) :: x_centered(:,:) | ||
| integer(ilp), intent(in) :: n, p | ||
| ${t1}$, intent(out) :: components(:,:) | ||
| real(${k1}$), intent(out) :: singular_values(:) | ||
| type(linalg_state_type), intent(out) :: err | ||
|
|
||
| integer(ilp) :: i, j, m | ||
| integer(ilp), allocatable :: idx(:) | ||
| ${t1}$ :: alpha, beta | ||
| real(${k1}$) :: scale_factor | ||
| real(${k1}$), allocatable :: lambda(:), lambda_copy(:) | ||
| ${t1}$, allocatable :: c(:,:), vectors(:,:) | ||
|
|
||
| ! Initialize outputs to zero to prevent uninitialized memory access | ||
| components = zero | ||
| singular_values = zero | ||
|
|
||
| ! Compute covariance matrix using BLAS syrk: C = (1/(n-1)) * X^T * X | ||
| scale_factor = 1.0_${k1}$ / real(max(n-1, 1), ${k1}$) | ||
| alpha = scale_factor | ||
JAi-SATHVIK marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| beta = zero | ||
| allocate(c(p, p)) | ||
| c = zero | ||
| call syrk('U', 'T', p, n, alpha, x_centered, n, beta, c, p) | ||
JAi-SATHVIK marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| ! Fill lower triangle from upper triangle (syrk only fills upper) | ||
|
||
| do j = 1, p-1 | ||
| do i = j+1, p | ||
| c(i, j) = c(j, i) | ||
| end do | ||
| end do | ||
JAi-SATHVIK marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| allocate(lambda(p)) | ||
| allocate(vectors(p, p)) | ||
|
||
| call eigh(c, lambda, vectors=vectors, err=err) | ||
|
|
||
| if (err%ok()) then | ||
| ! Sort eigenvalues in descending order | ||
| allocate(lambda_copy(p)) | ||
| lambda_copy = -lambda | ||
| allocate(idx(p)) | ||
| call sort_index(lambda_copy, idx) | ||
JAi-SATHVIK marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
|
|
||
| ! Assign sorted results with safety bounds checks | ||
| m = min(size(components, 1, kind=ilp), p) | ||
| m = min(m, size(singular_values, 1, kind=ilp)) | ||
JAi-SATHVIK marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| do i = 1, m | ||
| components(i, :) = vectors(:, idx(i)) | ||
| if (lambda(idx(i)) > 0.0_${k1}$) then | ||
| singular_values(i) = sqrt(lambda(idx(i)) * real(n-1, ${k1}$)) | ||
| else | ||
| singular_values(i) = 0.0_${k1}$ | ||
| end if | ||
JAi-SATHVIK marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| end do | ||
| end if | ||
| end subroutine pca_eigh_driver_${k1}$ | ||
| #:endfor | ||
|
|
||
| #:for k1, t1 in PCA_KINDS_TYPES | ||
| module subroutine pca_${k1}$(x, components, singular_values, x_mean, & | ||
| method, overwrite_x, err) | ||
| ${t1}$, intent(inout) :: x(:,:) | ||
| ${t1}$, intent(out) :: components(:,:) | ||
| real(${k1}$), intent(out) :: singular_values(:) | ||
| ${t1}$, intent(out), optional :: x_mean(:) | ||
JAi-SATHVIK marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| character(*), intent(in), optional :: method | ||
| logical, intent(in), optional :: overwrite_x | ||
| type(linalg_state_type), intent(out), optional :: err | ||
|
|
||
| type(linalg_state_type) :: err0 | ||
| integer(ilp) :: n, p | ||
| ${t1}$, allocatable :: mu(:), x_centered(:,:) | ||
| character(len=:), allocatable :: method_ | ||
|
|
||
| n = size(x, 1, kind=ilp) | ||
| p = size(x, 2, kind=ilp) | ||
| method_ = trim(adjustl(optval(method, "svd"))) | ||
|
|
||
| ! Calculate mean along dimension 1 (column means) using stdlib mean | ||
| allocate(mu(p)) | ||
| mu = mean(x, 1) | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think you can get rid of the |
||
|
|
||
| ! Validate and assign x_mean if present | ||
| if (present(x_mean)) then | ||
| if (size(x_mean) < p) then | ||
| err0 = linalg_state_type("pca", LINALG_VALUE_ERROR, & | ||
| "x_mean array has insufficient size:", size(x_mean), ", expected:", p) | ||
| call err0%handle(err) | ||
| return | ||
| end if | ||
| x_mean(1:p) = mu | ||
| end if | ||
|
|
||
| ! Method dispatch | ||
| select case (method_) | ||
| case ("svd") | ||
| if (optval(overwrite_x, .false.)) then | ||
| call center_data_${k1}$(x, mu) | ||
| call pca_svd_driver_${k1}$(x, n, p, components, singular_values, err0) | ||
| else | ||
| allocate(x_centered, source=x) | ||
|
||
| call center_data_${k1}$(x_centered, mu) | ||
| call pca_svd_driver_${k1}$(x_centered, n, p, components, singular_values, err0) | ||
| end if | ||
|
|
||
| case ("eig", "cov") | ||
| allocate(x_centered, source=x) | ||
|
||
| call center_data_${k1}$(x_centered, mu) | ||
| call pca_eigh_driver_${k1}$(x_centered, n, p, components, singular_values, err0) | ||
|
|
||
| case default | ||
| err0 = linalg_state_type("pca", LINALG_ERROR, "Unknown method: "//method_) | ||
JAi-SATHVIK marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
JAi-SATHVIK marked this conversation as resolved.
Outdated
Show resolved
Hide resolved
|
||
| end select | ||
JAi-SATHVIK marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| ! Handle error state | ||
| call err0%handle(err) | ||
|
|
||
| end subroutine pca_${k1}$ | ||
| #:endfor | ||
|
|
||
|
|
||
| #:for k1, t1 in PCA_KINDS_TYPES | ||
| module subroutine pca_transform_${k1}$(x, components, x_mean, x_transformed) | ||
| use stdlib_blas_constants_${k1}$, only: one, zero | ||
| ${t1}$, intent(in) :: x(:,:) | ||
| ${t1}$, intent(in) :: components(:,:) | ||
| ${t1}$, intent(in), optional :: x_mean(:) | ||
| ${t1}$, intent(out) :: x_transformed(:,:) | ||
JAi-SATHVIK marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
|
||
| integer(ilp) :: n, p, nc | ||
| ${t1}$, allocatable :: x_centered(:,:) | ||
|
|
||
| n = size(x, 1, kind=ilp) | ||
| p = size(x, 2, kind=ilp) | ||
| nc = size(components, 1, kind=ilp) | ||
|
|
||
| allocate(x_centered, source=x) | ||
| if (present(x_mean)) call center_data_${k1}$(x_centered, x_mean) | ||
|
|
||
| ! x_transformed = x_centered * components^T using GEMM | ||
| call gemm('N', 'T', n, nc, p, one, x_centered, n, components, nc, zero, x_transformed, n) | ||
JAi-SATHVIK marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| end subroutine pca_transform_${k1}$ | ||
| #:endfor | ||
|
|
||
|
|
||
| #:for k1, t1 in PCA_KINDS_TYPES | ||
| module subroutine pca_inverse_transform_${k1}$(x_reduced, components, x_mean, x_reconstructed) | ||
| use stdlib_blas_constants_${k1}$, only: one, zero | ||
| ${t1}$, intent(in) :: x_reduced(:,:) | ||
| ${t1}$, intent(in) :: components(:,:) | ||
| ${t1}$, intent(in), optional :: x_mean(:) | ||
| ${t1}$, intent(out) :: x_reconstructed(:,:) | ||
|
|
||
| integer(ilp) :: i, j, n, nc, p | ||
|
|
||
| n = size(x_reduced, 1, kind=ilp) | ||
| nc = size(x_reduced, 2, kind=ilp) | ||
| p = size(components, 2, kind=ilp) | ||
|
|
||
| ! x_reconstructed = x_reduced * components using GEMM | ||
| call gemm('N', 'N', n, p, nc, one, x_reduced, n, components, nc, zero, x_reconstructed, n) | ||
|
|
||
| if (present(x_mean)) then | ||
| do j = 1, p | ||
| do i = 1, n | ||
| x_reconstructed(i, j) = x_reconstructed(i, j) + x_mean(j) | ||
| end do | ||
| end do | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Any reasons why you use nested do concurrent(i=1:n, j=1:p)
x_reconstructed(i, j) = x_reconstructed(i, j) + x_mean(j)
end do
JAi-SATHVIK marked this conversation as resolved.
Show resolved
Hide resolved
|
||
| end if | ||
| end subroutine pca_inverse_transform_${k1}$ | ||
| #:endfor | ||
|
|
||
| end submodule stdlib_stats_pca | ||
Uh oh!
There was an error while loading. Please reload this page.