Skip to content

Commit 5c1d117

Browse files
Lucas Nogueira
authored and committed
Add a complete implementation of the classical PCA algorithm (covariance matrix plus power iteration), along with a very simple test file
1 parent 09ecbcb commit 5c1d117

File tree

4 files changed

+438
-2
lines changed

4 files changed

+438
-2
lines changed

Makefile

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ BUILD_TARGETS = \
3838
llama-tokenize \
3939
llama-vdot \
4040
llama-cvector-generator \
41+
llama-test-vanilla-pca \
4142
llama-gen-docs \
4243
tests/test-c.o
4344

@@ -1479,6 +1480,12 @@ llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp \
14791480
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
14801481
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
14811482

1483+
# TODO: Move to tests
1484+
llama-test-vanilla-pca: examples/cvector-generator/mini-tests/test-vanilla-pca.cpp \
1485+
$(OBJ_ALL)
1486+
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1487+
$(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1488+
14821489
llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp \
14831490
$(OBJ_ALL)
14841491
$(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)

examples/cvector-generator/cvector-generator.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,7 @@
22
#include "common.h"
33
#include "llama.h"
44
#include "ggml.h"
5-
#include "pca.hpp"
6-
#include "mean.hpp"
5+
#include "vanilla_pca.hpp"
76

87
#ifdef GGML_USE_CUDA
98
#include "ggml-cuda.h"
Lines changed: 116 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,116 @@
1+
2+
#include "common.h"
3+
#include "llama.h"
4+
#include "ggml.h"
5+
#include "../vanilla_pca.hpp"
6+
7+
#ifdef GGML_USE_CUDA
8+
#include "ggml-cuda.h"
9+
#endif
10+
11+
#ifdef GGML_USE_METAL
12+
#include "ggml-metal.h"
13+
#endif
14+
15+
#include <cstdio>
16+
#include <cstring>
17+
18+
// Function to initialize ggml with optional GPU backend support
19+
struct ggml_context *initialize_ggml_context() {
20+
#ifdef GGML_USE_CUDA
21+
struct ggml_init_params params = { .mem_size = 1024 * 1024, .mem_buffer = NULL, .use_gpu = true };
22+
printf("Initializing with GPU backend...\n");
23+
#else
24+
struct ggml_init_params params = { .mem_size = 1024 * 1024, .mem_buffer = NULL };
25+
printf("Initializing with CPU backend...\n");
26+
#endif
27+
return ggml_init(params);
28+
}
29+
30+
// Helper function to create a tensor from a matrix
31+
struct ggml_tensor *create_tensor(struct ggml_context *ctx, float *data, int rows, int cols) {
32+
struct ggml_tensor *tensor = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, cols, rows);
33+
memcpy(tensor->data, data, ggml_nbytes(tensor));
34+
return tensor;
35+
}
36+
37+
// Function to run PCA and print results
38+
void run_pca_test(struct ggml_context *ctx, float *matrix, int rows, int cols) {
39+
struct ggml_tensor *input_tensor = create_tensor(ctx, matrix, rows, cols);
40+
41+
PCA::pca_params pca_params;
42+
pca_params.n_threads = 8;
43+
pca_params.n_batch = 20;
44+
pca_params.n_iterations = 1000;
45+
pca_params.tolerance = 1e-5;
46+
47+
PCA::pca_result result;
48+
PCA::run_single_pca(pca_params, input_tensor, result);
49+
50+
printf("\nPrincipal components:\n");
51+
float *b = (float *)result.principal_component->data;
52+
for (int i = 0; i < result.principal_component->ne[0]; i++) {
53+
printf("%f ", b[i]);
54+
}
55+
printf("\nEigenvalue: %f\n", result.explained_variance);
56+
}
57+
58+
int main() {
59+
// Initialize ggml context
60+
struct ggml_context *ctx = initialize_ggml_context();
61+
if (ctx == NULL) {
62+
printf("Failed to initialize ggml context\n");
63+
return 1;
64+
}
65+
66+
// Define matrices
67+
float input_matrix1[16] = {
68+
-0.124132, 0.740341, -0.452462, 0.777050,
69+
1.045571, -0.342142, -0.926047, -0.512965,
70+
0.710109, 0.092479, 0.630075, 1.762937,
71+
0.230954, -0.808937, 1.057424, 0.051361
72+
};
73+
74+
float input_matrix2[100] = {
75+
440152.493740, 122038.234845, 495176.910111, 34388.521115, 909320.402079, 258779.981600, 662522.284354, 311711.076089, 520068.021178, 546710.279343,
76+
184854.455526, 969584.627765, 775132.823361, 939498.941564, 894827.350428, 597899.978811, 921874.235023, 88492.502052, 195982.862419, 45227.288911,
77+
325330.330763, 388677.289689, 271349.031774, 828737.509152, 356753.326694, 280934.509687, 542696.083158, 140924.224975, 802196.980754, 74550.643680,
78+
986886.936601, 772244.769297, 198715.681534, 5522.117124, 815461.428455, 706857.343848, 729007.168041, 771270.346686, 74044.651734, 358465.728544,
79+
115869.059525, 863103.425876, 623298.126828, 330898.024853, 63558.350286, 310982.321716, 325183.322027, 729606.178338, 637557.471355, 887212.742576,
80+
472214.925162, 119594.245938, 713244.787223, 760785.048617, 561277.197569, 770967.179955, 493795.596364, 522732.829382, 427541.018359, 25419.126744,
81+
107891.426993, 31429.185687, 636410.411264, 314355.981076, 508570.691165, 907566.473926, 249292.229149, 410382.923036, 755551.138543, 228798.165492,
82+
76979.909829, 289751.452914, 161221.287254, 929697.652343, 808120.379564, 633403.756510, 871460.590188, 803672.076899, 186570.058886, 892558.998490,
83+
539342.241916, 807440.155164, 896091.299923, 318003.474972, 110051.924528, 227935.162542, 427107.788626, 818014.765922, 860730.583256, 6952.130531,
84+
510747.302578, 417411.003149, 222107.810471, 119865.367334, 337615.171404, 942909.703913, 323202.932021, 518790.621743, 703018.958895, 363629.602379
85+
};
86+
87+
float input_matrix3[9] = {
88+
0.374540, 0.950714, 0.731994,
89+
0.598658, 0.156019, 0.155995,
90+
0.058084, 0.866176, 0.601115
91+
};
92+
93+
float input_matrix4[9] = {
94+
10.000000, 0.000000, 0.000000,
95+
0.000000, 5.000000, 0.000000,
96+
0.000000, 0.000000, 1.000000
97+
};
98+
99+
// Run PCA for each matrix
100+
printf("Testing Matrix 1:\n");
101+
run_pca_test(ctx, input_matrix1, 4, 4);
102+
103+
printf("\nTesting Matrix 2:\n");
104+
run_pca_test(ctx, input_matrix2, 10, 10);
105+
106+
printf("\nTesting Matrix 3:\n");
107+
run_pca_test(ctx, input_matrix3, 3, 3);
108+
109+
printf("\nTesting Matrix 4:\n");
110+
run_pca_test(ctx, input_matrix4, 3, 3);
111+
112+
// Cleanup
113+
ggml_free(ctx);
114+
return 0;
115+
}
116+

0 commit comments

Comments
 (0)