Skip to content

SIGSEGV when device_type=cuda #91

@stfnrzz

Description

@stfnrzz

I compiled LightGBM with GPU and CUDA support, and it works correctly (with both the CPU and CUDA device types) when run from the CLI on datasets read from CSV files.

The same LightGBM library also works correctly from Java via lightgbm4j, for regression tasks, when using "device_type=cpu".

The same Java library causes a SIGSEGV error when "device_type=cuda" is set.

Below my code:

package ai;

import com.microsoft.ml.lightgbm.PredictionType;
import io.github.metarank.lightgbm4j.LGBMBooster;
import io.github.metarank.lightgbm4j.LGBMDataset;
import io.github.metarank.lightgbm4j.LGBMException;

import java.util.Random;

/**
 * Minimal reproducer: trains a LightGBM regression model through lightgbm4j on a
 * synthetic, linearly generated dataset and predicts a single new row.
 *
 * <p>The training parameters request {@code device_type=cuda}; the program is meant
 * to exercise the CUDA code path of the native library from Java.
 */
public class ParametricRegressionExample {

    /**
     * Builds the synthetic dataset, trains the booster for a fixed number of
     * iterations and prints one prediction.
     *
     * <p>Native handles (dataset and booster) are released in {@code finally}
     * blocks so they are closed even when a LightGBM call fails part-way through.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            // Synthetic dataset parameters
            int numSamples = 10000;   // number of samples
            int numFeatures = 3;      // number of features per sample
            double slope = 2.0;       // regression slope applied to every feature
            double intercept = 5.0;   // regression intercept
            double noiseLevel = 0.5;  // scale of the Gaussian noise added to labels

            double[] data = generateData(numSamples, numFeatures);
            float[] labels = generateLabels(data, numSamples, numFeatures, slope, intercept, noiseLevel);

            // Create the LightGBM dataset from the flat feature matrix
            LGBMDataset dataset = LGBMDataset.createFromMat(data, numSamples, numFeatures, true, "", null);
            try {
                dataset.setField("label", labels);

                // Regression model parameters.
                // device_type is specified exactly once: a second, conflicting
                // device_type entry would make the effective device ambiguous.
                // NOTE(review): task/data/valid_data/output_model look like CLI-style
                // settings; whether they have any effect through this API is not
                // visible here — confirm against the LightGBM parameter docs.
                String parameters = "device_type=cuda\n"
                        + "task=train\n"
                        + "boosting_type=dart\n"
                        + "objective=regression\n"
                        + "metric=l2\n"
                        + "metric_freq=1\n"
                        + "is_training_metric=true\n"
                        + "max_bin=255\n"
                        + "data=train_data.txt\n"
                        + "valid_data=test_data.txt\n"
                        + "num_trees=65\n"
                        + "learning_rate=0.05\n"
                        + "num_leaves=35\n"
                        + "tree_learner=serial\n"
                        + "feature_fraction=0.6\n"
                        + "bagging_freq=5\n"
                        + "bagging_fraction=0.8\n"
                        + "min_data_in_leaf=200\n"
                        + "min_sum_hessian_in_leaf=10.0\n"
                        + "use_two_round_loading=false\n"
                        + "is_save_binary_file=false\n"
                        + "output_model=path/to/model\n"
                        + "num_machines=1";

                // Initialise the booster used for training
                LGBMBooster booster = LGBMBooster.create(dataset, parameters);
                try {
                    // Number of training iterations driven manually by this loop.
                    // NOTE(review): this differs from num_trees=65 above — confirm
                    // which of the two is intended.
                    int numIterations = 100;
                    for (int i = 0; i < numIterations; i++) {
                        booster.updateOneIter();
                        System.out.println("Iterazione " + (i + 1) + " completata.");
                    }

                    // Predict on a single new row
                    double[] newData = {2.0, 3.0, 4.0};
                    double[] predictions = booster.predictForMat(newData, 1, numFeatures, true, PredictionType.C_API_PREDICT_NORMAL);

                    System.out.println("Predizioni per nuovo dato: " + predictions[0]);
                } finally {
                    // Release the booster before the dataset it was created from
                    booster.close();
                }
            } finally {
                dataset.close();
            }

        } catch (LGBMException e) {
            System.err.println("Errore durante l'esecuzione di LightGBM: " + e.getMessage());
        }
    }

    /**
     * Generates a flat feature matrix of random values.
     *
     * @param numSamples  number of rows
     * @param numFeatures number of columns
     * @return array of length {@code numSamples * numFeatures}, each entry being
     *         {@code Random.nextFloat()} widened to double and multiplied by 10
     */
    private static double[] generateData(int numSamples, int numFeatures) {
        Random random = new Random();
        double[] data = new double[numSamples * numFeatures];
        for (int i = 0; i < data.length; i++) {
            data[i] = (double) random.nextFloat() * 10;
        }
        return data;
    }

    /**
     * Generates one label per sample as
     * {@code intercept + slope * (sum of the sample's features) + gaussian * noiseLevel}.
     *
     * @param data        flat feature matrix; sample {@code i} occupies indices
     *                    {@code i * numFeatures} to {@code i * numFeatures + numFeatures - 1}
     * @param numSamples  number of samples (labels to produce)
     * @param numFeatures number of features per sample
     * @param slope       coefficient applied to every feature
     * @param intercept   constant term added to every label
     * @param noiseLevel  multiplier applied to {@code Random.nextGaussian()}
     * @return array of {@code numSamples} labels, narrowed to float
     */
    private static float[] generateLabels(double[] data, int numSamples, int numFeatures, double slope, double intercept, double noiseLevel) {
        Random random = new Random();
        float[] labels = new float[numSamples];
        for (int i = 0; i < numSamples; i++) {
            double sum = intercept;
            for (int j = 0; j < numFeatures; j++) {
                sum += slope * data[i * numFeatures + j];
            }
            labels[i] = (float) (sum + (random.nextGaussian() * noiseLevel));
        }
        return labels;
    }

}

Metadata

Metadata

Assignees

No one assigned

    Labels

    bug: Something isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions