|
| 1 | +# import bnlearn as bn |
| 2 | +# bn.system_info() |
| 3 | + |
| 4 | +# %% |
| 5 | +import bnlearn as bn |
| 6 | +import numpy as np |
| 7 | + |
| 8 | +n = np.random.randint(10, 1000) |
| 9 | +DAG = bn.import_DAG('sprinkler') |
| 10 | +df = bn.sampling(DAG, n=n, methodtype='gibbs') |
| 11 | +model = bn.structure_learning.fit(df, start_dag=DAG) |
| 12 | +model = bn.structure_learning.fit(df, start_dag='test') |
| 13 | + |
| 14 | +bn.plot(model) |
| 15 | + |
| 16 | +# %% |
1 | 17 | import bnlearn as bn |
2 | | -bn.system_info() |
| 18 | + |
| 19 | +# Define the causal dependencies based on your expert/domain knowledge. |
| 20 | +# Left is the source, and right is the target node. |
| 21 | +edges = [('Cloudy', 'Sprinkler'), |
| 22 | + ('Cloudy', 'Rain'), |
| 23 | + ('Sprinkler', 'Wet_Grass'), |
| 24 | + ('Rain', 'Wet_Grass')] |
| 25 | + |
| 26 | +# Create the DAG |
| 27 | +# DAG = bn.make_DAG(edges) |
| 28 | +DAG = bn.make_DAG(edges, methodtype='bayes') |
| 29 | +# bn.plot(DAG) |
| 30 | + |
| 31 | +# Generate data |
| 32 | +df = bn.sampling(DAG, n=10) |
| 33 | +model = bn.structure_learning.fit(df, start_dag=DAG) |
| 34 | +bn.plot(model) |
| 35 | + |
| 36 | +# Nothing is changed for the DAG. Only the CPDs are estimated now. |
| 37 | +# bn.compare_networks(DAG, model) |
| 38 | + |
3 | 39 |
|
4 | 40 | # %% |
5 | 41 | import bnlearn as bn |
|
401 | 437 | # from impute import knn_imputer, mice_imputer |
402 | 438 |
|
403 | 439 | # Load the dataset |
404 | | -df = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data-original', delim_whitespace=True, header=None, names=['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model year', 'origin', 'car name']) |
| 440 | +df = pd.read_fwf('http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data-original', header=None) |
| 441 | +df.columns = ["mpg","cylinders","displacement","horsepower", "weight","acceleration","model_year","origin_car"] |
| 442 | +df[["origin","car_name"]] = df["origin_car"].str.split("\t", expand=True) |
| 443 | +df["origin"] = df["origin"].str.replace(".", "", regex=False).astype(int) |
| 444 | +df["car_name"] = df["car_name"].str.strip('"') |
| 445 | +df = df.drop(columns="origin_car") |
405 | 446 |
|
406 | 447 | df.loc[1]=df.loc[0] |
407 | 448 | df.loc[11]=df.loc[10] |
408 | 449 | df.loc[50]=df.loc[20] |
409 | 450 |
|
410 | 451 | index_nan = [0, 10, 20] |
411 | | -carnames = df['car name'].loc[index_nan] |
| 452 | +carnames = df['car_name'].loc[index_nan] |
412 | 453 |
|
413 | | -df['car name'].loc[index_nan]=None |
| 454 | +df.loc[index_nan, 'car_name'] = None  # one .loc call; chained df[col].loc[rows] = ... may assign to a temporary and leave df unchanged |
414 | 455 | df.isna().sum() |
415 | 456 |
|
416 | 457 | # KNN imputer |
417 | | -dfnew = bn.knn_imputer(df, n_neighbors=3, weights='distance', string_columns=['car name']) |
| 458 | +dfnew = bn.knn_imputer(df, n_neighbors=3, weights='distance', string_columns=['car_name']) |
418 | 459 | # Results |
419 | | -np.all(dfnew['car name'].loc[index_nan].values==carnames.values) |
| 460 | +np.all(dfnew['car_name'].loc[index_nan].values==carnames.values) |
420 | 461 |
|
421 | 462 | # MICE imputer |
422 | | -dfnew = bn.mice_imputer(df, max_iter=5, string_columns='car name') |
| 463 | +dfnew = bn.mice_imputer(df, max_iter=5, string_columns='car_name') |
423 | 464 | # Results |
424 | | -np.all(dfnew['car name'].loc[index_nan].values==carnames.values) |
| 465 | +np.all(dfnew['car_name'].loc[index_nan].values==carnames.values) |
425 | 466 |
|
426 | 467 |
|
427 | 468 |
|
|
719 | 760 |
|
720 | 761 | # Structure learning |
721 | 762 | # model = bn.structure_learning.fit(df, methodtype='hc') |
722 | | -model = bn.structure_learning.fit(df, methodtype='pc', params_pc={'pearsonr': cii_test,'alpha': 0.05}) |
| 763 | +model = bn.structure_learning.fit(df, methodtype='pc', params_pc={'ci_test': 'chi_square', 'alpha': 0.05})  # NOTE(review): 'ci_test' is the key expected to select the CI test; 'pearsonr' is a test name, not a key - confirm against bnlearn docs |
723 | 764 |
|
724 | 765 | # Compute edge strength |
725 | 766 | model = bn.independence_test(model, df) |
|
744 | 785 | import matplotlib.pyplot as plt |
745 | 786 | import bnlearn as bn |
746 | 787 |
|
747 | | -df = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data-original', |
748 | | - delim_whitespace=True, header=None, |
749 | | - names = ['mpg', 'cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'model year', 'origin', 'car name']) |
| 788 | +df = pd.read_fwf('http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data-original', header=None) |
| 789 | +df.columns = ["mpg","cylinders","displacement","horsepower", "weight","acceleration","model_year","origin_car"] |
| 790 | +df[["origin","car_name"]] = df["origin_car"].str.split("\t", expand=True) |
| 791 | +df["origin"] = df["origin"].str.replace(".", "", regex=False).astype(int) |
| 792 | +df["car_name"] = df["car_name"].str.strip('"') |
| 793 | +df = df.drop(columns="origin_car") |
750 | 794 |
|
751 | 795 | # df = bn.import_example(data='auto_mpg') |
752 | 796 |
|
|
902 | 946 |
|
903 | 947 |
|
904 | 948 | # %% |
905 | | -df = pd.read_csv('http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data-original', |
906 | | - delim_whitespace=True, header=None, |
907 | | - names = ['mpg', 'cylinders', 'displacement', |
908 | | - 'horsepower', 'weight', 'acceleration', |
909 | | - 'model year', 'origin', 'car name']) |
| 949 | +df = pd.read_fwf('http://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data-original', header=None) |
| 950 | +df.columns = ["mpg","cylinders","displacement","horsepower", "weight","acceleration","model_year","origin_car"] |
| 951 | +df[["origin","car_name"]] = df["origin_car"].str.split("\t", expand=True) |
| 952 | +df["origin"] = df["origin"].str.replace(".", "", regex=False).astype(int) |
| 953 | +df["car_name"] = df["car_name"].str.strip('"') |
| 954 | +df = df.drop(columns="origin_car") |
| 955 | + |
910 | 956 | df.dropna(inplace=True) |
911 | | -df.drop(['model year', 'origin', 'car name'], axis=1, inplace=True) |
| 957 | +df.drop(['model_year', 'origin', 'car_name'], axis=1, inplace=True)  # names must match the snake_case columns assigned after read_fwf; the old spaced names raise KeyError |
912 | 958 |
|
|
0 commit comments