# UoG-DA_Test/simplified_analysis.py at main · Voidstorm012/UoG-DA_Test · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
"""
Simplified UWB LOS/NLOS Classification and Range Prediction Project

This script performs a basic analysis of the UWB dataset:
1. Load the dataset
2. Train a simple classifier for LOS/NLOS classification
3. Train a simple regressor for range prediction
4. Evaluate and visualize results
"""
import os
import sys
import time
import random

# Optional third-party dependencies. They are imported inside a try block so
# that a missing package does not abort the script at import time; instead
# HAVE_PACKAGES records whether every import below succeeded.
try:
    import numpy as np
    import pandas as pd
    import matplotlib.pyplot as plt
    from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
    from sklearn.model_selection import train_test_split
    from sklearn.metrics import (accuracy_score, confusion_matrix, classification_report,
                                mean_squared_error, mean_absolute_error, r2_score)
    HAVE_PACKAGES = True
except ImportError:
    # A single flag covers all packages: one failed import switches off the
    # plotting and model-training code paths that check HAVE_PACKAGES.
    # NOTE(review): load_dataset() calls pd.read_csv without checking this
    # flag, so without pandas no data can actually be loaded.
    print("Some packages are missing. Running in limited mode.")
    HAVE_PACKAGES = False

def load_dataset():
    """Read the UWB CSV parts and merge them into a single DataFrame.

    The files ``uwb_dataset_part1.csv`` .. ``uwb_dataset_part7.csv`` are looked
    up in ``data/dataset`` next to this script. Missing files and files that
    fail to parse are reported on stdout and skipped.

    Returns:
        A ``(combined_df, feature_names)`` tuple, where ``feature_names`` is
        the column list of the first file that was read successfully, or
        ``(None, None)`` when no file could be loaded.
    """
    print("Loading UWB dataset...")

    script_dir = os.path.dirname(__file__)
    dataset_dir = os.path.join(script_dir, 'data', 'dataset')
    print(f"Looking for dataset in: {dataset_dir}")

    frames = []
    feature_names = None

    for part_no in range(1, 8):
        filename = f"uwb_dataset_part{part_no}.csv"
        filepath = os.path.join(dataset_dir, filename)

        # Guard clause: report the missing part and move on to the next one.
        if not os.path.exists(filepath):
            print(f"File not found: {filepath}")
            continue

        print(f"Loading {filename}...")
        try:
            frame = pd.read_csv(filepath)

            # The header of the first readable file names the columns.
            if feature_names is None:
                feature_names = frame.columns.tolist()

            frames.append(frame)
            print(f"  Loaded {len(frame)} samples")
        except Exception as err:
            print(f"  Error loading {filename}: {err}")

    if not frames:
        print("No data loaded!")
        return None, None

    # Stack all parts row-wise with a fresh 0..n-1 index.
    combined_df = pd.concat(frames, ignore_index=True)
    print(f"Total samples loaded: {len(combined_df)}")

    # The first column is treated as the class label.
    class_dist = combined_df.iloc[:, 0].value_counts()
    print(f"Class distribution:\n{class_dist}")

    return combined_df, feature_names

def simple_analysis(data, feature_names):
    """Print summary statistics and save exploratory plots for the dataset.

    Args:
        data: DataFrame whose first column is the class label. The plots
            label value 0 as LOS and value 1 as NLOS.
        feature_names: Column names of ``data``. Entries at positions 1..14
            are each plotted as a per-class histogram.

    Returns:
        None. Statistics go to stdout; when the plotting packages are
        available, PNG files are written to ``results/plots`` next to this
        script. A failure while drawing one plot is reported and does not
        prevent the remaining plots from being produced.
    """
    print("\nPerforming basic analysis...")

    # Get basic statistics for each feature
    print("\nFeature Statistics:")
    print(data.describe())

    # Create plots directory
    plots_dir = os.path.join(os.path.dirname(__file__), 'results', 'plots')
    os.makedirs(plots_dir, exist_ok=True)

    if not HAVE_PACKAGES:
        return

    labels = data.iloc[:, 0]
    failed = 0

    # Plot class distribution
    try:
        fig = plt.figure(figsize=(8, 5))
        try:
            labels.value_counts().plot(kind='bar')
            plt.title('Class Distribution (0=LOS, 1=NLOS)')
            plt.xlabel('Class')
            plt.ylabel('Count')
            plt.tight_layout()
            plt.savefig(os.path.join(plots_dir, 'class_distribution.png'))
        finally:
            # Always release the figure, even if drawing or saving failed.
            plt.close(fig)
    except Exception as e:
        failed += 1
        print(f"Error creating plots: {e}")

    # The class masks do not depend on the feature, so build them once.
    los_mask = labels == 0
    nlos_mask = labels == 1

    # Plot distributions of key features
    for feature in feature_names[1:15]:  # Skip class column and CIR values
        try:
            fig = plt.figure(figsize=(10, 6))
            try:
                # .loc pulls only the needed column instead of copying the
                # whole filtered frame for every feature.
                los_data = data.loc[los_mask, feature]
                nlos_data = data.loc[nlos_mask, feature]

                plt.hist([los_data, nlos_data], bins=30, alpha=0.6,
                         label=['LOS', 'NLOS'])

                plt.title(f'Distribution of {feature}')
                plt.xlabel(feature)
                plt.ylabel('Frequency')
                plt.legend()
                plt.tight_layout()
                plt.savefig(os.path.join(plots_dir, f'feature_dist_{feature}.png'))
            finally:
                plt.close(fig)
        except Exception as e:
            # Best effort: report the problem and carry on with the next feature.
            failed += 1
            print(f"Error creating plot for {feature}: {e}")
            continue

        print(f"Created plot for {feature}")

    if failed:
        print(f"Plots saved to {plots_dir} ({failed} plot(s) failed)")
    else:
        print(f"All plots saved to {plots_dir}")

def train_simple_model(data):
    """Fit and evaluate a Random Forest LOS/NLOS classifier.

    Column 0 of ``data`` is the target and columns 1..14 are the inputs (the
    feature columns before the CIR samples, per the original comments). The
    data is split 80/20 with a fixed seed; accuracy, confusion matrix,
    classification report and ranked feature importances are printed, and two
    PNG plots are written to ``results/plots`` next to this script.

    Returns None. Does nothing except print a notice when the optional
    packages are unavailable.
    """
    if not HAVE_PACKAGES:
        print("\nSkipping model training (required packages not available)")
        return

    print("\nTraining a simple Random Forest classifier...")

    # Inputs and target
    features = data.iloc[:, 1:15]
    target = data.iloc[:, 0]

    # Hold out 20% of the rows for testing
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, random_state=42
    )
    print(f"Training set: {X_train.shape[0]} samples")
    print(f"Testing set: {X_test.shape[0]} samples")

    classifier = RandomForestClassifier(n_estimators=100, random_state=42, n_jobs=-1)

    print("Training model...")
    started = time.time()
    classifier.fit(X_train, y_train)
    elapsed = time.time() - started
    print(f"Training completed in {elapsed:.2f} seconds")

    # Score on the held-out rows
    print("\nEvaluating model...")
    predicted = classifier.predict(X_test)

    print(f"Accuracy: {accuracy_score(y_test, predicted):.4f}")

    print("\nConfusion Matrix:")
    cm = confusion_matrix(y_test, predicted)
    print(cm)

    print("\nClassification Report:")
    print(classification_report(y_test, predicted))

    # Rank the input columns from most to least important
    print("\nFeature Importance:")
    importance = classifier.feature_importances_
    columns = features.columns
    order = np.argsort(importance)[::-1]

    for rank, col in enumerate(order, start=1):
        print(f"{rank}. {columns[col]}: {importance[col]:.4f}")

    plots_dir = os.path.join(os.path.dirname(__file__), 'results', 'plots')
    os.makedirs(plots_dir, exist_ok=True)

    def save_current_figure(filename):
        # Tighten the layout, write the active figure to disk and release it.
        plt.tight_layout()
        plt.savefig(os.path.join(plots_dir, filename))
        plt.close()

    # Bar chart of the ranked importances
    positions = range(len(order))
    plt.figure(figsize=(12, 8))
    plt.bar(positions, importance[order], align='center')
    plt.xticks(positions, [columns[col] for col in order], rotation=90)
    plt.title('Feature Importance')
    save_current_figure('feature_importance.png')

    # Heat map of the confusion matrix
    class_labels = ['LOS (0)', 'NLOS (1)']
    tick_marks = [0, 1]
    plt.figure(figsize=(8, 6))
    plt.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
    plt.title('Confusion Matrix')
    plt.colorbar()
    plt.xticks(tick_marks, class_labels)
    plt.yticks(tick_marks, class_labels)
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')

    # Annotate each cell with its count; use white text on dark cells.
    thresh = cm.max() / 2.0
    for row, col in np.ndindex(cm.shape):
        count = cm[row, col]
        if count > thresh:
            text_color = "white"
        else:
            text_color = "black"
        plt.text(col, row, format(count, 'd'),
                 ha="center", va="center", color=text_color)

    save_current_figure('confusion_matrix.png')

    print(f"Model evaluation plots saved to {plots_dir}")

def train_range_prediction_model(data):
    """Fit and evaluate a Random Forest regressor that predicts column 1.

    The target is column 1 of ``data`` (the original comments call it
    'Range'); the inputs are column 0 (the NLOS flag) together with columns
    2..14. The data is split 80/20 with a fixed seed; MSE, RMSE, MAE and R²
    plus ranked feature importances are printed, and three PNG plots are
    written to ``results/plots`` next to this script.

    Returns None. Does nothing except print a notice when the optional
    packages are unavailable.
    """
    if not HAVE_PACKAGES:
        print("\nSkipping range prediction model training (required packages not available)")
        return

    print("\nTraining a simple Random Forest regressor for range prediction...")

    # Inputs: class flag column followed by the remaining feature columns;
    # the target column itself (position 1) is left out.
    flag_column = data.iloc[:, 0:1]
    other_columns = data.iloc[:, 2:15]
    inputs = pd.concat([flag_column, other_columns], axis=1)
    target = data.iloc[:, 1]

    # Hold out 20% of the rows for testing
    X_train, X_test, y_train, y_test = train_test_split(
        inputs, target, test_size=0.2, random_state=42
    )
    print(f"Training set: {X_train.shape[0]} samples")
    print(f"Testing set: {X_test.shape[0]} samples")

    regressor = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)

    print("Training model...")
    started = time.time()
    regressor.fit(X_train, y_train)
    elapsed = time.time() - started
    print(f"Training completed in {elapsed:.2f} seconds")

    # Score on the held-out rows
    print("\nEvaluating model...")
    y_pred = regressor.predict(X_test)

    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    metrics = (
        ("Mean Squared Error", mse),
        ("Root Mean Squared Error", rmse),
        ("Mean Absolute Error", mae),
        ("R² Score", r2),
    )
    for label, value in metrics:
        print(f"{label}: {value:.4f}")

    # Rank the input columns from most to least important
    print("\nFeature Importance for Range Prediction:")
    importance = regressor.feature_importances_
    columns = inputs.columns
    order = np.argsort(importance)[::-1]

    for rank, col in enumerate(order, start=1):
        print(f"{rank}. {columns[col]}: {importance[col]:.4f}")

    plots_dir = os.path.join(os.path.dirname(__file__), 'results', 'plots')
    os.makedirs(plots_dir, exist_ok=True)

    def save_current_figure(filename):
        # Tighten the layout, write the active figure to disk and release it.
        plt.tight_layout()
        plt.savefig(os.path.join(plots_dir, filename))
        plt.close()

    # Bar chart of the ranked importances
    positions = range(len(order))
    plt.figure(figsize=(12, 8))
    plt.bar(positions, importance[order], align='center')
    plt.xticks(positions, [columns[col] for col in order], rotation=90)
    plt.title('Feature Importance for Range Prediction')
    save_current_figure('feature_importance_range.png')

    # Scatter of actual against predicted values
    plt.figure(figsize=(10, 8))
    plt.scatter(y_test, y_pred, alpha=0.3)

    # Dashed diagonal marks where prediction equals the actual value
    lower = min(np.min(y_test), np.min(y_pred))
    upper = max(np.max(y_test), np.max(y_pred))
    diagonal = [lower, upper]
    plt.plot(diagonal, diagonal, 'r--')

    plt.title('Actual vs Predicted Range')
    plt.xlabel('Actual Range')
    plt.ylabel('Predicted Range')
    save_current_figure('actual_vs_predicted_range.png')

    # Residual diagnostics: histogram on top, residual-vs-prediction below
    residuals = y_test - y_pred
    plt.figure(figsize=(10, 8))

    plt.subplot(2, 1, 1)
    plt.hist(residuals, bins=30)
    plt.title('Histogram of Residuals')
    plt.xlabel('Residual Value')
    plt.ylabel('Frequency')

    plt.subplot(2, 1, 2)
    plt.scatter(y_pred, residuals, alpha=0.3)
    plt.axhline(y=0, color='r', linestyle='--')
    plt.title('Residuals vs Predicted Values')
    plt.xlabel('Predicted Range')
    plt.ylabel('Residual')

    save_current_figure('residuals_range.png')

    print(f"Range prediction model plots saved to {plots_dir}")

def main():
    """Run the full pipeline: load data, analyse it, then train both models.

    Prints a failure message and stops early when the dataset cannot be
    loaded; otherwise runs the exploratory analysis followed by the
    classification and range-prediction tasks.
    """
    header_rule = "=" * 80
    print(header_rule)
    print("UWB LOS/NLOS Classification and Range Prediction - Simplified Analysis")
    print(header_rule)

    data, feature_names = load_dataset()

    # Nothing to do without data.
    if data is None:
        print("\nAnalysis failed: Could not load dataset")
        return

    simple_analysis(data, feature_names)

    # Each task gets a banner and is then run on the full dataset.
    task_rule = "=" * 50

    print("\n" + task_rule)
    print("TASK 1: LOS/NLOS CLASSIFICATION")
    print(task_rule)
    train_simple_model(data)

    print("\n" + task_rule)
    print("TASK 2: RANGE PREDICTION")
    print(task_rule)
    train_range_prediction_model(data)

    print("\nAnalysis completed successfully!")

# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()