import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.base import clone
import xgboost as xgb
import lightgbm as lgb
from sklearn.neural_network import MLPRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern, WhiteKernel, ConstantKernel as C
import warnings
import time

# Import tuning functionality from shared module
from sintering_tuning import tune_hyperparameters, get_param_grids, ensure_finite

# Try to import tqdm for progress bars, but continue if not available
try:
    from tqdm import tqdm
    TQDM_AVAILABLE = True
except ImportError:
    TQDM_AVAILABLE = False

    # Create a simple alternative to tqdm
    def tqdm(iterable, desc=None):
        print(f"{desc if desc else 'Progress'}...")
        return iterable


# ensure_finite function is now imported from sintering_tuning.py

warnings.filterwarnings('ignore')

# Set random seed for reproducibility
np.random.seed(42)

# Define file paths
file_paths = [
    '160508-1021-1000,0min,56kN.csv',
    '160508-1022-900,0min,56kN.csv',
    '200508-1023-1350,0min,56kN.csv',
    '200508-1024-1200,0min,56kN.csv'
]

# Configuration
VALIDATION_FILE_INDEX = 3  # Use the 4th file for validation (0-indexed)
TARGET_COLUMN = 'Rel. Piston Trav'
EXCLUDED_COLUMNS = ['Abs. Piston Trav', 'Nr.', 'Datum', 'Zeit']

# Feature selection (manual control)
SELECTED_FEATURES = [
    'MTC1', 'MTC2', 'MTC3', 'Pyrometer', 'SV Temperature',
    'SV Power', 'SV Force', 'AV Force', 'AV Rel. Pressure',
    'I RMS', 'U RMS', 'Heating power'
]

# Model selection (we'll focus on a subset for the multi-step training)
MODELS_TO_EVALUATE = {
    'Linear Regression': True,
    'Ridge': True,
    'Lasso': True,
    'ElasticNet': True,
    'Decision Tree': True,   # More basic model, can be skipped for speed
    'Random Forest': True,
    'Gradient Boosting': True,
    'XGBoost': True,
    'LightGBM': False,        # Supported by create_base_models(), disabled by default
    'SVR': True,              # More time-consuming
    'KNN': True,              # Simple but not as effective for this problem
    'MLP': True,              # Time-consuming to train
    'GPR': True               # Very time-consuming for large datasets
}

# Hyperparameter tuning settings
TUNING_METHOD = 'bayesian'   # 'grid', 'random', 'bayesian'
CV_FOLDS = 3                 # Reduced from 5 for faster training
N_ITER = 10                  # Reduced from 20 for faster training
USE_OPTIMIZED_MODELS = True  # Whether to use hyperparameter-optimized models

# Multi-step training parameters
WINDOW_SIZE = 1
MAX_EPOCHS = 10
CURRICULUM_STEPS = [1, 2, 5, 10, 20, 50, 100]  # Gradually increase prediction length
TEACHER_FORCING_RATIO_START = 1.0  # Start with 100% ground truth
TEACHER_FORCING_RATIO_END = 0.0    # End with 0% ground truth (all predictions)
BATCH_SIZE = 128
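

# Illustrative sketch (not used by the pipeline): how the teacher forcing ratio
# decays during training. It mirrors the linear schedule applied inside
# multi_step_train() below, where the ratio moves from
# TEACHER_FORCING_RATIO_START to TEACHER_FORCING_RATIO_END as the curriculum
# steps and epochs progress.
def _example_teacher_forcing_schedule(n_steps=7, epochs_per_step=1,
                                      tf_start=1.0, tf_end=0.0):
    """Return the teacher forcing ratio for every (curriculum step, epoch) pair."""
    ratios = []
    for step_idx in range(n_steps):
        for epoch in range(epochs_per_step):
            progress = (step_idx * epochs_per_step + epoch) / (n_steps * epochs_per_step)
            ratios.append(tf_start - progress * (tf_start - tf_end))
    return ratios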


def load_data(file_paths, validation_index):
    """
    Load and preprocess the CSV files.

    Args:
        file_paths: List of CSV file paths
        validation_index: Index of the file to use for validation

    Returns:
        train_data: Combined DataFrame of training data
        validation_data: DataFrame for validation
    """
    all_data = []

    for i, file_path in enumerate(file_paths):
        print(f"Loading file: {file_path}")
        try:
            df = pd.read_csv(file_path, sep=';', decimal=',', header=0)
            print(f" File shape: {df.shape}")

            # Add a file identifier column
            df['file_id'] = i

            all_data.append(df)
        except Exception as e:
            print(f" Error loading {file_path}: {e}")

    if not all_data:
        raise ValueError("No data files could be loaded!")

    # Split into training and validation
    validation_data = all_data.pop(validation_index)
    print(f"Validation data shape: {validation_data.shape}")

    train_data = pd.concat(all_data, ignore_index=True)
    print(f"Training data shape: {train_data.shape}")

    return train_data, validation_data


def preprocess_data(df, target_col, excluded_cols, selected_features=None):
    """
    Preprocess the data for regression.

    Args:
        df: Input DataFrame
        target_col: Name of the target column
        excluded_cols: List of columns to exclude
        selected_features: List of features to include (None = use all)

    Returns:
        X: Feature matrix
        y: Target vector
        feature_names: List of feature names used
    """
    # Make a copy to avoid modifying the original
    data = df.copy()

    # Check if target column exists
    if target_col not in data.columns:
        raise ValueError(f"Target column '{target_col}' not found in data. Available columns: {data.columns.tolist()}")

    print(f"Preprocessing data with shape: {data.shape}")
    print(f"Target column: {target_col}")

    # Drop rows with NaN in target column
    original_count = len(data)
    data = data.dropna(subset=[target_col])
    dropped_count = original_count - len(data)
    print(f"Dropped {dropped_count} rows with missing target values")

    # Extract target
    y = data[target_col].values

    # Convert -999 values to NaN (likely error codes in the dataset)
    data = data.replace(-999, np.nan)

    # Drop specified columns and the target
    columns_to_drop = excluded_cols + [target_col, 'file_id']
    X_data = data.drop(columns=columns_to_drop, errors='ignore')

    # Select only specified features if provided
    if selected_features is not None:
        available_features = [col for col in selected_features if col in X_data.columns]
        missing_features = [col for col in selected_features if col not in X_data.columns]
        if missing_features:
            print(f"Warning: Some selected features are not in the data: {missing_features}")
        X_data = X_data[available_features]

    print(f"Selected features: {X_data.columns.tolist()}")

    # Check for non-numeric columns
    non_numeric = X_data.select_dtypes(exclude=[np.number]).columns.tolist()
    if non_numeric:
        print(f"Warning: Non-numeric columns found: {non_numeric}")
        print("Converting to numeric or dropping...")

        for col in non_numeric:
            try:
                # Try to convert to numeric
                X_data[col] = pd.to_numeric(X_data[col], errors='coerce')
            except Exception:
                # If conversion fails, drop the column
                print(f" Dropping column: {col}")
                X_data = X_data.drop(columns=[col])

    # Check for NaN values
    nan_count = X_data.isna().sum().sum()
    if nan_count > 0:
        print(f"Found {nan_count} NaN values in features. Filling with column means...")

        # Fill remaining NaNs with column means
        X_data = X_data.fillna(X_data.mean())

    # Get feature names for later use
    feature_names = X_data.columns.tolist()

    # Convert to numpy array for modeling
    X = X_data.values

    # Improve precision of target variable (if needed)
    y = y.astype(np.float64)

    print(f"Preprocessed data: X shape: {X.shape}, y shape: {y.shape}")

    return X, y, feature_names


def prepare_window_data(X, y, window_size=1):
    """
    Prepare data for window-based approach.

    Args:
        X: Feature matrix
        y: Target vector
        window_size: Number of previous steps to include

    Returns:
        X_window: Feature matrix with window features
        y_window: Target vector aligned with the window features
    """
    n_samples, n_features = X.shape

    # We need at least window_size+1 samples to create a valid window
    if n_samples <= window_size:
        raise ValueError(f"Not enough samples ({n_samples}) for window size {window_size}")

    # Initialize arrays for the windowed data
    X_window = np.zeros((n_samples - window_size, n_features * (window_size + 1) + window_size))
    y_window = np.zeros(n_samples - window_size)

    # Fill in the arrays
    for i in range(window_size, n_samples):
        # Current features
        X_window[i - window_size, :n_features] = X[i]

        # Add previous features and targets
        for w in range(1, window_size + 1):
            # Previous features
            start_idx = n_features + (w - 1) * n_features
            end_idx = start_idx + n_features
            X_window[i - window_size, start_idx:end_idx] = X[i - w]

            # Previous target
            X_window[i - window_size, n_features * (window_size + 1) + (w - 1)] = y[i - w]

        # Current target
        y_window[i - window_size] = y[i]

    return X_window, y_window
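

# Illustrative sketch (not used by the pipeline): the column layout produced by
# prepare_window_data() for window_size=1 and three raw features. Each windowed
# row holds the current features, then the features of the previous step, and
# finally the previous target, so the lagged target sits at index
# n_features * (window_size + 1) + (w - 1) -- the same index that is later
# overwritten with the model's own prediction during the virtual experiment.
def _example_window_layout():
    X_demo = np.arange(12, dtype=float).reshape(4, 3)  # 4 samples, 3 features
    y_demo = np.array([10.0, 11.0, 12.0, 13.0])
    X_w, y_w = prepare_window_data(X_demo, y_demo, window_size=1)
    # X_w[0] = [features of sample 1, features of sample 0, previous target y_demo[0]]
    # y_w[0] = y_demo[1]
    return X_w, y_w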


def create_batches(X, y, batch_size):
    """
    Create batches from data.

    Args:
        X: Feature matrix
        y: Target vector
        batch_size: Size of each batch

    Returns:
        batches: List of (X_batch, y_batch) tuples
    """
    n_samples = X.shape[0]
    indices = np.arange(n_samples)
    np.random.shuffle(indices)

    batches = []
    for start_idx in range(0, n_samples, batch_size):
        end_idx = min(start_idx + batch_size, n_samples)
        batch_indices = indices[start_idx:end_idx]
        batches.append((X[batch_indices], y[batch_indices]))

    return batches


def create_sequences(X_window, y_window, seq_len, n_features_per_window, window_size):
    """
    Create sequences for sequential prediction.

    Args:
        X_window: Windowed feature matrix
        y_window: Corresponding target values
        seq_len: Length of each sequence
        n_features_per_window: Number of features per window
        window_size: Window size

    Returns:
        sequences: List of (features, targets) tuples
    """
    n_samples = X_window.shape[0]

    sequences = []
    for i in range(n_samples - seq_len + 1):
        # Extract sequence of inputs and outputs
        X_seq = X_window[i:i+seq_len]
        y_seq = y_window[i:i+seq_len]

        sequences.append((X_seq, y_seq))

    return sequences


def virtual_experiment_predict(model, X_input, y_prev_actual, n_features_per_window, window_size,
                               use_predictions=False, prev_prediction=None):
    """
    Make predictions using either actual values or previous predictions.

    Args:
        model: Trained model
        X_input: Current input features
        y_prev_actual: Previous actual target values
        n_features_per_window: Number of features per window
        window_size: Window size
        use_predictions: Whether to use model's predictions instead of actual values
        prev_prediction: Previous prediction (if use_predictions is True)

    Returns:
        prediction: Model's prediction
    """
    # Make a copy to avoid modifying the original
    X_modified = X_input.copy()

    # If using predictions, replace the previous target value in the input
    if use_predictions and prev_prediction is not None:
        # Find the index of the previous target value
        prev_target_idx = n_features_per_window * (window_size + 1) + (window_size - 1)
        X_modified[prev_target_idx] = prev_prediction

    # Ensure finite values
    X_modified = ensure_finite(X_modified)

    # Make prediction - ensure 2D array (samples, features)
    try:
        prediction = model.predict(X_modified.reshape(1, -1))[0]

        # Ensure prediction is finite
        if not np.isfinite(prediction):
            print("Warning: Non-finite prediction detected, using default value.")
            if prev_prediction is not None:
                prediction = prev_prediction  # Use previous prediction as fallback
            else:
                prediction = 0.0  # Default fallback
    except Exception as e:
        print(f"Error making prediction: {e}")
        # Fallback to a reasonable value
        if prev_prediction is not None:
            prediction = prev_prediction
        else:
            prediction = 0.0

    return prediction


def create_base_models(X_train, y_train, use_optimized=True):
    """
    Create base models for training with optional hyperparameter optimization.

    Args:
        X_train: Training feature matrix
        y_train: Training target vector
        use_optimized: Whether to optimize hyperparameters

    Returns:
        models: Dictionary of model instances
    """
    models = {}

    # Get parameter grids for tuning
    param_grids, param_ranges = get_param_grids()

    # Registry of supported models: estimator class plus the default instance
    # used when hyperparameter tuning is disabled
    model_registry = {
        'Linear Regression': (LinearRegression, LinearRegression()),
        'Ridge': (Ridge, Ridge(alpha=1.0)),
        'Lasso': (Lasso, Lasso(alpha=0.01)),
        'ElasticNet': (ElasticNet, ElasticNet(alpha=0.01, l1_ratio=0.5)),
        'Decision Tree': (DecisionTreeRegressor, DecisionTreeRegressor(random_state=42)),
        'Random Forest': (RandomForestRegressor, RandomForestRegressor(n_estimators=100, random_state=42)),
        'Gradient Boosting': (GradientBoostingRegressor, GradientBoostingRegressor(n_estimators=100, random_state=42)),
        'XGBoost': (xgb.XGBRegressor, xgb.XGBRegressor(n_estimators=100, random_state=42)),
        'LightGBM': (lgb.LGBMRegressor, lgb.LGBMRegressor(n_estimators=100, random_state=42)),
        'SVR': (SVR, SVR(kernel='rbf', C=10)),
        'KNN': (KNeighborsRegressor, KNeighborsRegressor(n_neighbors=5)),
        'MLP': (MLPRegressor, MLPRegressor(random_state=42, max_iter=1000)),
        'GPR': (GaussianProcessRegressor, GaussianProcessRegressor(kernel=C(1.0) * RBF(1.0), random_state=42)),
    }

    # Create and potentially tune each enabled model
    for name, (model_class, default_model) in model_registry.items():
        if not MODELS_TO_EVALUATE.get(name, False):
            continue

        if use_optimized:
            print(f"Tuning {name}...")
            # Select parameter grid based on tuning method
            param_config = param_ranges[name] if TUNING_METHOD == 'bayesian' else param_grids[name]
            model, _ = tune_hyperparameters(model_class, param_config, X_train, y_train,
                                            method=TUNING_METHOD, cv=CV_FOLDS, n_iter=N_ITER,
                                            model_name=name)
            models[name] = model
        else:
            models[name] = default_model

    return models


def multi_step_train(model, X_train, y_train, X_val, y_val, n_features_per_window, window_size,
                     max_epochs=10, curriculum_steps=None, tf_ratio_start=1.0, tf_ratio_end=0.0,
                     batch_size=128, verbose=True):
    """
    Train a model using multi-step approach with scheduled sampling.

    Args:
        model: Base model to train
        X_train, y_train: Training data
        X_val, y_val: Validation data
        n_features_per_window: Number of features per window
        window_size: Window size
        max_epochs: Maximum number of epochs to train
        curriculum_steps: List of sequence lengths for curriculum learning
        tf_ratio_start: Initial teacher forcing ratio (1.0 = always use ground truth)
        tf_ratio_end: Final teacher forcing ratio (0.0 = always use predictions)
        batch_size: Batch size for training
        verbose: Whether to print progress

    Returns:
        trained_model: Trained model
        history: Training history
    """
    # Clone the model to start fresh, then fit it once on the training data so
    # the first scheduled-sampling predictions have a valid starting point
    trained_model = clone(model)
    trained_model.fit(X_train, y_train)

    # Initialize training history
    history = {
        'train_loss': [],
        'val_loss': [],
        'train_r2': [],
        'val_r2': [],
        'tf_ratio': []
    }

    # Default curriculum if not provided
    if curriculum_steps is None:
        curriculum_steps = [1, 2, 5, 10, 20]

    # Train with curriculum learning (increasing sequence length)
    for step_idx, seq_len in enumerate(curriculum_steps):
        print(f"\nTraining with sequence length: {seq_len}")

        # Calculate effective number of epochs for this step
        step_epochs = max(1, int(max_epochs / len(curriculum_steps)))

        # Create training sequences
        train_sequences = create_sequences(X_train, y_train, seq_len, n_features_per_window, window_size)

        # Train for multiple epochs
        for epoch in range(step_epochs):
            # Calculate current teacher forcing ratio
            progress = (step_idx * step_epochs + epoch) / (len(curriculum_steps) * step_epochs)
            tf_ratio = tf_ratio_start - progress * (tf_ratio_start - tf_ratio_end)
            history['tf_ratio'].append(tf_ratio)

            print(f"Epoch {epoch+1}/{step_epochs}, Teacher Forcing Ratio: {tf_ratio:.3f}")

            # Shuffle sequences for this epoch
            np.random.shuffle(train_sequences)

            # Process sequences in batches
            train_losses = []
            train_r2s = []

            n_batches = (len(train_sequences) + batch_size - 1) // batch_size

            # Use tqdm if available, otherwise use simple progress updates
            batch_range = tqdm(range(n_batches), desc="Training") if TQDM_AVAILABLE else range(n_batches)
            if not TQDM_AVAILABLE and n_batches > 10:
                print(f"Processing {n_batches} batches...")

            for batch_idx in batch_range:
                batch_start = batch_idx * batch_size
                batch_end = min(batch_start + batch_size, len(train_sequences))
                batch_sequences = train_sequences[batch_start:batch_end]

                batch_losses = []
                batch_r2s = []

                for X_seq, y_seq in batch_sequences:
                    seq_predictions = []
                    seq_actuals = []
                    prev_prediction = None

                    # Process each step in the sequence
                    for step in range(len(X_seq)):
                        # Decide whether to use ground truth or prediction
                        use_prediction = (step > 0) and (np.random.random() > tf_ratio)

                        # Make prediction
                        try:
                            prediction = virtual_experiment_predict(
                                trained_model, X_seq[step], y_seq[:step] if step > 0 else [],
                                n_features_per_window, window_size,
                                use_prediction, prev_prediction
                            )
                        except Exception as e:
                            print(f"Error making prediction: {e}")
                            # Fallback: use simple prediction without modifications
                            prediction = trained_model.predict(X_seq[step].reshape(1, -1))[0]

                        # Store prediction and actual values
                        seq_predictions.append(prediction)
                        seq_actuals.append(y_seq[step])

                        # Update for next step
                        prev_prediction = prediction

                    # Calculate loss for this sequence
                    mse = mean_squared_error(seq_actuals, seq_predictions)
                    batch_losses.append(mse)

                    # Calculate R² for this sequence
                    r2 = r2_score(seq_actuals, seq_predictions)
                    batch_r2s.append(r2)

                # Update model based on batch losses
                if hasattr(trained_model, 'partial_fit'):
                    # For models that support incremental learning
                    for X_seq, y_seq in batch_sequences:
                        trained_model.partial_fit(X_seq, y_seq)
                else:
                    # For models that require full batch training
                    # Gather all training data from this batch with scheduled sampling
                    X_batch = []
                    y_batch = []

                    for X_seq, y_seq in batch_sequences:
                        # Process each sequence and collect inputs/outputs with teacher forcing
                        seq_X = []
                        seq_y = []
                        seq_preds = []

                        for step in range(len(X_seq)):
                            X_modified = X_seq[step].copy()

                            # Apply teacher forcing for previous step's target if needed
                            if step > 0 and np.random.random() > tf_ratio:
                                # Use prediction for previous step
                                prev_target_idx = n_features_per_window * (window_size + 1) + (window_size - 1)
                                # Handle edge case where seq_preds might be empty
                                if seq_preds:
                                    X_modified[prev_target_idx] = seq_preds[-1]

                            # Clean input to ensure finite values
                            X_modified = ensure_finite(X_modified)

                            # Make prediction for this step (for next step's input if needed)
                            try:
                                pred = trained_model.predict(X_modified.reshape(1, -1))[0]

                                # Ensure prediction is finite
                                if not np.isfinite(pred):
                                    print("Warning: Non-finite batch prediction detected, using actual value.")
                                    pred = y_seq[step]  # Use actual value as fallback

                                seq_preds.append(pred)
                            except Exception as e:
                                print(f"Error in batch prediction: {e}")
                                # If prediction fails, use actual value
                                seq_preds.append(y_seq[step])

                            seq_X.append(X_modified)
                            seq_y.append(y_seq[step])

                        # Add this sequence's data to the batch
                        X_batch.extend(seq_X)
                        y_batch.extend(seq_y)

                    # Convert to numpy arrays
                    X_batch = np.array(X_batch)
                    y_batch = np.array(y_batch)

                    # Update the model (only if we have data)
                    if len(X_batch) > 0:
                        try:
                            trained_model.fit(X_batch, y_batch)
                        except Exception as e:
                            print(f"Error fitting model: {e}")
                            # If batch fitting fails, try individual fitting
                            for i in range(len(X_batch)):
                                try:
                                    trained_model.partial_fit(X_batch[i:i+1], y_batch[i:i+1])
                                except Exception:
                                    pass  # Skip if partial_fit isn't available

                # Track batch metrics
                avg_batch_loss = np.mean(batch_losses)
                avg_batch_r2 = np.mean(batch_r2s)
                train_losses.append(avg_batch_loss)
                train_r2s.append(avg_batch_r2)

            # Calculate overall metrics for this epoch
            epoch_train_loss = np.mean(train_losses)
            epoch_train_r2 = np.mean(train_r2s)

            # Evaluate on validation data
            val_predictions = []
            val_actuals = []

            # Create validation sequences
            val_sequences = create_sequences(X_val, y_val, seq_len, n_features_per_window, window_size)

            # Process validation sequences
            for X_seq, y_seq in val_sequences[:100]:  # Limit to 100 sequences for speed
                seq_predictions = []
                prev_prediction = None

                for step in range(len(X_seq)):
                    # Always use previous predictions for validation
                    use_prediction = step > 0

                    try:
                        prediction = virtual_experiment_predict(
                            trained_model, X_seq[step], y_seq[:step] if step > 0 else [],
                            n_features_per_window, window_size,
                            use_prediction, prev_prediction
                        )
                    except Exception as e:
                        print(f"Error in validation prediction: {e}")
                        # Fallback: use simple prediction without modifications
                        prediction = trained_model.predict(X_seq[step].reshape(1, -1))[0]

                    seq_predictions.append(prediction)
                    prev_prediction = prediction

                val_predictions.extend(seq_predictions)
                val_actuals.extend(y_seq)

            # Calculate validation metrics
            epoch_val_loss = mean_squared_error(val_actuals, val_predictions)
            epoch_val_r2 = r2_score(val_actuals, val_predictions)

            # Record history
            history['train_loss'].append(epoch_train_loss)
            history['train_r2'].append(epoch_train_r2)
            history['val_loss'].append(epoch_val_loss)
            history['val_r2'].append(epoch_val_r2)

            print(f" Train Loss: {epoch_train_loss:.6f}, R²: {epoch_train_r2:.4f}")
            print(f" Val Loss: {epoch_val_loss:.6f}, R²: {epoch_val_r2:.4f}")

    return trained_model, history
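

# Illustrative sketch (not used by the pipeline): a minimal multi_step_train()
# call on a tiny synthetic series, assuming one lag window over two raw
# features. The underscore-prefixed names exist only for this example.
def _example_multi_step_train():
    rng = np.random.RandomState(0)
    X_raw = rng.rand(60, 2)                         # 60 samples, 2 features
    y_raw = 2.0 * X_raw[:, 0] + 0.1 * rng.rand(60)  # simple synthetic target
    X_w, y_w = prepare_window_data(X_raw, y_raw, window_size=1)
    model, history = multi_step_train(
        LinearRegression(), X_w[:40], y_w[:40], X_w[40:], y_w[40:],
        n_features_per_window=2, window_size=1,
        max_epochs=2, curriculum_steps=[2, 4],
        tf_ratio_start=1.0, tf_ratio_end=0.5, batch_size=16)
    return model, history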


def virtual_experiment(model, X_val, y_val, n_features_per_window, window_size=1):
    """
    Run a virtual experiment with the trained model.

    Args:
        model: Trained regression model
        X_val: Validation feature matrix
        y_val: Validation target vector
        n_features_per_window: Number of features per window
        window_size: Window size used in training

    Returns:
        y_pred: Predicted target values
        y_true: Actual target values
    """
    n_samples = X_val.shape[0]

    # We'll store actual and predicted values
    y_true = []
    y_pred = []

    # Initialize with the first few actual values
    prev_y_values = y_val[:window_size].tolist()

    # Process each time step
    for i in range(window_size, n_samples):
        # Extract current input
        X_input = X_val[i].copy()  # Make a copy to avoid modifying the original

        # Use previous predictions instead of actual values
        for w in range(1, window_size + 1):
            prev_y_idx = n_features_per_window * (window_size + 1) + (w - 1)
            X_input[prev_y_idx] = prev_y_values[-w]

        # Clean input data to ensure finite values
        X_input = ensure_finite(X_input)

        # Make prediction - reshape to 2D array (samples, features)
        try:
            prediction = model.predict(X_input.reshape(1, -1))[0]

            # Ensure prediction is finite
            if not np.isfinite(prediction):
                print("Warning: Non-finite prediction detected, using default value.")
                prediction = prev_y_values[-1]  # Use previous value as fallback

        except Exception as e:
            print(f"Error making prediction: {e}")
            # Fallback: use previous value as prediction
            prediction = prev_y_values[-1]

        # Store actual and predicted values
        y_true.append(y_val[i])
        y_pred.append(prediction)

        # Update previous y values for next iteration
        prev_y_values.append(prediction)

    return np.array(y_pred), np.array(y_true)


def evaluate_model(model, X_test, y_test, model_name):
    """
    Evaluate model performance on test data.

    Args:
        model: Trained model
        X_test: Test feature matrix
        y_test: Test target vector
        model_name: Name of the model for display

    Returns:
        metrics: Dictionary with evaluation metrics
        y_pred: Predicted values
    """
    # Make predictions
    y_pred = model.predict(X_test)

    # Calculate metrics
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Return as dictionary
    metrics = {
        'model': model_name,
        'mse': mse,
        'rmse': rmse,
        'mae': mae,
        'r2': r2
    }

    return metrics, y_pred


def plot_learning_curves(history, model_name):
    """
    Plot learning curves from training history.

    Args:
        history: Training history dictionary
        model_name: Name of the model
    """
    fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    # Plot 1: Loss curves
    axes[0].plot(history['train_loss'], label='Training Loss')
    axes[0].plot(history['val_loss'], label='Validation Loss')
    axes[0].set_xlabel('Epoch')
    axes[0].set_ylabel('Loss (MSE)')
    axes[0].set_title('Loss Curves')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    # Plot 2: R² curves
    axes[1].plot(history['train_r2'], label='Training R²')
    axes[1].plot(history['val_r2'], label='Validation R²')
    axes[1].set_xlabel('Epoch')
    axes[1].set_ylabel('R² Score')
    axes[1].set_title('R² Curves')
    axes[1].legend()
    axes[1].grid(True, alpha=0.3)

    # Plot 3: Teacher forcing ratio
    axes[2].plot(history['tf_ratio'], label='Teacher Forcing Ratio')
    axes[2].set_xlabel('Epoch')
    axes[2].set_ylabel('Ratio')
    axes[2].set_title('Teacher Forcing Ratio')
    axes[2].set_ylim(0, 1)
    axes[2].grid(True, alpha=0.3)

    plt.suptitle(f'Learning Curves - {model_name}', fontsize=16)
    plt.tight_layout()
    plt.subplots_adjust(top=0.9)
    plt.show()
    plt.close()


def plot_time_series_prediction(y_true, y_pred, model_name, title=None):
    """
    Plot time series of actual and predicted values.

    Args:
        y_true: Actual target values
        y_pred: Predicted target values
        model_name: Name of the model
        title: Custom title (optional)
    """
    plt.figure(figsize=(15, 8))

    # Create time series plot
    plt.plot(range(len(y_true)), y_true, label='Actual', color='blue', alpha=0.7)
    plt.plot(range(len(y_pred)), y_pred, label='Predicted', color='red', alpha=0.7)

    # Add labels and title
    plt.xlabel('Time Step')
    plt.ylabel('Rel. Piston Trav')
    if title:
        plt.title(title)
    else:
        plt.title(f'Time Series Prediction - {model_name}')
    plt.legend()

    # Calculate metrics for the plot
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    # Add text with metrics
    plt.annotate(f'RMSE: {rmse:.4f}\nR²: {r2:.4f}',
                 xy=(0.05, 0.9), xycoords='axes fraction',
                 bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="gray", alpha=0.8))

    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
    plt.close()


def plot_actual_vs_predicted(y_true, y_pred, model_name, title=None):
    """
    Plot actual vs predicted values.

    Args:
        y_true: Actual target values
        y_pred: Predicted target values
        model_name: Name of the model
        title: Custom title (optional)
    """
    plt.figure(figsize=(12, 8))

    # Create scatter plot
    plt.scatter(y_true, y_pred, alpha=0.5)

    # Add perfect prediction line
    max_val = max(np.max(y_true), np.max(y_pred))
    min_val = min(np.min(y_true), np.min(y_pred))
    plt.plot([min_val, max_val], [min_val, max_val], 'r--', lw=2)

    # Add labels and title
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    if title:
        plt.title(title)
    else:
        plt.title(f'Actual vs Predicted - {model_name}')

    # Calculate metrics for the plot
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    r2 = r2_score(y_true, y_pred)

    # Add text with metrics
    plt.annotate(f'RMSE: {rmse:.4f}\nR²: {r2:.4f}',
                 xy=(0.05, 0.9), xycoords='axes fraction',
                 bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="gray", alpha=0.8))

    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()
    plt.close()


def plot_model_comparison(standard_metrics, multistep_metrics, approach="Virtual Experiment"):
    """
    Plot comparison between standard and multi-step trained models.

    Args:
        standard_metrics: Dictionary of metrics for standard models
        multistep_metrics: Dictionary of metrics for multi-step models
        approach: The approach name for the title
    """
    # Get all model names
    model_names = sorted(set(list(standard_metrics.keys()) + list(multistep_metrics.keys())))

    # Prepare data for plotting
    rmse_standard = [standard_metrics.get(name, {}).get('rmse', 0) for name in model_names]
    rmse_multistep = [multistep_metrics.get(name, {}).get('rmse', 0) for name in model_names]

    r2_standard = [standard_metrics.get(name, {}).get('r2', 0) for name in model_names]
    r2_multistep = [multistep_metrics.get(name, {}).get('r2', 0) for name in model_names]

    # Compute improvement percentages
    rmse_improvement = []
    r2_improvement = []

    for i, name in enumerate(model_names):
        if name in standard_metrics and name in multistep_metrics:
            std_rmse = standard_metrics[name]['rmse']
            ms_rmse = multistep_metrics[name]['rmse']
            rmse_imp = ((std_rmse - ms_rmse) / std_rmse) * 100
            rmse_improvement.append(rmse_imp)

            std_r2 = standard_metrics[name]['r2']
            ms_r2 = multistep_metrics[name]['r2']
            r2_imp = ((ms_r2 - std_r2) / abs(std_r2)) * 100 if std_r2 != 0 else 0
            r2_improvement.append(r2_imp)
        else:
            rmse_improvement.append(0)
            r2_improvement.append(0)

    # Create figure with 2 subplots
    fig, axes = plt.subplots(2, 1, figsize=(15, 14))

    # Plot 1: RMSE comparison
    x = np.arange(len(model_names))
    width = 0.35

    axes[0].bar(x - width/2, rmse_standard, width, label='Standard Training', color='red', alpha=0.7)
    axes[0].bar(x + width/2, rmse_multistep, width, label='Multi-step Training', color='green', alpha=0.7)

    # Add labels and annotations
    axes[0].set_xlabel('Model')
    axes[0].set_ylabel('RMSE (lower is better)')
    axes[0].set_title('RMSE Comparison')
    axes[0].set_xticks(x)
    axes[0].set_xticklabels(model_names, rotation=45, ha='right')
    axes[0].legend()

    # Add value labels on the bars
    for i, (v1, v2, imp) in enumerate(zip(rmse_standard, rmse_multistep, rmse_improvement)):
        if v1 > 0 and v2 > 0:  # Only label bars with valid data
            axes[0].text(i - width/2, v1 + 0.02, f'{v1:.3f}', ha='center', va='bottom', fontsize=8)
            axes[0].text(i + width/2, v2 + 0.02, f'{v2:.3f}', ha='center', va='bottom', fontsize=8)

            # Add improvement percentage
            if imp != 0:
                color = 'green' if imp > 0 else 'red'
                axes[0].text(i, min(v1, v2) / 2, f'{imp:.1f}%', ha='center', va='center',
                             fontsize=9, fontweight='bold', color=color,
                             bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="gray", alpha=0.8))

    # Plot 2: R² comparison
    axes[1].bar(x - width/2, r2_standard, width, label='Standard Training', color='blue', alpha=0.7)
    axes[1].bar(x + width/2, r2_multistep, width, label='Multi-step Training', color='purple', alpha=0.7)

    # Add labels and annotations
    axes[1].set_xlabel('Model')
    axes[1].set_ylabel('R² (higher is better)')
    axes[1].set_title('R² Comparison')
    axes[1].set_xticks(x)
    axes[1].set_xticklabels(model_names, rotation=45, ha='right')
    axes[1].legend()

    # Add value labels on the bars
    for i, (v1, v2, imp) in enumerate(zip(r2_standard, r2_multistep, r2_improvement)):
        if v1 != 0 or v2 != 0:  # Only label bars with valid data
            axes[1].text(i - width/2, v1 + 0.02, f'{v1:.3f}', ha='center', va='bottom', fontsize=8)
            axes[1].text(i + width/2, v2 + 0.02, f'{v2:.3f}', ha='center', va='bottom', fontsize=8)

            # Add improvement percentage
            if imp != 0:
                color = 'green' if imp > 0 else 'red'
                y_pos = max(0.1, (v1 + v2) / 2)
                axes[1].text(i, y_pos, f'{imp:.1f}%', ha='center', va='center',
                             fontsize=9, fontweight='bold', color=color,
                             bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="gray", alpha=0.8))

    # Add horizontal line at 0 for R² reference
    axes[1].axhline(y=0, color='gray', linestyle='--', alpha=0.7)

    plt.suptitle(f'Standard vs Multi-step Training Comparison ({approach})', fontsize=16)
    plt.tight_layout()
    plt.subplots_adjust(top=0.9)
    plt.show()
    plt.close()


def main():
    """Main execution function"""
    start_time = time.time()
    print("SPS Sintering Multi-step Regression Analysis")

    # Create output directory for results
    import os
    results_dir = "multistep_results"
    os.makedirs(results_dir, exist_ok=True)

    # Load data
    print("\nLoading data...")
    train_data, validation_data = load_data(file_paths, VALIDATION_FILE_INDEX)

    # Preprocess data
    print("\nPreprocessing data...")
    X_train, y_train, feature_names = preprocess_data(
        train_data, TARGET_COLUMN, EXCLUDED_COLUMNS, SELECTED_FEATURES)
    X_val, y_val, _ = preprocess_data(
        validation_data, TARGET_COLUMN, EXCLUDED_COLUMNS, SELECTED_FEATURES)

    # Create windowed data
    print("\nCreating windowed data...")
    X_train_window, y_train_window = prepare_window_data(X_train, y_train, WINDOW_SIZE)
    X_val_window, y_val_window = prepare_window_data(X_val, y_val, WINDOW_SIZE)

    # Split data for initial training
    print("\nSplitting data...")
    X_train_split, X_test, y_train_split, y_test = train_test_split(
        X_train_window, y_train_window, test_size=0.2, random_state=42)

    # Scale the data
    print("\nScaling data...")
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train_split)
    X_test_scaled = scaler.transform(X_test)
    X_val_scaled = scaler.transform(X_val_window)

    # Train models with standard approach first
    print("\nTraining models with standard approach...")

    model_status = "optimized" if USE_OPTIMIZED_MODELS else "default"
    print(f"Using {model_status} hyperparameters")

    base_models = create_base_models(X_train_scaled, y_train_split, USE_OPTIMIZED_MODELS)
    standard_models = {}
    standard_metrics = {}

    for name, model in base_models.items():
        print(f"\nTraining {name}...")
        if not USE_OPTIMIZED_MODELS:  # If we're using default models, we need to fit them here
            model.fit(X_train_scaled, y_train_split)

        # Evaluate on test set
        test_metrics, _ = evaluate_model(model, X_test_scaled, y_test, name)
        print(f" Test - RMSE: {test_metrics['rmse']:.4f}, R²: {test_metrics['r2']:.4f}")

        # Run virtual experiment
        print(f"Running virtual experiment for {name}...")
        y_virtual_pred, y_virtual_true = virtual_experiment(
            model, X_val_scaled, y_val_window, X_train.shape[1], WINDOW_SIZE)

        # Calculate metrics
        virtual_mse = mean_squared_error(y_virtual_true, y_virtual_pred)
        virtual_rmse = np.sqrt(virtual_mse)
        virtual_r2 = r2_score(y_virtual_true, y_virtual_pred)

        print(f" Virtual - RMSE: {virtual_rmse:.4f}, R²: {virtual_r2:.4f}")

        # Store model and metrics
        standard_models[name] = model
        standard_metrics[name] = {
            'rmse': virtual_rmse,
            'r2': virtual_r2
        }

        # Plot results
        plot_time_series_prediction(
            y_virtual_true, y_virtual_pred, name,
            title=f"Virtual Experiment - {name} (Standard {model_status} Training)"
        )

    # Train models with multi-step approach
    print("\nTraining models with multi-step approach...")
    n_features_per_window = X_train.shape[1]
    multistep_models = {}
    multistep_metrics = {}
    training_histories = {}

    for name, base_model in base_models.items():
        print(f"\nMulti-step training for {name}...")

        # Use a fresh model instance
        model_to_train = clone(base_model)

        # First fit with standard approach to have a starting point
        if not USE_OPTIMIZED_MODELS:  # Only need to refit if we're using default models
            model_to_train.fit(X_train_scaled, y_train_split)

        # Then apply multi-step training
        trained_model, history = multi_step_train(
            model_to_train, X_train_scaled, y_train_split, X_val_scaled, y_val_window,
            n_features_per_window, WINDOW_SIZE,
            max_epochs=MAX_EPOCHS,
            curriculum_steps=CURRICULUM_STEPS,
            tf_ratio_start=TEACHER_FORCING_RATIO_START,
            tf_ratio_end=TEACHER_FORCING_RATIO_END,
            batch_size=BATCH_SIZE,
            verbose=True
        )

        # Run virtual experiment with multi-step trained model
        print(f"Running virtual experiment for multi-step trained {name}...")
        y_ms_virtual_pred, y_ms_virtual_true = virtual_experiment(
            trained_model, X_val_scaled, y_val_window, n_features_per_window, WINDOW_SIZE)

        # Calculate metrics
        ms_virtual_mse = mean_squared_error(y_ms_virtual_true, y_ms_virtual_pred)
        ms_virtual_rmse = np.sqrt(ms_virtual_mse)
        ms_virtual_r2 = r2_score(y_ms_virtual_true, y_ms_virtual_pred)

        print(f" Multi-step Virtual - RMSE: {ms_virtual_rmse:.4f}, R²: {ms_virtual_r2:.4f}")

        # Store model, metrics, and history
        multistep_models[name] = trained_model
        multistep_metrics[name] = {
            'rmse': ms_virtual_rmse,
            'r2': ms_virtual_r2
        }
        training_histories[name] = history

        # Plot results
        plot_learning_curves(history, name)

        plot_time_series_prediction(
            y_ms_virtual_true, y_ms_virtual_pred, name,
            title=f"Virtual Experiment - {name} (Multi-step {model_status} Training)"
        )

        # Compare standard vs multi-step for this model
        if name in standard_metrics:
            # Get predictions from both models
            y_std_pred, _ = virtual_experiment(
                standard_models[name], X_val_scaled, y_val_window,
                n_features_per_window, WINDOW_SIZE)

            # Plot comparison (actual values taken from this model's own virtual experiment)
            plt.figure(figsize=(15, 8))
            plt.plot(y_ms_virtual_true, label='Actual', color='blue', alpha=0.7)
            plt.plot(y_std_pred, label=f'Standard (RMSE={standard_metrics[name]["rmse"]:.4f}, R²={standard_metrics[name]["r2"]:.4f})',
                     color='red', alpha=0.7)
            plt.plot(y_ms_virtual_pred, label=f'Multi-step (RMSE={ms_virtual_rmse:.4f}, R²={ms_virtual_r2:.4f})',
                     color='green', alpha=0.7)

            plt.xlabel('Time Step')
            plt.ylabel('Rel. Piston Trav')
            plt.title(f'Standard vs Multi-step Training Comparison - {name} ({model_status})')
            plt.legend()
            plt.grid(True, alpha=0.3)
            plt.tight_layout()
            plt.show()
            plt.close()

    # Overall comparison between standard and multi-step approaches
    plot_model_comparison(standard_metrics, multistep_metrics)

    # Calculate overall improvement if we have data
    if standard_metrics and multistep_metrics:
        std_rmse = np.mean([m['rmse'] for m in standard_metrics.values()])
        ms_rmse = np.mean([m['rmse'] for m in multistep_metrics.values()])
        rmse_improvement = ((std_rmse - ms_rmse) / std_rmse) * 100 if std_rmse != 0 else 0

        std_r2 = np.mean([m['r2'] for m in standard_metrics.values()])
        ms_r2 = np.mean([m['r2'] for m in multistep_metrics.values()])
        r2_improvement = ((ms_r2 - std_r2) / abs(std_r2)) * 100 if std_r2 != 0 else 0

        print("\nOverall Improvement:")
        print(f" Standard Training ({model_status}) - Average RMSE: {std_rmse:.4f}, Average R²: {std_r2:.4f}")
        print(f" Multi-step Training ({model_status}) - Average RMSE: {ms_rmse:.4f}, Average R²: {ms_r2:.4f}")
        print(f" RMSE Improvement: {rmse_improvement:.2f}%")
        print(f" R² Improvement: {r2_improvement:.2f}%")
    else:
        print("\nNot enough data to calculate improvement metrics.")

    # Save summary of results to file
    summary_file = os.path.join(results_dir, f"summary_{model_status}.txt")
    with open(summary_file, 'w') as f:
        f.write(f"SPS Sintering Multi-step Regression Analysis Summary ({model_status} models)\n\n")
        f.write("Standard Training Results:\n")
        for name, metrics in standard_metrics.items():
            f.write(f" {name}: RMSE={metrics['rmse']:.4f}, R²={metrics['r2']:.4f}\n")

        f.write("\nMulti-step Training Results:\n")
        for name, metrics in multistep_metrics.items():
            f.write(f" {name}: RMSE={metrics['rmse']:.4f}, R²={metrics['r2']:.4f}\n")

        if standard_metrics and multistep_metrics:
            f.write("\nOverall Improvement:\n")
            f.write(f" Standard Training - Average RMSE: {std_rmse:.4f}, Average R²: {std_r2:.4f}\n")
            f.write(f" Multi-step Training - Average RMSE: {ms_rmse:.4f}, Average R²: {ms_r2:.4f}\n")
            f.write(f" RMSE Improvement: {rmse_improvement:.2f}%\n")
            f.write(f" R² Improvement: {r2_improvement:.2f}%\n")

    elapsed_time = time.time() - start_time
    print(f"\nTotal execution time: {elapsed_time/60:.2f} minutes")
    print(f"\nResults saved to {summary_file}")
    print("\nAnalysis complete!")


if __name__ == "__main__":
    main()