import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.neighbors import KNeighborsRegressor
from sklearn.base import clone
import xgboost as xgb
import lightgbm as lgb
from sklearn.neural_network import MLPRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import RBF, Matern, WhiteKernel, ConstantKernel as C
import warnings
import time
# Import tuning functionality from shared module
from sintering_tuning import tune_hyperparameters, get_param_grids, ensure_finite
# Try to import tqdm for progress bars, but continue if not available
try:
from tqdm import tqdm
TQDM_AVAILABLE = True
except ImportError:
TQDM_AVAILABLE = False
# Create a simple alternative to tqdm
def tqdm(iterable, desc=None):
print(f"{desc if desc else 'Progress'}...")
return iterable
# ensure_finite function is now imported from sintering_tuning.py
warnings.filterwarnings('ignore')
# Set random seed for reproducibility
np.random.seed(42)
# Define file paths
file_paths = [
'160508-1021-1000,0min,56kN.csv',
'160508-1022-900,0min,56kN.csv',
'200508-1023-1350,0min,56kN.csv',
'200508-1024-1200,0min,56kN.csv'
]
# Configuration
VALIDATION_FILE_INDEX = 3 # Use the 4th file for validation (0-indexed)
TARGET_COLUMN = 'Rel. Piston Trav'
EXCLUDED_COLUMNS = ['Abs. Piston Trav', 'Nr.', 'Datum', 'Zeit']
# Feature selection (manual control)
SELECTED_FEATURES = [
'MTC1', 'MTC2', 'MTC3', 'Pyrometer', 'SV Temperature',
'SV Power', 'SV Force', 'AV Force', 'AV Rel. Pressure',
'I RMS', 'U RMS', 'Heating power'
]
# Model selection (set an entry to False to exclude that model from the multi-step training)
MODELS_TO_EVALUATE = {
'Linear Regression': True,
'Ridge': True,
'Lasso': True,
'ElasticNet': True,
'Decision Tree': True, # More basic model, can be skipped for speed
'Random Forest': True,
'Gradient Boosting': True,
    'XGBoost': True,
    'LightGBM': False,  # a LightGBM branch exists in create_base_models; set True to enable it
'SVR': True, # More time-consuming
'KNN': True, # Simple but not as effective for this problem
'MLP': True, # Time-consuming to train
'GPR': True # Very time-consuming for large datasets
}
# Hyperparameter tuning settings
TUNING_METHOD = 'bayesian' # 'grid', 'random', 'bayesian'
CV_FOLDS = 3 # Reduced from 5 for faster training
N_ITER = 10 # Reduced from 20 for faster training
USE_OPTIMIZED_MODELS = True # Whether to use hyperparameter-optimized models
# Multi-step training parameters
WINDOW_SIZE = 1
MAX_EPOCHS = 10
CURRICULUM_STEPS = [1, 2, 5, 10, 20, 50, 100] # Gradually increase prediction length
TEACHER_FORCING_RATIO_START = 1.0 # Start with 100% ground truth
TEACHER_FORCING_RATIO_END = 0.0 # End with 0% ground truth (all predictions)
BATCH_SIZE = 128
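# Note on the schedule (see multi_step_train): the teacher-forcing ratio decays
# linearly from TEACHER_FORCING_RATIO_START toward TEACHER_FORCING_RATIO_END across
# all curriculum steps. With the defaults above (7 curriculum lengths, MAX_EPOCHS=10),
# each sequence length is trained for max(1, int(10 / 7)) = 1 epoch, so the ratio
# takes the values 1.0, 6/7, 5/7, ..., 1/7 over the run.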
def load_data(file_paths, validation_index):
"""
Load and preprocess the CSV files.
Args:
file_paths: List of CSV file paths
validation_index: Index of the file to use for validation
Returns:
train_data: Combined DataFrame of training data
validation_data: DataFrame for validation
"""
all_data = []
for i, file_path in enumerate(file_paths):
print(f"Loading file: {file_path}")
try:
df = pd.read_csv(file_path, sep=';', decimal=',', header=0)
print(f" File shape: {df.shape}")
# Add a file identifier column
df['file_id'] = i
all_data.append(df)
except Exception as e:
print(f" Error loading {file_path}: {e}")
if not all_data:
raise ValueError("No data files could be loaded!")
# Split into training and validation
validation_data = all_data.pop(validation_index)
print(f"Validation data shape: {validation_data.shape}")
train_data = pd.concat(all_data, ignore_index=True)
print(f"Training data shape: {train_data.shape}")
return train_data, validation_data
def preprocess_data(df, target_col, excluded_cols, selected_features=None):
"""
Preprocess the data for regression.
Args:
df: Input DataFrame
target_col: Name of the target column
excluded_cols: List of columns to exclude
selected_features: List of features to include (None = use all)
Returns:
X: Feature matrix
y: Target vector
feature_names: List of feature names used
"""
# Make a copy to avoid modifying the original
data = df.copy()
# Check if target column exists
if target_col not in data.columns:
raise ValueError(f"Target column '{target_col}' not found in data. Available columns: {data.columns.tolist()}")
print(f"Preprocessing data with shape: {data.shape}")
print(f"Target column: {target_col}")
    # Convert -999 sentinel values to NaN (these appear to be error codes in the dataset)
    # before handling the target, so sentinel values in the target are dropped as well
    data = data.replace(-999, np.nan)
    # Drop rows with NaN in target column
    original_count = len(data)
    data = data.dropna(subset=[target_col])
    dropped_count = original_count - len(data)
    print(f"Dropped {dropped_count} rows with missing target values")
    # Extract target
    y = data[target_col].values
# Drop specified columns and the target
columns_to_drop = excluded_cols + [target_col, 'file_id']
X_data = data.drop(columns=columns_to_drop, errors='ignore')
# Select only specified features if provided
if selected_features is not None:
available_features = [col for col in selected_features if col in X_data.columns]
missing_features = [col for col in selected_features if col not in X_data.columns]
if missing_features:
print(f"Warning: Some selected features are not in the data: {missing_features}")
X_data = X_data[available_features]
print(f"Selected features: {X_data.columns.tolist()}")
# Check for non-numeric columns
non_numeric = X_data.select_dtypes(exclude=[np.number]).columns.tolist()
if non_numeric:
print(f"Warning: Non-numeric columns found: {non_numeric}")
print("Converting to numeric or dropping...")
for col in non_numeric:
try:
# Try to convert to numeric
X_data[col] = pd.to_numeric(X_data[col], errors='coerce')
            except Exception:
# If conversion fails, drop the column
print(f" Dropping column: {col}")
X_data = X_data.drop(columns=[col])
# Check for NaN values
nan_count = X_data.isna().sum().sum()
if nan_count > 0:
print(f"Found {nan_count} NaN values in features. Filling with column means...")
# Fill remaining NaNs with column means
X_data = X_data.fillna(X_data.mean())
# Get feature names for later use
feature_names = X_data.columns.tolist()
# Convert to numpy array for modeling
X = X_data.values
    # Ensure the target is stored as float64
    y = y.astype(np.float64)
print(f"Preprocessed data: X shape: {X.shape}, y shape: {y.shape}")
return X, y, feature_names
def prepare_window_data(X, y, window_size=1):
"""
Prepare data for window-based approach.
Args:
X: Feature matrix
y: Target vector
window_size: Number of previous steps to include
Returns:
X_window: Feature matrix with window features
y_window: Target vector aligned with the window features
"""
n_samples, n_features = X.shape
# We need at least window_size+1 samples to create a valid window
if n_samples <= window_size:
raise ValueError(f"Not enough samples ({n_samples}) for window size {window_size}")
# Initialize arrays for the windowed data
X_window = np.zeros((n_samples - window_size, n_features * (window_size + 1) + window_size))
y_window = np.zeros(n_samples - window_size)
# Fill in the arrays
for i in range(window_size, n_samples):
# Current features
X_window[i - window_size, :n_features] = X[i]
# Add previous features and targets
for w in range(1, window_size + 1):
# Previous features
start_idx = n_features + (w - 1) * n_features
end_idx = start_idx + n_features
X_window[i - window_size, start_idx:end_idx] = X[i - w]
# Previous target
X_window[i - window_size, n_features * (window_size + 1) + (w - 1)] = y[i - w]
# Current target
y_window[i - window_size] = y[i]
return X_window, y_window
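# Layout example (window_size=1 with the 12 selected features): each windowed row has
# 12 * 2 + 1 = 25 columns -> [X[i] (12), X[i-1] (12), y[i-1] (1)], and the target is
# y[i]. In general the previous targets occupy the last `window_size` columns, with the
# most recent one (w = 1) at index n_features * (window_size + 1).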
def create_batches(X, y, batch_size):
"""
Create batches from data.
Args:
X: Feature matrix
y: Target vector
batch_size: Size of each batch
Returns:
batches: List of (X_batch, y_batch) tuples
"""
n_samples = X.shape[0]
indices = np.arange(n_samples)
np.random.shuffle(indices)
batches = []
for start_idx in range(0, n_samples, batch_size):
end_idx = min(start_idx + batch_size, n_samples)
batch_indices = indices[start_idx:end_idx]
batches.append((X[batch_indices], y[batch_indices]))
return batches
def create_sequences(X_window, y_window, seq_len, n_features_per_window, window_size):
"""
Create sequences for sequential prediction.
Args:
X_window: Windowed feature matrix
y_window: Corresponding target values
seq_len: Length of each sequence
n_features_per_window: Number of features per window
window_size: Window size
Returns:
sequences: List of (features, targets) tuples
"""
n_samples = X_window.shape[0]
sequences = []
for i in range(n_samples - seq_len + 1):
# Extract sequence of inputs and outputs
X_seq = X_window[i:i+seq_len]
y_seq = y_window[i:i+seq_len]
sequences.append((X_seq, y_seq))
return sequences
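# Each sequence produced by create_sequences is a block of `seq_len` consecutive
# windowed rows, so n windowed samples yield n - seq_len + 1 overlapping sequences.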
def virtual_experiment_predict(model, X_input, y_prev_actual, n_features_per_window, window_size,
use_predictions=False, prev_prediction=None):
"""
Make predictions using either actual values or previous predictions.
Args:
model: Trained model
X_input: Current input features
y_prev_actual: Previous actual target values
n_features_per_window: Number of features per window
window_size: Window size
use_predictions: Whether to use model's predictions instead of actual values
prev_prediction: Previous prediction (if use_predictions is True)
Returns:
prediction: Model's prediction
"""
# Make a copy to avoid modifying the original
X_modified = X_input.copy()
# If using predictions, replace the previous target value in the input
if use_predictions and prev_prediction is not None:
        # Index of the most recent previous target (w = 1) in the window layout built
        # by prepare_window_data; for window_size == 1 this is the last column
        prev_target_idx = n_features_per_window * (window_size + 1)
        X_modified[prev_target_idx] = prev_prediction
# Ensure finite values
X_modified = ensure_finite(X_modified)
# Make prediction - ensure 2D array (samples, features)
try:
prediction = model.predict(X_modified.reshape(1, -1))[0]
# Ensure prediction is finite
if not np.isfinite(prediction):
print("Warning: Non-finite prediction detected, using default value.")
if prev_prediction is not None:
prediction = prev_prediction # Use previous prediction as fallback
else:
prediction = 0.0 # Default fallback
except Exception as e:
print(f"Error making prediction: {e}")
# Fallback to a reasonable value
if prev_prediction is not None:
prediction = prev_prediction
else:
prediction = 0.0
return prediction
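# When use_predictions is True, virtual_experiment_predict overwrites the most recent
# previous-target column in the window with the model's own previous prediction before
# predicting the current step; multi_step_train uses this to implement scheduled sampling.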
def create_base_models(X_train, y_train, use_optimized=True):
"""
Create base models for training with optional hyperparameter optimization.
Args:
X_train: Training feature matrix
y_train: Training target vector
use_optimized: Whether to optimize hyperparameters
Returns:
models: Dictionary of model instances
"""
models = {}
# Get parameter grids for tuning
param_grids, param_ranges = get_param_grids()
# Create and potentially tune models
if MODELS_TO_EVALUATE.get('Linear Regression', False):
if use_optimized:
print("Tuning Linear Regression...")
model_class = LinearRegression
# Select parameter grid based on tuning method
param_config = param_ranges['Linear Regression'] if TUNING_METHOD == 'bayesian' else param_grids['Linear Regression']
model, _ = tune_hyperparameters(model_class, param_config, X_train, y_train,
method=TUNING_METHOD, cv=CV_FOLDS, n_iter=N_ITER,
model_name='Linear Regression')
models['Linear Regression'] = model
else:
models['Linear Regression'] = LinearRegression()
if MODELS_TO_EVALUATE.get('Ridge', False):
if use_optimized:
print("Tuning Ridge...")
model_class = Ridge
param_config = param_ranges['Ridge'] if TUNING_METHOD == 'bayesian' else param_grids['Ridge']
model, _ = tune_hyperparameters(model_class, param_config, X_train, y_train,
method=TUNING_METHOD, cv=CV_FOLDS, n_iter=N_ITER,
model_name='Ridge')
models['Ridge'] = model
else:
models['Ridge'] = Ridge(alpha=1.0)
if MODELS_TO_EVALUATE.get('Lasso', False):
if use_optimized:
print("Tuning Lasso...")
model_class = Lasso
param_config = param_ranges['Lasso'] if TUNING_METHOD == 'bayesian' else param_grids['Lasso']
model, _ = tune_hyperparameters(model_class, param_config, X_train, y_train,
method=TUNING_METHOD, cv=CV_FOLDS, n_iter=N_ITER,
model_name='Lasso')
models['Lasso'] = model
else:
models['Lasso'] = Lasso(alpha=0.01)
if MODELS_TO_EVALUATE.get('ElasticNet', False):
if use_optimized:
print("Tuning ElasticNet...")
model_class = ElasticNet
param_config = param_ranges['ElasticNet'] if TUNING_METHOD == 'bayesian' else param_grids['ElasticNet']
model, _ = tune_hyperparameters(model_class, param_config, X_train, y_train,
method=TUNING_METHOD, cv=CV_FOLDS, n_iter=N_ITER,
model_name='ElasticNet')
models['ElasticNet'] = model
else:
models['ElasticNet'] = ElasticNet(alpha=0.01, l1_ratio=0.5)
if MODELS_TO_EVALUATE.get('Decision Tree', False):
if use_optimized:
print("Tuning Decision Tree...")
model_class = DecisionTreeRegressor
param_config = param_ranges['Decision Tree'] if TUNING_METHOD == 'bayesian' else param_grids['Decision Tree']
model, _ = tune_hyperparameters(model_class, param_config, X_train, y_train,
method=TUNING_METHOD, cv=CV_FOLDS, n_iter=N_ITER,
model_name='Decision Tree')
models['Decision Tree'] = model
else:
models['Decision Tree'] = DecisionTreeRegressor(random_state=42)
if MODELS_TO_EVALUATE.get('Random Forest', False):
if use_optimized:
print("Tuning Random Forest...")
model_class = RandomForestRegressor
param_config = param_ranges['Random Forest'] if TUNING_METHOD == 'bayesian' else param_grids['Random Forest']
model, _ = tune_hyperparameters(model_class, param_config, X_train, y_train,
method=TUNING_METHOD, cv=CV_FOLDS, n_iter=N_ITER,
model_name='Random Forest')
models['Random Forest'] = model
else:
models['Random Forest'] = RandomForestRegressor(n_estimators=100, random_state=42)
if MODELS_TO_EVALUATE.get('Gradient Boosting', False):
if use_optimized:
print("Tuning Gradient Boosting...")
model_class = GradientBoostingRegressor
param_config = param_ranges['Gradient Boosting'] if TUNING_METHOD == 'bayesian' else param_grids['Gradient Boosting']
model, _ = tune_hyperparameters(model_class, param_config, X_train, y_train,
method=TUNING_METHOD, cv=CV_FOLDS, n_iter=N_ITER,
model_name='Gradient Boosting')
models['Gradient Boosting'] = model
else:
models['Gradient Boosting'] = GradientBoostingRegressor(n_estimators=100, random_state=42)
if MODELS_TO_EVALUATE.get('XGBoost', False):
if use_optimized:
print("Tuning XGBoost...")
model_class = xgb.XGBRegressor
param_config = param_ranges['XGBoost'] if TUNING_METHOD == 'bayesian' else param_grids['XGBoost']
model, _ = tune_hyperparameters(model_class, param_config, X_train, y_train,
method=TUNING_METHOD, cv=CV_FOLDS, n_iter=N_ITER,
model_name='XGBoost')
models['XGBoost'] = model
else:
models['XGBoost'] = xgb.XGBRegressor(n_estimators=100, random_state=42)
if MODELS_TO_EVALUATE.get('LightGBM', False):
if use_optimized:
print("Tuning LightGBM...")
model_class = lgb.LGBMRegressor
param_config = param_ranges['LightGBM'] if TUNING_METHOD == 'bayesian' else param_grids['LightGBM']
model, _ = tune_hyperparameters(model_class, param_config, X_train, y_train,
method=TUNING_METHOD, cv=CV_FOLDS, n_iter=N_ITER,
model_name='LightGBM')
models['LightGBM'] = model
else:
models['LightGBM'] = lgb.LGBMRegressor(n_estimators=100, random_state=42)
if MODELS_TO_EVALUATE.get('SVR', False):
if use_optimized:
print("Tuning SVR...")
model_class = SVR
param_config = param_ranges['SVR'] if TUNING_METHOD == 'bayesian' else param_grids['SVR']
model, _ = tune_hyperparameters(model_class, param_config, X_train, y_train,
method=TUNING_METHOD, cv=CV_FOLDS, n_iter=N_ITER,
model_name='SVR')
models['SVR'] = model
else:
models['SVR'] = SVR(kernel='rbf', C=10)
if MODELS_TO_EVALUATE.get('KNN', False):
if use_optimized:
print("Tuning KNN...")
model_class = KNeighborsRegressor
param_config = param_ranges['KNN'] if TUNING_METHOD == 'bayesian' else param_grids['KNN']
model, _ = tune_hyperparameters(model_class, param_config, X_train, y_train,
method=TUNING_METHOD, cv=CV_FOLDS, n_iter=N_ITER,
model_name='KNN')
models['KNN'] = model
else:
models['KNN'] = KNeighborsRegressor(n_neighbors=5)
if MODELS_TO_EVALUATE.get('MLP', False):
if use_optimized:
print("Tuning MLP...")
model_class = MLPRegressor
param_config = param_ranges['MLP'] if TUNING_METHOD == 'bayesian' else param_grids['MLP']
model, _ = tune_hyperparameters(model_class, param_config, X_train, y_train,
method=TUNING_METHOD, cv=CV_FOLDS, n_iter=N_ITER,
model_name='MLP')
models['MLP'] = model
else:
models['MLP'] = MLPRegressor(random_state=42, max_iter=1000)
if MODELS_TO_EVALUATE.get('GPR', False):
if use_optimized:
print("Tuning GPR...")
model_class = GaussianProcessRegressor
param_config = param_ranges['GPR'] if TUNING_METHOD == 'bayesian' else param_grids['GPR']
model, _ = tune_hyperparameters(model_class, param_config, X_train, y_train,
method=TUNING_METHOD, cv=CV_FOLDS, n_iter=N_ITER,
model_name='GPR')
models['GPR'] = model
else:
kernel = C(1.0) * RBF(1.0)
models['GPR'] = GaussianProcessRegressor(kernel=kernel, random_state=42)
return models
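# Note on create_base_models: when use_optimized is True, tune_hyperparameters is
# expected to return an already fitted estimator, so only the default (untuned) models
# still need an explicit fit() before evaluation (handled in main()).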
def multi_step_train(model, X_train, y_train, X_val, y_val, n_features_per_window, window_size,
max_epochs=10, curriculum_steps=None, tf_ratio_start=1.0, tf_ratio_end=0.0,
batch_size=128, verbose=True):
"""
Train a model using multi-step approach with scheduled sampling.
Args:
model: Base model to train
X_train, y_train: Training data
X_val, y_val: Validation data
n_features_per_window: Number of features per window
window_size: Window size
max_epochs: Maximum number of epochs to train
curriculum_steps: List of sequence lengths for curriculum learning
tf_ratio_start: Initial teacher forcing ratio (1.0 = always use ground truth)
tf_ratio_end: Final teacher forcing ratio (0.0 = always use predictions)
batch_size: Batch size for training
verbose: Whether to print progress
Returns:
trained_model: Trained model
history: Training history
"""
# Clone the model to start fresh
trained_model = clone(model)
# Initialize training history
history = {
'train_loss': [],
'val_loss': [],
'train_r2': [],
'val_r2': [],
'tf_ratio': []
}
# Default curriculum if not provided
if curriculum_steps is None:
curriculum_steps = [1, 2, 5, 10, 20]
# Train with curriculum learning (increasing sequence length)
for step_idx, seq_len in enumerate(curriculum_steps):
print(f"\nTraining with sequence length: {seq_len}")
# Calculate effective number of epochs for this step
step_epochs = max(1, int(max_epochs / len(curriculum_steps)))
# Create training sequences
train_sequences = create_sequences(X_train, y_train, seq_len, n_features_per_window, window_size)
# Train for multiple epochs
for epoch in range(step_epochs):
# Calculate current teacher forcing ratio
progress = (step_idx * step_epochs + epoch) / (len(curriculum_steps) * step_epochs)
tf_ratio = tf_ratio_start - progress * (tf_ratio_start - tf_ratio_end)
history['tf_ratio'].append(tf_ratio)
print(f"Epoch {epoch+1}/{step_epochs}, Teacher Forcing Ratio: {tf_ratio:.3f}")
# Shuffle sequences for this epoch
np.random.shuffle(train_sequences)
# Process sequences in batches
train_losses = []
train_r2s = []
n_batches = (len(train_sequences) + batch_size - 1) // batch_size
# Use tqdm if available, otherwise use simple progress updates
batch_range = tqdm(range(n_batches), desc="Training") if TQDM_AVAILABLE else range(n_batches)
if not TQDM_AVAILABLE and n_batches > 10:
print(f"Processing {n_batches} batches...")
for batch_idx in batch_range:
batch_start = batch_idx * batch_size
batch_end = min(batch_start + batch_size, len(train_sequences))
batch_sequences = train_sequences[batch_start:batch_end]
batch_losses = []
batch_r2s = []
for X_seq, y_seq in batch_sequences:
seq_predictions = []
seq_actuals = []
prev_prediction = None
# Process each step in the sequence
for step in range(len(X_seq)):
# Decide whether to use ground truth or prediction
use_prediction = (step > 0) and (np.random.random() > tf_ratio)
# Make prediction
try:
prediction = virtual_experiment_predict(
trained_model, X_seq[step], y_seq[:step] if step > 0 else [],
n_features_per_window, window_size,
use_prediction, prev_prediction
)
except Exception as e:
print(f"Error making prediction: {e}")
# Fallback: use simple prediction without modifications
prediction = trained_model.predict(X_seq[step].reshape(1, -1))[0]
# Store prediction and actual values
seq_predictions.append(prediction)
seq_actuals.append(y_seq[step])
# Update for next step
prev_prediction = prediction
# Calculate loss for this sequence
mse = mean_squared_error(seq_actuals, seq_predictions)
batch_losses.append(mse)
                    # Calculate R² for this sequence (not well-defined for single-step sequences)
                    r2 = r2_score(seq_actuals, seq_predictions) if len(seq_actuals) > 1 else 0.0
                    batch_r2s.append(r2)
# Update model based on batch losses
if hasattr(trained_model, 'partial_fit'):
# For models that support incremental learning
for X_seq, y_seq in batch_sequences:
trained_model.partial_fit(X_seq, y_seq)
else:
# For models that require full batch training
# Gather all training data from this batch with scheduled sampling
X_batch = []
y_batch = []
for X_seq, y_seq in batch_sequences:
# Process each sequence and collect inputs/outputs with teacher forcing
seq_X = []
seq_y = []
seq_preds = []
for step in range(len(X_seq)):
X_modified = X_seq[step].copy()
# Apply teacher forcing for previous step's target if needed
if step > 0 and np.random.random() > tf_ratio:
                                # Use the prediction for the most recent previous target
                                # (column n_features_per_window * (window_size + 1), as in prepare_window_data)
                                prev_target_idx = n_features_per_window * (window_size + 1)
# Handle edge case where seq_preds might be empty
if seq_preds:
X_modified[prev_target_idx] = seq_preds[-1]
# Clean input to ensure finite values
X_modified = ensure_finite(X_modified)
# Make prediction for this step (for next step's input if needed)
try:
pred = trained_model.predict(X_modified.reshape(1, -1))[0]
# Ensure prediction is finite
if not np.isfinite(pred):
print("Warning: Non-finite batch prediction detected, using actual value.")
pred = y_seq[step] # Use actual value as fallback
seq_preds.append(pred)
except Exception as e:
print(f"Error in batch prediction: {e}")
# If prediction fails, use actual value
seq_preds.append(y_seq[step])
seq_X.append(X_modified)
seq_y.append(y_seq[step])
# Add this sequence's data to the batch
X_batch.extend(seq_X)
y_batch.extend(seq_y)
# Convert to numpy arrays
X_batch = np.array(X_batch)
y_batch = np.array(y_batch)
# Update the model (only if we have data)
if len(X_batch) > 0:
try:
trained_model.fit(X_batch, y_batch)
except Exception as e:
print(f"Error fitting model: {e}")
# If batch fitting fails, try individual fitting
for i in range(len(X_batch)):
try:
trained_model.partial_fit(X_batch[i:i+1], y_batch[i:i+1])
                        except Exception:
pass # Skip if partial_fit isn't available
# Track batch metrics
avg_batch_loss = np.mean(batch_losses)
avg_batch_r2 = np.mean(batch_r2s)
train_losses.append(avg_batch_loss)
train_r2s.append(avg_batch_r2)
# Calculate overall metrics for this epoch
epoch_train_loss = np.mean(train_losses)
epoch_train_r2 = np.mean(train_r2s)
# Evaluate on validation data
val_predictions = []
val_actuals = []
# Create validation sequences
val_sequences = create_sequences(X_val, y_val, seq_len, n_features_per_window, window_size)
# Process validation sequences
for X_seq, y_seq in val_sequences[:100]: # Limit to 100 sequences for speed
seq_predictions = []
prev_prediction = None
for step in range(len(X_seq)):
# Always use previous predictions for validation
use_prediction = step > 0
try:
prediction = virtual_experiment_predict(
trained_model, X_seq[step], y_seq[:step] if step > 0 else [],
n_features_per_window, window_size,
use_prediction, prev_prediction
)
except Exception as e:
print(f"Error in validation prediction: {e}")
# Fallback: use simple prediction without modifications
prediction = trained_model.predict(X_seq[step].reshape(1, -1))[0]
seq_predictions.append(prediction)
prev_prediction = prediction
val_predictions.extend(seq_predictions)
val_actuals.extend(y_seq)
# Calculate validation metrics
epoch_val_loss = mean_squared_error(val_actuals, val_predictions)
epoch_val_r2 = r2_score(val_actuals, val_predictions)
# Record history
history['train_loss'].append(epoch_train_loss)
history['train_r2'].append(epoch_train_r2)
history['val_loss'].append(epoch_val_loss)
history['val_r2'].append(epoch_val_r2)
print(f" Train Loss: {epoch_train_loss:.6f}, R²: {epoch_train_r2:.4f}")
print(f" Val Loss: {epoch_val_loss:.6f}, R²: {epoch_val_r2:.4f}")
return trained_model, history
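# Note on multi_step_train: models exposing partial_fit are updated incrementally batch
# by batch; all other models are refit from scratch on each batch's teacher-forced data,
# so their final state reflects the last batch they were fitted on.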
def virtual_experiment(model, X_val, y_val, n_features_per_window, window_size=1):
"""
Run a virtual experiment with the trained model.
Args:
model: Trained regression model
X_val: Validation feature matrix
y_val: Validation target vector
n_features_per_window: Number of features per window
window_size: Window size used in training
Returns:
y_pred: Predicted target values
y_true: Actual target values
"""
n_samples = X_val.shape[0]
# We'll store actual and predicted values
y_true = []
y_pred = []
# Initialize with the first few actual values
prev_y_values = y_val[:window_size].tolist()
# Process each time step
for i in range(window_size, n_samples):
# Extract current input
X_input = X_val[i].copy() # Make a copy to avoid modifying the original
# Use previous predictions instead of actual values
for w in range(1, window_size + 1):
prev_y_idx = n_features_per_window * (window_size + 1) + (w - 1)
X_input[prev_y_idx] = prev_y_values[-w]
# Clean input data to ensure finite values
X_input = ensure_finite(X_input)
# Make prediction - reshape to 2D array (samples, features)
try:
prediction = model.predict(X_input.reshape(1, -1))[0]
# Ensure prediction is finite
if not np.isfinite(prediction):
print("Warning: Non-finite prediction detected, using default value.")
prediction = prev_y_values[-1] # Use previous value as fallback
except Exception as e:
print(f"Error making prediction: {e}")
# Fallback: use previous value as prediction
prediction = prev_y_values[-1]
# Store actual and predicted values
y_true.append(y_val[i])
y_pred.append(prediction)
# Update previous y values for next iteration
prev_y_values.append(prediction)
return np.array(y_pred), np.array(y_true)
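# Note on virtual_experiment: this is a closed-loop (autoregressive) rollout. After the
# first window_size steps, every previous-target input is taken from the model's own
# predictions rather than from measured values.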
def evaluate_model(model, X_test, y_test, model_name):
"""
Evaluate model performance on test data.
Args:
model: Trained model
X_test: Test feature matrix
y_test: Test target vector
model_name: Name of the model for display
Returns:
metrics: Dictionary with evaluation metrics
y_pred: Predicted values
"""
# Make predictions
y_pred = model.predict(X_test)
# Calculate metrics
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
# Return as dictionary
metrics = {
'model': model_name,
'mse': mse,
'rmse': rmse,
'mae': mae,
'r2': r2
}
return metrics, y_pred
def plot_learning_curves(history, model_name):
"""
Plot learning curves from training history.
Args:
history: Training history dictionary
model_name: Name of the model
"""
fig, axes = plt.subplots(1, 3, figsize=(18, 6))
# Plot 1: Loss curves
axes[0].plot(history['train_loss'], label='Training Loss')
axes[0].plot(history['val_loss'], label='Validation Loss')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss (MSE)')
axes[0].set_title('Loss Curves')
axes[0].legend()
axes[0].grid(True, alpha=0.3)
# Plot 2: R² curves
axes[1].plot(history['train_r2'], label='Training R²')
axes[1].plot(history['val_r2'], label='Validation R²')
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('R² Score')
axes[1].set_title('R² Curves')
axes[1].legend()
axes[1].grid(True, alpha=0.3)
# Plot 3: Teacher forcing ratio
axes[2].plot(history['tf_ratio'], label='Teacher Forcing Ratio')
axes[2].set_xlabel('Epoch')
axes[2].set_ylabel('Ratio')
axes[2].set_title('Teacher Forcing Ratio')
axes[2].set_ylim(0, 1)
axes[2].grid(True, alpha=0.3)
plt.suptitle(f'Learning Curves - {model_name}', fontsize=16)
plt.tight_layout()
plt.subplots_adjust(top=0.9)
plt.show()
plt.close()
def plot_time_series_prediction(y_true, y_pred, model_name, title=None):
"""
Plot time series of actual and predicted values.
Args:
y_true: Actual target values
y_pred: Predicted target values
model_name: Name of the model
title: Custom title (optional)
"""
plt.figure(figsize=(15, 8))
# Create time series plot
plt.plot(range(len(y_true)), y_true, label='Actual', color='blue', alpha=0.7)
plt.plot(range(len(y_pred)), y_pred, label='Predicted', color='red', alpha=0.7)
# Add labels and title
plt.xlabel('Time Step')
plt.ylabel('Rel. Piston Trav')
if title:
plt.title(title)
else:
plt.title(f'Time Series Prediction - {model_name}')
plt.legend()
# Calculate metrics for the plot
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_true, y_pred)
# Add text with metrics
plt.annotate(f'RMSE: {rmse:.4f}\nR²: {r2:.4f}',
xy=(0.05, 0.9), xycoords='axes fraction',
bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="gray", alpha=0.8))
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
plt.close()
def plot_actual_vs_predicted(y_true, y_pred, model_name, title=None):
"""
Plot actual vs predicted values.
Args:
y_true: Actual target values
y_pred: Predicted target values
model_name: Name of the model
title: Custom title (optional)
"""
plt.figure(figsize=(12, 8))
# Create scatter plot
plt.scatter(y_true, y_pred, alpha=0.5)
# Add perfect prediction line
max_val = max(np.max(y_true), np.max(y_pred))
min_val = min(np.min(y_true), np.min(y_pred))
plt.plot([min_val, max_val], [min_val, max_val], 'r--', lw=2)
# Add labels and title
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
if title:
plt.title(title)
else:
plt.title(f'Actual vs Predicted - {model_name}')
# Calculate metrics for the plot
mse = mean_squared_error(y_true, y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_true, y_pred)
# Add text with metrics
plt.annotate(f'RMSE: {rmse:.4f}\nR²: {r2:.4f}',
xy=(0.05, 0.9), xycoords='axes fraction',
bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="gray", alpha=0.8))
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
plt.close()
def plot_model_comparison(standard_metrics, multistep_metrics, approach="Virtual Experiment"):
"""
Plot comparison between standard and multi-step trained models.
Args:
standard_metrics: Dictionary of metrics for standard models
multistep_metrics: Dictionary of metrics for multi-step models
approach: The approach name for the title
"""
# Get all model names
model_names = sorted(set(list(standard_metrics.keys()) + list(multistep_metrics.keys())))
# Prepare data for plotting
rmse_standard = [standard_metrics.get(name, {}).get('rmse', 0) for name in model_names]
rmse_multistep = [multistep_metrics.get(name, {}).get('rmse', 0) for name in model_names]
r2_standard = [standard_metrics.get(name, {}).get('r2', 0) for name in model_names]
r2_multistep = [multistep_metrics.get(name, {}).get('r2', 0) for name in model_names]
# Compute improvement percentages
rmse_improvement = []
r2_improvement = []
for i, name in enumerate(model_names):
if name in standard_metrics and name in multistep_metrics:
std_rmse = standard_metrics[name]['rmse']
ms_rmse = multistep_metrics[name]['rmse']
rmse_imp = ((std_rmse - ms_rmse) / std_rmse) * 100
rmse_improvement.append(rmse_imp)
std_r2 = standard_metrics[name]['r2']
ms_r2 = multistep_metrics[name]['r2']
r2_imp = ((ms_r2 - std_r2) / abs(std_r2)) * 100 if std_r2 != 0 else 0
r2_improvement.append(r2_imp)
else:
rmse_improvement.append(0)
r2_improvement.append(0)
# Create figure with 2 subplots
fig, axes = plt.subplots(2, 1, figsize=(15, 14))
# Plot 1: RMSE comparison
x = np.arange(len(model_names))
width = 0.35
axes[0].bar(x - width/2, rmse_standard, width, label='Standard Training', color='red', alpha=0.7)
axes[0].bar(x + width/2, rmse_multistep, width, label='Multi-step Training', color='green', alpha=0.7)
# Add labels and annotations
axes[0].set_xlabel('Model')
axes[0].set_ylabel('RMSE (lower is better)')
axes[0].set_title('RMSE Comparison')
axes[0].set_xticks(x)
axes[0].set_xticklabels(model_names, rotation=45, ha='right')
axes[0].legend()
# Add value labels on the bars
for i, (v1, v2, imp) in enumerate(zip(rmse_standard, rmse_multistep, rmse_improvement)):
if v1 > 0 and v2 > 0: # Only label bars with valid data
axes[0].text(i - width/2, v1 + 0.02, f'{v1:.3f}', ha='center', va='bottom', fontsize=8)
axes[0].text(i + width/2, v2 + 0.02, f'{v2:.3f}', ha='center', va='bottom', fontsize=8)
# Add improvement percentage
if imp != 0:
color = 'green' if imp > 0 else 'red'
axes[0].text(i, min(v1, v2) / 2, f'{imp:.1f}%', ha='center', va='center',
fontsize=9, fontweight='bold', color=color,
bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="gray", alpha=0.8))
# Plot 2: R² comparison
axes[1].bar(x - width/2, r2_standard, width, label='Standard Training', color='blue', alpha=0.7)
axes[1].bar(x + width/2, r2_multistep, width, label='Multi-step Training', color='purple', alpha=0.7)
# Add labels and annotations
axes[1].set_xlabel('Model')
axes[1].set_ylabel('R² (higher is better)')
axes[1].set_title('R² Comparison')
axes[1].set_xticks(x)
axes[1].set_xticklabels(model_names, rotation=45, ha='right')
axes[1].legend()
# Add value labels on the bars
for i, (v1, v2, imp) in enumerate(zip(r2_standard, r2_multistep, r2_improvement)):
if v1 != 0 or v2 != 0: # Only label bars with valid data
axes[1].text(i - width/2, v1 + 0.02, f'{v1:.3f}', ha='center', va='bottom', fontsize=8)
axes[1].text(i + width/2, v2 + 0.02, f'{v2:.3f}', ha='center', va='bottom', fontsize=8)
# Add improvement percentage
if imp != 0:
color = 'green' if imp > 0 else 'red'
y_pos = max(0.1, (v1 + v2) / 2)
axes[1].text(i, y_pos, f'{imp:.1f}%', ha='center', va='center',
fontsize=9, fontweight='bold', color=color,
bbox=dict(boxstyle="round,pad=0.3", fc="white", ec="gray", alpha=0.8))
# Add horizontal line at 0 for R² reference
axes[1].axhline(y=0, color='gray', linestyle='--', alpha=0.7)
plt.suptitle(f'Standard vs Multi-step Training Comparison ({approach})', fontsize=16)
plt.tight_layout()
plt.subplots_adjust(top=0.9)
plt.show()
plt.close()
def main():
"""Main execution function"""
start_time = time.time()
print("SPS Sintering Multi-step Regression Analysis")
# Create output directory for results
import os
results_dir = "multistep_results"
os.makedirs(results_dir, exist_ok=True)
# Load data
print("\nLoading data...")
train_data, validation_data = load_data(file_paths, VALIDATION_FILE_INDEX)
# Preprocess data
print("\nPreprocessing data...")
X_train, y_train, feature_names = preprocess_data(
train_data, TARGET_COLUMN, EXCLUDED_COLUMNS, SELECTED_FEATURES)
X_val, y_val, _ = preprocess_data(
validation_data, TARGET_COLUMN, EXCLUDED_COLUMNS, SELECTED_FEATURES)
# Create windowed data
print("\nCreating windowed data...")
X_train_window, y_train_window = prepare_window_data(X_train, y_train, WINDOW_SIZE)
X_val_window, y_val_window = prepare_window_data(X_val, y_val, WINDOW_SIZE)
# Split data for initial training
print("\nSplitting data...")
X_train_split, X_test, y_train_split, y_test = train_test_split(
X_train_window, y_train_window, test_size=0.2, random_state=42)
# Scale the data
print("\nScaling data...")
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_split)
X_test_scaled = scaler.transform(X_test)
X_val_scaled = scaler.transform(X_val_window)
# Train models with standard approach first
print("\nTraining models with standard approach...")
model_status = "optimized" if USE_OPTIMIZED_MODELS else "default"
print(f"Using {model_status} hyperparameters")
base_models = create_base_models(X_train_scaled, y_train_split, USE_OPTIMIZED_MODELS)
standard_models = {}
standard_metrics = {}
for name, model in base_models.items():
print(f"\nTraining {name}...")
if not USE_OPTIMIZED_MODELS: # If we're using default models, we need to fit them here
model.fit(X_train_scaled, y_train_split)
# Evaluate on test set
test_metrics, _ = evaluate_model(model, X_test_scaled, y_test, name)
print(f" Test - RMSE: {test_metrics['rmse']:.4f}, R²: {test_metrics['r2']:.4f}")
# Run virtual experiment
print(f"Running virtual experiment for {name}...")
y_virtual_pred, y_virtual_true = virtual_experiment(
model, X_val_scaled, y_val_window, X_train.shape[1], WINDOW_SIZE)
# Calculate metrics
virtual_mse = mean_squared_error(y_virtual_true, y_virtual_pred)
virtual_rmse = np.sqrt(virtual_mse)
virtual_r2 = r2_score(y_virtual_true, y_virtual_pred)
print(f" Virtual - RMSE: {virtual_rmse:.4f}, R²: {virtual_r2:.4f}")
# Store model and metrics
standard_models[name] = model
standard_metrics[name] = {
'rmse': virtual_rmse,
'r2': virtual_r2
}
# Plot results
plot_time_series_prediction(
y_virtual_true, y_virtual_pred, name,
title=f"Virtual Experiment - {name} (Standard {model_status} Training)"
)
# Train models with multi-step approach
print("\nTraining models with multi-step approach...")
n_features_per_window = X_train.shape[1]
multistep_models = {}
multistep_metrics = {}
training_histories = {}
for name, base_model in base_models.items():
print(f"\nMulti-step training for {name}...")
# Use a fresh model instance
model_to_train = clone(base_model)
# First fit with standard approach to have a starting point
if not USE_OPTIMIZED_MODELS: # Only need to refit if we're using default models
model_to_train.fit(X_train_scaled, y_train_split)
# Then apply multi-step training
trained_model, history = multi_step_train(
model_to_train, X_train_scaled, y_train_split, X_val_scaled, y_val_window,
n_features_per_window, WINDOW_SIZE,
max_epochs=MAX_EPOCHS,
curriculum_steps=CURRICULUM_STEPS,
tf_ratio_start=TEACHER_FORCING_RATIO_START,
tf_ratio_end=TEACHER_FORCING_RATIO_END,
batch_size=BATCH_SIZE,
verbose=True
)
# Run virtual experiment with multi-step trained model
print(f"Running virtual experiment for multi-step trained {name}...")
y_ms_virtual_pred, y_ms_virtual_true = virtual_experiment(
trained_model, X_val_scaled, y_val_window, n_features_per_window, WINDOW_SIZE)
# Calculate metrics
ms_virtual_mse = mean_squared_error(y_ms_virtual_true, y_ms_virtual_pred)
ms_virtual_rmse = np.sqrt(ms_virtual_mse)
ms_virtual_r2 = r2_score(y_ms_virtual_true, y_ms_virtual_pred)
print(f" Multi-step Virtual - RMSE: {ms_virtual_rmse:.4f}, R²: {ms_virtual_r2:.4f}")
# Store model, metrics, and history
multistep_models[name] = trained_model
multistep_metrics[name] = {
'rmse': ms_virtual_rmse,
'r2': ms_virtual_r2
}
training_histories[name] = history
# Plot results
plot_learning_curves(history, name)
plot_time_series_prediction(
y_ms_virtual_true, y_ms_virtual_pred, name,
title=f"Virtual Experiment - {name} (Multi-step {model_status} Training)"
)
# Compare standard vs multi-step for this model
if name in standard_metrics:
# Get predictions from both models
y_std_pred, _ = virtual_experiment(
standard_models[name], X_val_scaled, y_val_window,
n_features_per_window, WINDOW_SIZE)
# Plot comparison
plt.figure(figsize=(15, 8))
            # Ground-truth series (identical for every model on this validation run)
            plt.plot(y_ms_virtual_true, label='Actual', color='blue', alpha=0.7)
plt.plot(y_std_pred, label=f'Standard (RMSE={standard_metrics[name]["rmse"]:.4f}, R²={standard_metrics[name]["r2"]:.4f})',
color='red', alpha=0.7)
plt.plot(y_ms_virtual_pred, label=f'Multi-step (RMSE={ms_virtual_rmse:.4f}, R²={ms_virtual_r2:.4f})',
color='green', alpha=0.7)
plt.xlabel('Time Step')
plt.ylabel('Rel. Piston Trav')
plt.title(f'Standard vs Multi-step Training Comparison - {name} ({model_status})')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()
plt.close()
# Overall comparison between standard and multi-step approaches
plot_model_comparison(standard_metrics, multistep_metrics)
# Calculate overall improvement if we have data
if standard_metrics and multistep_metrics:
std_rmse = np.mean([m['rmse'] for m in standard_metrics.values()])
ms_rmse = np.mean([m['rmse'] for m in multistep_metrics.values()])
rmse_improvement = ((std_rmse - ms_rmse) / std_rmse) * 100 if std_rmse != 0 else 0
std_r2 = np.mean([m['r2'] for m in standard_metrics.values()])
ms_r2 = np.mean([m['r2'] for m in multistep_metrics.values()])
r2_improvement = ((ms_r2 - std_r2) / abs(std_r2)) * 100 if std_r2 != 0 else 0
print("\nOverall Improvement:")
print(f" Standard Training ({model_status}) - Average RMSE: {std_rmse:.4f}, Average R²: {std_r2:.4f}")
print(f" Multi-step Training ({model_status}) - Average RMSE: {ms_rmse:.4f}, Average R²: {ms_r2:.4f}")
print(f" RMSE Improvement: {rmse_improvement:.2f}%")
print(f" R² Improvement: {r2_improvement:.2f}%")
else:
print("\nNot enough data to calculate improvement metrics.")
# Save summary of results to file
summary_file = os.path.join(results_dir, f"summary_{model_status}.txt")
with open(summary_file, 'w') as f:
f.write(f"SPS Sintering Multi-step Regression Analysis Summary ({model_status} models)\n\n")
f.write(f"Standard Training Results:\n")
for name, metrics in standard_metrics.items():
f.write(f" {name}: RMSE={metrics['rmse']:.4f}, R²={metrics['r2']:.4f}\n")
f.write(f"\nMulti-step Training Results:\n")
for name, metrics in multistep_metrics.items():
f.write(f" {name}: RMSE={metrics['rmse']:.4f}, R²={metrics['r2']:.4f}\n")
if standard_metrics and multistep_metrics:
f.write(f"\nOverall Improvement:\n")
f.write(f" Standard Training - Average RMSE: {std_rmse:.4f}, Average R²: {std_r2:.4f}\n")
f.write(f" Multi-step Training - Average RMSE: {ms_rmse:.4f}, Average R²: {ms_r2:.4f}\n")
f.write(f" RMSE Improvement: {rmse_improvement:.2f}%\n")
f.write(f" R² Improvement: {r2_improvement:.2f}%\n")
elapsed_time = time.time() - start_time
print(f"\nTotal execution time: {elapsed_time/60:.2f} minutes")
print(f"\nResults saved to {summary_file}")
print("\nAnalysis complete!")
if __name__ == "__main__":
main()