Physics-informed loss¶
MSE on (V, theta, P_g, Q_g) lets a model match solver outputs in distribution but doesn't directly penalize physics violations: a tiny mismatch in voltage angle can translate to a large bus-level power imbalance.
OPFLossManager supports a PhysicsInformedLoss term that adds a power-balance penalty on top of the supervised loss. This notebook compares the two side by side.
import torch
from lumina.dataset.opf.opf_dataset import OPFDataset
from lumina.dataset.opf.transforms import to_float32
from lumina.loader.opf.opf_loader import DataLoader
from lumina.model.opf.hetero_model import OPFHeteroGNN
from lumina.model.opf.losses import OPFLossManager, PhysicsInformedLoss
from lumina.evaluator.opf.evaluator import ACOPFConstraintEvaluator
# Run on GPU when available; all tensors/models are moved to this device below.
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
DATA_ROOT = '/path/to/datasets'

# Single-case dataset; cast features to float32 so they match model weights.
ds = OPFDataset(
    root=DATA_ROOT,
    case_name='pglib_opf_case14_ieee',
    group_id=0,
    transform=to_float32,
)

# 90/10 train/validation split by index.
n = len(ds)
n_train = int(0.9 * n)
train_loader = DataLoader(ds[:n_train], batch_size=64, shuffle=True)
val_loader = DataLoader(ds[n_train:], batch_size=64)

# One graph, used below to read metadata and per-node-type feature widths.
sample = ds[0]
Two loss configurations¶
- Baseline: `OPFLossManager(loss_type='mse')` — plain supervised MSE.
- Physics-informed: `PhysicsInformedLoss(physics_weight=λ, penalty_method='quadratic')` — supervised MSE plus a power-balance penalty.
Note:
`PhysicsInformedLoss` requires a `constraint_computer` (set via `loss.set_constraint_computer(...)`) to produce a non-zero physics penalty. Without one, the penalty term is hard-coded to 0 and the loss reduces to plain MSE. A turn-key constraint computer isn't currently shipped — wire one up before treating this comparison as meaningful.
# Plain supervised MSE baseline.
loss_baseline = OPFLossManager(loss_type='mse')

# Supervised MSE plus a quadratic power-balance penalty weighted by lambda=0.1.
# Other penalty_method options: 'absolute', 'log_barrier'.
loss_physics = PhysicsInformedLoss(physics_weight=0.1, penalty_method='quadratic')

# Required for a non-zero physics penalty; without it the term is hard-coded to 0.
# loss_physics.set_constraint_computer(my_constraint_computer)
Train both¶
We train two identical models, one per loss, then compare violations on the validation set.
# Per-node-type input feature widths, read off the sample graph.
input_channels = {node_type: sample[node_type].x.size(-1) for node_type in sample.node_types}

def make_model():
    """Build a fresh hetero-GNN so both loss configs train identical architectures."""
    model = OPFHeteroGNN(
        metadata=sample.metadata(),
        input_channels=input_channels,
        hidden_channels=64,
        num_layers=3,
        backend='sage',
    )
    return model.to(DEVICE)
def _scalar_loss(loss_mgr, pred, batch):
"""Normalize the two loss return shapes:
- OPFLossManager.compute_loss -> (loss, info_dict)
- PhysicsInformedLoss.compute_loss -> dict with 'total_loss'
"""
out = loss_mgr.compute_loss(pred, batch)
if isinstance(out, tuple):
return out[0]
return out['total_loss']
def train(model, loss_mgr, epochs=5):
    """Run a short training loop over the module-level train_loader.

    Works with either loss manager via _scalar_loss; gradients are clipped
    to a max norm of 1.0 each step. Returns the trained model.
    """
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-3)
    for _epoch in range(epochs):
        model.train()
        for batch in train_loader:
            batch = batch.to(DEVICE)
            optimizer.zero_grad()
            prediction = model(batch.x_dict, batch.edge_index_dict, batch.edge_attr_dict)
            _scalar_loss(loss_mgr, prediction, batch).backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
    return model
# Re-seed before each run so both models share identical weight initialization
# and data-shuffling order — the only difference between them is the loss.
torch.manual_seed(0)
model_mse = train(make_model(), loss_baseline)
torch.manual_seed(0)
model_phys = train(make_model(), loss_physics)
Compare violations¶
import numpy as np
evaluator = ACOPFConstraintEvaluator()

def collect_metrics(model):
    """Average the evaluator's per-batch constraint metrics over the validation set."""
    model.eval()
    per_batch = []
    with torch.no_grad():
        for batch in val_loader:
            batch = batch.to(DEVICE)
            pred = model(batch.x_dict, batch.edge_index_dict, batch.edge_attr_dict)
            # Evaluation happens on CPU, so move both prediction and batch back.
            pred_cpu = {name: tensor.cpu() for name, tensor in pred.items()}
            per_batch.append(evaluator.evaluate(batch.cpu(), pred_cpu))
    # Mean of each metric across batches; metric keys come from the first batch.
    return {key: float(np.mean([metrics[key] for metrics in per_batch])) for key in per_batch[0]}
m_mse = collect_metrics(model_mse)
m_phys = collect_metrics(model_phys)

# Positive delta means the physics-informed model reduced that violation metric.
print(f'{"metric":28s} {"mse":>12s} {"physics":>12s} delta')
for metric in m_mse:
    baseline = m_mse[metric]
    physics = m_phys[metric]
    rel = (baseline - physics) / max(baseline, 1e-12)  # floor guards divide-by-zero
    print(f'{metric:28s} {baseline:12.4e} {physics:12.4e} {100*rel:+6.1f}%')
Tuning notes¶
- `physics_weight` is the trade-off knob. Too high crowds out the supervised signal; too low and the constraint barely matters. Start at 0.1 and sweep one decade in each direction.
- `penalty_method='log_barrier'` is differentiable everywhere but blows up near the boundary — only use it when you're already close to feasibility (e.g., late in training).
- For multi-case training, the physics term should be normalized per case so a single weight works across grids of different sizes.