pytorch mlp + mitsuba question #810
In this toy program, I am trying to learn the `base_color` parameter with an MLP in PyTorch while using BSDF values as the loss (there is no image rendered). Am I missing some crucial piece? Thanks!

When I run the code (included below, after the traceback), it prints `[[0.519923746585846, 0.5132380723953247, 0.502255916595459]]` and then fails with the following error:
---------------------------------------------------------------------------
RuntimeError                              Traceback (most recent call last)
<ipython-input-9-9d711d9ed619> in <cell line: 103>()
    116     loss = loss_fn(values.torch(), values_ref.torch()) # loss_fn(mi.TensorXf(values_ref), mi.TensorXf(values))
    117     # propagate
--> 118     loss.backward()
    119     optimizer.step()
    120     train_losses += loss.item()

1 frames
/usr/local/lib/python3.10/dist-packages/torch/autograd/__init__.py in backward(tensors, grad_tensors, retain_graph, create_graph, grad_variables, inputs)
    198     # some Python versions print out the first line of a multi-line function
    199     # calls in the traceback and some print out the last line
--> 200     Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
    201         tensors, grad_tensors_, retain_graph, create_graph, inputs,
    202         allow_unreachable=True, accumulate_grad=True)  # Calls into the C++ engine to run the backward pass

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

The code:

import drjit as dr
import mitsuba as mi
# mi.set_variant('cuda_ad_rgb', 'llvm_ad_rgb')
mi.set_variant('llvm_ad_rgb')

import random
import torch
import torch.nn as nn
import torch.nn.functional as F
from matplotlib import pyplot as plt

# ground truth bsdf (simple red diffuse)
bsdf_gt_dict = {
    'type': 'diffuse',
    'reflectance': {
        'type': 'rgb',
        'value': [1.0, 0.0, 0.0]
    }
}

# bsdf to optimize
bsdf_opt_dict = {
    'type': 'principled',
    'base_color': {
        'type': 'rgb',
        'value': [1.0, 0.0, 1.0]
    }
}

# ground truth bsdf
bsdf_gt = mi.load_dict(bsdf_gt_dict)
params_gt = mi.traverse(bsdf_gt)

# predicted bsdf
bsdf_opt = mi.load_dict(bsdf_opt_dict)
params_opt = mi.traverse(bsdf_opt)

# starting value
data = mi.Color3f(1.0, 0.0, 1.0)
# convert to torch
data_torch = data.torch()

key = 'base_color.value'

# simple pytorch mlp (base_color in, base_color out)
class TorchModel(nn.Module):
    def __init__(self, in_size=3, hidden=2, width=128, out=3):
        super().__init__()
        hidden_layers = []
        for _ in range(hidden):
            hidden_layers.append(nn.Linear(width, width))
            hidden_layers.append(nn.LeakyReLU(inplace=True))
        self.network = nn.Sequential(
            nn.Linear(in_size, width),
            nn.LeakyReLU(inplace=True),
            *hidden_layers,
            nn.Linear(width, out),
            nn.Sigmoid()
        )

    def forward(self, texture):
        # Evaluate the model
        data_out = self.network(data_torch)
        return data_out

model = TorchModel()
if 'cuda' in mi.variant():
    model = model.cuda()

# debugging: run random data through the model
'''
for i in range(100):
    data = mi.Color3f(random.random(), random.random(), random.random())
    data_torch = data.torch()
    xx = model(data_torch)
    print(xx.shape, xx[0, 0], xx[0, 1], xx[0, 2])
'''

@dr.wrap_ad(source='torch', target='drjit')
def update_param(param_in):
    params_opt[key] = dr.unravel(mi.Color3f, param_in.array)
    print(params_opt[key])
    params_opt.update()

optimizer = torch.optim.Adam(model.parameters(), lr=0.0002)
loss_fn = nn.L1Loss()

# set up the sampler
sampler = mi.load_dict({
    'type': 'independent',
})
sampler.seed(0, wavefront_size=int(1e5))

# Optimization hyper-parameters
iteration_count = 100
spp = 4

model.train(mode=True)
train_losses = []

params_opt['base_color.value'] = mi.Color3f(1.0, 0.0, 1.0)

for i in range(iteration_count):
    optimizer.zero_grad()
    sampler.seed(i)
    update_param(model(data_torch))

    # dummy surface interaction (we don't have a scene, just a bsdf)
    si = dr.zeros(mi.SurfaceInteraction3f)
    # get some cosine distributed wi and wo
    si.wi = mi.warp.square_to_cosine_hemisphere(sampler.next_2d())
    wo = mi.warp.square_to_cosine_hemisphere(sampler.next_2d())

    # eval both bsdfs
    values = bsdf_opt.eval(mi.BSDFContext(), si, wo)
    values_ref = bsdf_gt.eval(mi.BSDFContext(), si, wo)

    # values are arrays of Color3f, convert them to torch?
    loss = loss_fn(values.torch(), values_ref.torch())  # loss_fn(mi.TensorXf(values_ref), mi.TensorXf(values))
    # propagate
    loss.backward()
    optimizer.step()
    train_losses += loss.item()
    print(f'Training iteration {i+1}/{iteration_count}, loss: {train_losses[-1]}', end='\r')
Replies: 1 comment 3 replies
Hi @rajeshsharma-ai

I'd recommend you carefully go through this tutorial and also learn a bit more about the PyTorch `autograd` layer. Unfortunately, this feature of Dr.Jit is fairly low-level and therefore requires some intuition about how the automatic differentiation works.

In short, it is only possible to mix two frameworks by capturing all of the computations of one of the frameworks inside a `wrap_ad`-decorated function.
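For illustration, here is a minimal sketch of that pattern (this snippet is not from the thread; the function and variable names are just placeholders). The decorated function receives Dr.Jit arrays, does its work there, and the result handed back to PyTorch still carries a `grad_fn`:

```python
import drjit as dr
import torch

@dr.wrap_ad(source='torch', target='drjit')
def square(x):
    # Inside the wrapper, `x` is a Dr.Jit array: this computation is recorded
    # by Dr.Jit's AD, and its gradient is handed back to PyTorch on backward()
    return x * x

x = torch.tensor([1.0, 2.0, 3.0], requires_grad=True)
y = square(x)        # a torch tensor *with* a grad_fn
y.sum().backward()   # d/dx x^2 = 2x
print(x.grad)        # -> tensor([2., 4., 6.])
```
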
Simply calling `drjit_variable.torch()` will not carry gradients over; it just creates a new variable in PyTorch. In your setup, this means the inputs to the loss are completely detached from the BSDF evaluation and hence the MLP, which is why `loss.backward()` complains that the tensors do not require grad. In the tutorial I linked above, you'll see…
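
Applied to your toy program, one possible restructuring (an untested sketch, reusing `params_opt`, `bsdf_opt`, `bsdf_gt`, `key`, `sampler`, `model`, `data_torch`, `optimizer` and `iteration_count` from your code) is to perform the parameter update and both BSDF evaluations inside a single wrapped function and hand only the resulting loss back to PyTorch; here the L1 loss is computed on the Dr.Jit side instead of with `nn.L1Loss`:

```python
@dr.wrap_ad(source='torch', target='drjit')
def eval_loss(param_in):
    # Write the MLP prediction into the BSDF parameters *inside* the wrapped
    # function so that Dr.Jit records the dependency on `param_in`
    params_opt[key] = dr.unravel(mi.Color3f, param_in.array)
    params_opt.update()

    # Dummy surface interaction with cosine-distributed directions, as in your code
    si = dr.zeros(mi.SurfaceInteraction3f)
    si.wi = mi.warp.square_to_cosine_hemisphere(sampler.next_2d())
    wo = mi.warp.square_to_cosine_hemisphere(sampler.next_2d())

    values = bsdf_opt.eval(mi.BSDFContext(), si, wo)
    values_ref = bsdf_gt.eval(mi.BSDFContext(), si, wo)

    # L1 loss, reduced to a single value on the Dr.Jit side
    return mi.TensorXf(dr.mean(dr.mean(dr.abs(values - values_ref))))

for i in range(iteration_count):
    optimizer.zero_grad()
    sampler.seed(i)
    loss = eval_loss(model(data_torch))   # one-element torch tensor with a grad_fn
    loss.backward()                       # gradients now reach the MLP weights
    optimizer.step()
```

Alternatively, the wrapped function can return the evaluated BSDF values as an `mi.TensorXf` and the loss can stay in PyTorch, as in the Mitsuba/PyTorch tutorial; the important part is that every Dr.Jit operation that should receive gradients lives inside the `wrap_ad`-decorated function.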