RuntimeError: Parameter ssd0_l2norm0_weight was not initialized on context cpu(0). It was only initialized on [gpu(1), gpu(2)].

class L2Norm(nn.Block):
    """Channel-wise L2 normalization with a learnable per-channel scale.

    Normalizes each spatial position across the channel axis (axis=1) and
    multiplies by a learned weight vector initialized to `scale`, as used by
    SSD on the conv4_3 feature map.

    Parameters
    ----------
    n_channels : int
        Number of input channels C; the learned weight has shape (C,).
    scale : float
        Initial value for every entry of the weight vector.
    """

    def __init__(self, n_channels, scale):
        super(L2Norm, self).__init__()
        self.n_channels = n_channels
        self.eps = 1e-8  # guards against division by zero on all-zero channels
        self.weight = self.params.get(
            'weight', init=mx.init.Constant(scale),
            shape=(n_channels,))

    def forward(self, x):
        # Per-position L2 norm over channels; shape (N, 1, H, W) broadcasts.
        norm = (x ** 2).sum(axis=1, keepdims=True).sqrt() + self.eps
        x = F.divide(x, norm)
        # BUG FIX: self.weight.data() with no argument fetches the copy on the
        # default context (cpu(0)). When the parameter was initialized only on
        # [gpu(1), gpu(2)] this raises the reported
        # "Parameter ... was not initialized on context cpu(0)" RuntimeError.
        # Request the device-local copy matching the input instead.
        w = self.weight.data(x.context)
        # Reshape (C,) -> (1, C, 1, 1) so the scale broadcasts over N, H, W.
        out = w.expand_dims(axis=0).expand_dims(axis=2).expand_dims(axis=3) * x
        return out

想把程序改成支持多gpu的, 但是总是报这个错, 说L2Norm没有初始化在cpu(0), 请问这是什么错误

net.collect_params().reset_ctx(ctx)应该就好了

请问在哪里加, 之前试过, 还是不work;另外换成BatchNorm就work了,这是为什么? 谢谢
# Multi-GPU SSD training script (MXNet Gluon).
ctx = [mx.gpu(1), mx.gpu(2)]

train_dataloader = DataLoader(train_dataset, batch_size=cfg.batch_size,
                              shuffle=True)

net = build_ssd('train', 640, ctx, num_classes=2)
net.params_init(ctx)
# Copy all parameters onto the training devices. Required AFTER loading any
# pretrained parameters (which deserializes them onto a single context) and
# BEFORE the Trainer is created, otherwise forward passes on the other GPUs
# hit "Parameter ... was not initialized on context ..." errors.
net.collect_params().reset_ctx(ctx)

# Create the checkpoint directory up front: save_params() below writes
# './model/ssd_%04d.params' every epoch and would fail if it did not exist.
if not os.path.exists('model'):
    os.mkdir('model')

lr_scheduler = mx.lr_scheduler.MultiFactorScheduler(step=cfg.decay_step,
                                                    factor=cfg.decay_ratio)
trainer = mx.gluon.trainer.Trainer(net.collect_params(),
                                   'sgd',
                                   {'learning_rate': cfg.base_lr,
                                    'wd': cfg.weight_decay,
                                    'momentum': 0.9,
                                    'lr_scheduler': lr_scheduler})
focal_cls_loss = FocalLoss(axis=2)
box_loss = SmoothL1Loss()
cls_loss = SoftmaxLoss()

cnt = 0
box_metric = mx.metric.MAE()
for epoch in range(1, cfg.epochs + 1):
    tic = time.time()
    box_metric.reset()
    cls_loss_list = []
    reg_loss_list = []
    for iteration, (data, label) in enumerate(train_dataloader):
        btic = time.time()
        # Shard the global batch across the GPUs; each shard stays on its
        # own device for the forward/backward pass.
        datas = gluon.utils.split_and_load(data, ctx)
        labels = gluon.utils.split_and_load(label, ctx)
        losses = []
        with mx.autograd.record():
            for data_ctx, label_ctx in zip(datas, labels):
                anchors, class_preds, box_preds = net(data_ctx)
                box_target, box_mask, cls_target = training_targets(
                    anchors, class_preds, label_ctx)
                loss1 = focal_cls_loss(class_preds, cls_target, -1.)
                loss2 = box_loss(box_preds, box_target, box_mask)
                losses.append(loss1 + loss2)
                cls_loss_list.append(F.mean(loss1)[0].asscalar())
                reg_loss_list.append(F.mean(loss2)[0].asscalar())
                box_metric.update([box_target], [box_preds * box_mask])
        # Backward on every shard's loss, then ONE optimizer step for the
        # whole global batch. Stepping once per shard (as before) applies
        # the update len(ctx) times per batch and mis-scales the effective
        # learning rate.
        for loss in losses:
            loss.backward()
        trainer.step(cfg.batch_size)
        if (iteration + 1) % cfg.log_interval == 0:
            val1 = np.mean(cls_loss_list)
            val2 = np.mean(reg_loss_list)
            print('[Epoch %d Batch %d] speed: %f samples/s, '
                  'training: %s=%f, %s=%f, %s=%f' % (
                      epoch, iteration,
                      cfg.batch_size / (time.time() - btic),
                      'cls loss', val1, 'reg loss', val2,
                      'box metric', box_metric.get()[1]))
            cls_loss_list = []
            reg_loss_list = []
            cnt += 1
    print('[Epoch %d] time cost: %f' % (epoch, time.time() - tic))
    net.save_params('./model/ssd_%04d.params' % epoch)

net.save_params('model/ssd.params')

还得注意延迟加载,如果是加载预训练的模型参数,需要在加载了模型参数后才能设置 net.collect_params().reset_ctx(ctx)

好的, 谢谢了

请问 我报的这个错和您差不多? 我该如何更改?RuntimeError: Parameter 'conv8_weight' was not initialized on context cpu(0). It was only initialized on [gpu(0)].