
def _jit_linear_cg_updates(
    result, alpha, residual_inner_prod, eps, beta, residual, precond_residual, mul_storage, is_zero, curr_conjugate_vec
):
    """Apply one round of in-place conjugate-gradient bookkeeping updates.

    Every tensor argument is used as a pre-allocated buffer and is modified
    in place; nothing is returned.

    Args:
        result: Running solution; receives ``alpha * curr_conjugate_vec``.
        alpha: Step size multiplied element-wise into the search direction.
        residual_inner_prod: On entry, the previous value of
            ``sum(residual * precond_residual, dim=-2)``; on exit, the new one.
        eps: Threshold below which the previous inner product counts as zero.
        beta: Output buffer for the ratio new / previous inner product.
        residual: Current residual.
        precond_residual: Current preconditioned residual.
        mul_storage: Scratch buffer for ``residual * precond_residual``.
        is_zero: Boolean output buffer marking entries where the previous
            inner product is below ``eps``.
        curr_conjugate_vec: Search direction, updated to
            ``precond_residual + beta * curr_conjugate_vec``.
    """
    # Update result
    # result_{k} = result_{k-1} + alpha_{k} p_vec_{k-1}
    result = torch.addcmul(result, alpha, curr_conjugate_vec, out=result)

    # beta_{k} = (precon_residual{k}^T r_vec_{k}) / (precon_residual{k-1}^T r_vec_{k-1})
    # Keep the previous inner product in `beta` (the denominator) before
    # `residual_inner_prod` is overwritten with the new value.
    beta.resize_as_(residual_inner_prod).copy_(residual_inner_prod)
    torch.mul(residual, precond_residual, out=mul_storage)
    torch.sum(mul_storage, -2, keepdim=True, out=residual_inner_prod)

    # Do a safe division here: where the denominator is (near) zero, divide
    # by 1 instead and then force the resulting beta to 0.
    torch.lt(beta, eps, out=is_zero)
    beta.masked_fill_(is_zero, 1)
    torch.div(residual_inner_prod, beta, out=beta)
    beta.masked_fill_(is_zero, 0)

    # Update curr_conjugate_vec
    # curr_conjugate_vec_{k} = precon_residual{k} + beta_{k} curr_conjugate_vec_{k-1}
    curr_conjugate_vec.mul_(beta).add_(precond_residual)
def forward(self, *args):
        """ process input

        Multiplies ``args[0]`` and ``args[2]`` element-wise and passes
        ``args[1]`` through unchanged.

        Args:
            *args: (Tensor): string, string_len, string2, string2_len
                e.g. string (Tensor): [batch_size, seq_len, dim], string_len (Tensor): [batch_size]

        Returns:
            Tensor: [batch_size, seq_len, output_dim], [batch_size]

        Raises:
            ConfigurationError: if the two inputs hold a different number of
                elements per sample and are not in the accepted
                "same seq_len, one configured last dimension equal to 1" case.
        """
        first, first_len, second = args[0], args[1], args[2]
        input_dims = list(self.layer_conf.input_dims)

        dim_flag = True
        if (first.shape[1] * first.shape[2]) != (second.shape[1] * second.shape[2]):
            # Per-sample element counts differ: only acceptable when the
            # second axis matches and one configured last dimension is 1.
            dim_flag = first.shape[1] == second.shape[1] and (
                input_dims[1][-1] == 1 or input_dims[0][-1] == 1
            )
        if not dim_flag:
            raise ConfigurationError("For layer ElementWisedMultiply3D, the dimensions of each inputs should be equal or 1 ,or the elements number of two inputs (expect for the first dimension) should be equal")

        # Allocate the accumulator on the inputs' device rather than a
        # hard-coded 'cuda', so the layer also runs on CPU tensors.
        zeros = torch.zeros(first.size(), device=first.device)
        return torch.addcmul(zeros, first, second), first_len
def E_Step(X, logdet, c1_temp, pi_temp, SigmaXY, X_C_SIGMA, sum, c_idx, c_idx_9, c_idx_25, distances2, r_ik_5, neig, sumP, X_C, X_C_SIGMA_buf):
    """
    Computes the distances of the Data points for each centroid and normalize it,

    All results are written into the caller-supplied buffers (``X_C``,
    ``X_C_SIGMA_buf``, ``X_C_SIGMA``, ``distances2``, ``r_ik_5``, ``neig``);
    the function has no return value.

    NOTE(review): ``c_idx_9`` and ``sumP`` are accepted but never used here.
    """
    # Offset of every data point from every candidate centre.
    torch.add(X.unsqueeze(1), torch.neg(c1_temp.reshape(-1, Global.neig_num, Global.D_)), out=X_C)

    # NOTE(review): X_C_SIGMA_buf is written here but never read afterwards,
    # and X_C_SIGMA[:, :, 0:2] is never assigned in this function. A step
    # combining the buffer with the second coordinate looks to be missing --
    # confirm against the upstream implementation.
    torch.mul(X_C[:, :, 0].unsqueeze(2), SigmaXY[:, :, 0:2], out=X_C_SIGMA_buf)
    X_C_SIGMA[:, :, 2:] = torch.mul(X_C[:, :, 2:], Global.SIGMA_INT)

    # NOTE(review): distances2 is filled but never folded into r_ik_5 in the
    # visible code -- presumably a reduction over the last axis is missing.
    torch.mul(-X_C.view(-1, Global.neig_num, Global.D_), X_C_SIGMA.view(-1, Global.neig_num, Global.D_), out=distances2)

    # Subtract logdet and add log(pi_temp), both reshaped to (-1, neig_num).
    r_ik_5.add_(torch.neg(logdet.reshape(-1, Global.neig_num)))
    r_ik_5.add_(torch.log(pi_temp.reshape(-1, Global.neig_num)))

    # Pairwise difference between neighbourhood labels and candidate labels.
    # NOTE(review): `neig` is written but not consumed below.
    c_neig = c_idx_25.reshape(-1, Global.potts_area).float()
    torch.add(c_neig.unsqueeze(1), -c_idx.reshape(-1, Global.neig_num).unsqueeze(2).float(), out=neig)

    # Normalise r_ik_5 via the project softmax helper; its return value is discarded.
    my_help.softmaxTF(r_ik_5, 1, sum)
def test_forward_addcmul():
    """Verify conversion of ``torch.addcmul`` with fixed and runtime operands.

    ``Addcmul1`` multiplies two constant tensors that broadcast to 3x3;
    ``Addcmul2`` takes all three tensors as model inputs. The scale factor is
    passed through the ``value=`` keyword because the positional
    ``addcmul(input, value, tensor1, tensor2)`` overload is deprecated.
    """

    class Addcmul1(Module):
        def forward(self, *args):
            t1 = torch.ones([3, 1])
            t2 = torch.ones([1, 3])
            if torch.cuda.is_available():
                t1 = t1.cuda()
                t2 = t2.cuda()
            return torch.addcmul(args[0], t1, t2, value=0.1)

    class Addcmul2(Module):
        def forward(self, *args):
            return torch.addcmul(args[0], args[1], args[2], value=0.5)

    input_data = torch.rand([1, 3]).float()
    verify_model(Addcmul1().float().eval(), input_data=input_data)
    t1 = torch.rand([3, 1]).float()
    t2 = torch.rand([1, 3]).float()
    verify_model(Addcmul2().float().eval(), input_data=[input_data, t1, t2])
def delta2bbox(rois,
               deltas,
               means=(0, 0, 0, 0),
               stds=(1, 1, 1, 1),
               max_shape=None,
               wh_ratio_clip=16 / 1000):
    """Apply normalised box deltas to reference boxes.

    Args:
        rois: Reference boxes; columns 0..3 are read as x1, y1, x2, y2.
        deltas: Encoded offsets with a multiple of 4 columns, laid out as
            repeating (dx, dy, dw, dh) groups.
        means: Four values added to the deltas after scaling by ``stds``.
        stds: Four values the deltas are multiplied by.
        max_shape: Optional ``(height, width)``; when given, x coordinates are
            clamped to ``[0, width - 1]`` and y to ``[0, height - 1]``.
        wh_ratio_clip: Bounds ``dw``/``dh`` to ``+/- |log(wh_ratio_clip)|``
            before exponentiation.

    Returns:
        Tensor shaped like ``deltas`` holding decoded (x1, y1, x2, y2) groups.
    """
    means = deltas.new_tensor(means).repeat(1, deltas.size(1) // 4)
    stds = deltas.new_tensor(stds).repeat(1, deltas.size(1) // 4)
    denorm_deltas = deltas * stds + means
    dx = denorm_deltas[:, 0::4]
    dy = denorm_deltas[:, 1::4]
    dw = denorm_deltas[:, 2::4]
    dh = denorm_deltas[:, 3::4]

    # Limit the log-scale factors so exp() cannot blow the box size up.
    max_ratio = np.abs(np.log(wh_ratio_clip))
    dw = dw.clamp(min=-max_ratio, max=max_ratio)
    dh = dh.clamp(min=-max_ratio, max=max_ratio)

    # Centre and size of each reference box (widths use the "+ 1" convention).
    px = ((rois[:, 0] + rois[:, 2]) * 0.5).unsqueeze(1).expand_as(dx)
    py = ((rois[:, 1] + rois[:, 3]) * 0.5).unsqueeze(1).expand_as(dy)
    pw = (rois[:, 2] - rois[:, 0] + 1.0).unsqueeze(1).expand_as(dw)
    ph = (rois[:, 3] - rois[:, 1] + 1.0).unsqueeze(1).expand_as(dh)

    gw = pw * dw.exp()
    gh = ph * dh.exp()
    gx = torch.addcmul(px, pw, dx)  # gx = px + pw * dx
    gy = torch.addcmul(py, ph, dy)  # gy = py + ph * dy

    # Convert centre/size back to corner coordinates.
    x1 = gx - gw * 0.5 + 0.5
    y1 = gy - gh * 0.5 + 0.5
    x2 = gx + gw * 0.5 - 0.5
    y2 = gy + gh * 0.5 - 0.5
    if max_shape is not None:
        x1 = x1.clamp(min=0, max=max_shape[1] - 1)
        y1 = y1.clamp(min=0, max=max_shape[0] - 1)
        x2 = x2.clamp(min=0, max=max_shape[1] - 1)
        y2 = y2.clamp(min=0, max=max_shape[0] - 1)
    bboxes = torch.stack([x1, y1, x2, y2], dim=-1).view_as(deltas)
    return bboxes
def ComplexMultiply_forward(X_re, X_im, Y_re, Y_im):
    """Multiply two complex tensors given as separate real/imaginary parts.

    Computes ``(X_re + i X_im) * (Y_re + i Y_im)`` element-wise.

    Returns:
        Tuple ``(Z_re, Z_im)`` with
        ``Z_re = X_re * Y_re - X_im * Y_im`` and
        ``Z_im = X_re * Y_im + X_im * Y_re``.
    """
    # The scale is passed as value=...; the positional
    # addcmul(input, value, t1, t2) overload is deprecated.
    Z_re = torch.addcmul(X_re * Y_re, X_im, Y_im, value=-1)
    Z_im = torch.addcmul(X_re * Y_im, X_im, Y_re)
    return Z_re, Z_im
def ComplexMultiply_backward(X_re, X_im, Y_re, Y_im, grad_Z_re, grad_Z_im):
    """Back-propagate through an element-wise complex multiplication.

    Args:
        X_re, X_im: Real and imaginary parts of the first operand.
        Y_re, Y_im: Real and imaginary parts of the second operand.
        grad_Z_re, grad_Z_im: Gradients w.r.t. the real and imaginary parts
            of the product.

    Returns:
        Tuple ``(grad_X_re, grad_X_im, grad_Y_re, grad_Y_im)`` where
        ``grad_X_re = grad_Z_re * Y_re + grad_Z_im * Y_im``,
        ``grad_X_im = grad_Z_im * Y_re - grad_Z_re * Y_im``, and the ``Y``
        gradients use the same formulas with ``X`` in place of ``Y``.
    """
    # The scale is passed as value=...; the positional
    # addcmul(input, value, t1, t2) overload is deprecated.
    grad_X_re = torch.addcmul(grad_Z_re * Y_re, grad_Z_im, Y_im)
    grad_X_im = torch.addcmul(grad_Z_im * Y_re, grad_Z_re, Y_im, value=-1)
    grad_Y_re = torch.addcmul(grad_Z_re * X_re, grad_Z_im, X_im)
    grad_Y_im = torch.addcmul(grad_Z_im * X_re, grad_Z_re, X_im, value=-1)
    return grad_X_re, grad_X_im, grad_Y_re, grad_Y_im
def forward(self, input):
        """Zero out non-positive entries of ``input`` and save tensors for backward.

        Args:
            input: Tensor to rectify.

        Returns:
            Tensor equal to ``input`` where ``input > 0`` and zero elsewhere.
            ``input`` and the result are stored via ``save_for_backward``.
        """
        pos_mask = (input > 0).type_as(input)
        # output = 0 + input * pos_mask
        output = torch.addcmul(
            torch.zeros(input.size()).type_as(input), input, pos_mask)
        self.save_for_backward(input, output)
        return output
def backward(self, grad_output):
        """Propagate only positive gradients through positions with positive input.

        Args:
            grad_output: Gradient with respect to the forward output.

        Returns:
            Tensor equal to ``grad_output`` where both the saved input and
            ``grad_output`` are positive, and zero elsewhere.
        """
        input, output = self.saved_tensors

        pos_mask_1 = (input > 0).type_as(grad_output)
        pos_mask_2 = (grad_output > 0).type_as(grad_output)
        # Inner call masks by the forward input, outer call masks by the sign
        # of the incoming gradient: grad_input = grad_output * mask_1 * mask_2.
        grad_input = torch.addcmul(
            torch.zeros(input.size()).type_as(input),
            torch.addcmul(
                torch.zeros(input.size()).type_as(input), grad_output, pos_mask_1),
            pos_mask_2)

        return grad_input
def _matmul(self, rhs):
        """Return ``self._lazy_tensor._matmul(rhs)`` plus the diagonal part times ``rhs``.

        The diagonal held in ``self._diag_tensor._diag`` gets a trailing
        singleton axis so it scales ``rhs`` along its second-to-last dimension.
        """
        base_product = self._lazy_tensor._matmul(rhs)
        diag_column = self._diag_tensor._diag.unsqueeze(-1)
        # base_product + diag_column * rhs
        return torch.addcmul(base_product, diag_column, rhs)
def forward(self, *args):
        """ process input

        Multiplies ``args[0]`` and ``args[2]`` element-wise and passes
        ``args[1]`` through unchanged.

        Args:
            *args: (Tensor): string, string_len, string2, string2_len
                e.g. string (Tensor): [batch_size, dim], string_len (Tensor): [batch_size]

        Returns:
            Tensor: [batch_size, output_dim], [batch_size]
        """
        # Allocate the accumulator on the inputs' device rather than a
        # hard-coded 'cuda', so the layer also runs on CPU tensors.
        zeros = torch.zeros(args[0].size(), device=args[0].device)
        return torch.addcmul(zeros, args[0], args[2]), args[1]
def forward(self, input):
        """Keep the positive entries of ``input``, zero the rest, and save state.

        Both ``input`` and the rectified result are handed to
        ``save_for_backward`` before the result is returned.
        """
        keep = (input > 0).type_as(input)
        accumulator = torch.zeros(input.size()).type_as(input)
        # rectified = 0 + input * keep
        rectified = torch.addcmul(accumulator, input, keep)
        self.save_for_backward(input, rectified)
        return rectified
def backward(self, grad_output):
        """Return ``grad_output`` masked by positive input and positive gradient.

        Entries where the saved input is not positive, or where
        ``grad_output`` itself is not positive, come back as zero.
        """
        saved_input, _saved_output = self.saved_tensors

        def fresh_zeros():
            # Zero tensor with the saved input's shape and type.
            return torch.zeros(saved_input.size()).type_as(saved_input)

        input_positive = (saved_input > 0).type_as(grad_output)
        grad_positive = (grad_output > 0).type_as(grad_output)

        # First mask by the forward input, then by the gradient's own sign.
        masked_by_input = torch.addcmul(fresh_zeros(), grad_output, input_positive)
        return torch.addcmul(fresh_zeros(), masked_by_input, grad_positive)
