Python source code examples: torch.clone()
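
torch.clone() returns a copy of a tensor that does not share storage with the original. The copy stays connected to the autograd graph, so gradients computed through it flow back to the source tensor; combining it with .detach() yields a graph-free copy, which is the momentum-buffer pattern torch.clone(d_p).detach() used in several of the optimizer examples below. A minimal sketch of the difference, using only the public torch API:

import torch

x = torch.ones(3, requires_grad=True)

y = x.clone()            # copy that remains part of the autograd graph
y.sum().backward()
print(x.grad)            # tensor([1., 1., 1.]) -- gradients reach the original

z = x.clone().detach()   # independent copy, cut off from the graph
print(z.requires_grad)   # False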

Example 1
def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad.data
                if weight_decay != 0:
                    d_p.add_(weight_decay, p.data)
                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        buf = param_state['momentum_buffer'] = torch.clone(d_p).detach()
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(1 - dampening, d_p)
                    if nesterov:
                        d_p = d_p.add(momentum, buf)
                    else:
                        d_p = buf

                p.data.add_(-group['lr'], d_p)

        return loss 
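
Note: this example (and several of the later step() implementations on this page) uses the legacy positional-alpha overloads d_p.add_(weight_decay, p.data) and p.data.add_(-group['lr'], d_p), which newer PyTorch releases deprecate in favor of the keyword form shown in Example 18. A minimal sketch of the equivalent keyword calls, assuming a current PyTorch build:

import torch

p, d_p = torch.randn(3), torch.randn(3)
lr, weight_decay = 0.1, 1e-4

d_p.add_(p, alpha=weight_decay)   # same effect as the legacy d_p.add_(weight_decay, p)
p.add_(d_p, alpha=-lr)            # SGD update, same effect as p.add_(-lr, d_p)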
Example 2
def empty(shape, dtype=types.float32, split=None, device=None, comm=None, order="C"):
    """
    Returns a new uninitialized array of given shape and data type. May be allocated split up across multiple
    nodes along the specified axis.

    Parameters
    ----------
    shape : int or sequence of ints
        Desired shape of the output array, e.g. 1 or (1, 2, 3,).
    dtype : ht.dtype
        The desired HeAT data type for the array, defaults to ht.float32.
    split: int, optional
        The axis along which the array is split and distributed, defaults to None (no distribution).
    device : str, ht.Device or None, optional
        Specifies the device the tensor shall be allocated on, defaults to None (i.e. globally set default device).
    comm: Communication, optional
        Handle to the nodes holding distributed parts or copies of this tensor.
    order: str, optional
        Options: 'C' or 'F'. Specifies the memory layout of the newly created tensor. Default is order='C', meaning the array
        will be stored in row-major order (C-like). If order='F', the array will be stored in column-major order (Fortran-like).
        Raises NotImplementedError for NumPy options 'K' and 'A'.
        #TODO: implement 'K' option when torch.clone() fix to preserve memory layout is released.

    Returns
    -------
    out : ht.DNDarray
        Array of zeros with given shape, data type and node distribution.

    Examples
    --------
    >>> ht.empty(3)
    tensor([ 0.0000e+00, -2.0000e+00,  3.3113e+35])

    >>> ht.empty(3, dtype=ht.int)
    tensor([ 0.0000e+00, -2.0000e+00,  3.3113e+35])

    >>> ht.empty((2, 3,))
    tensor([[ 0.0000e+00, -2.0000e+00,  3.3113e+35],
            [ 3.6902e+19,  1.2096e+04,  7.1846e+22]])
    """
    return __factory(shape, dtype, split, torch.empty, device, comm, order) 
Example 3
def zeros_like(a, dtype=None, split=None, device=None, comm=None, order="C"):
    """
    Returns a new array filled with zeros with the same type, shape and data distribution of given object. Data type and
    data distribution strategy can be explicitly overridden.

    Parameters
    ----------
    a : object
        The shape and data-type of 'a' define these same attributes of the returned array.
    dtype : ht.dtype, optional
        Overrides the data type of the result.
    split: int, optional
        The axis along which the array is split and distributed, defaults to None (no distribution).
    device : str, ht.Device or None, optional
        Specifies the device the tensor shall be allocated on, defaults to None (i.e. globally set default device).
    comm: Communication, optional
        Handle to the nodes holding distributed parts or copies of this tensor.
    order: str, optional
        Options: 'C' or 'F'. Specifies the memory layout of the newly created tensor. Default is order='C', meaning the array
        will be stored in row-major order (C-like). If order='F', the array will be stored in column-major order (Fortran-like).
        Raises NotImplementedError for NumPy options 'K' and 'A'.
        #TODO: implement 'K' option when torch.clone() fix to preserve memory layout is released.

    Returns
    -------
    out : ht.DNDarray
        Array of zeros with the same shape, type and split axis as 'a' unless overridden.

    Examples
    --------
    >>> x = ht.ones((2, 3,))
    >>> x
    tensor([[1., 1., 1.],
            [1., 1., 1.]])

    >>> ht.zeros_like(x)
    tensor([[0., 0., 0.],
            [0., 0., 0.]])
    """
    return __factory_like(a, dtype, split, zeros, device, comm, order=order) 
Example 4
def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
           closure (callable, optional): A closure that reevaluates the model
               and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            momentum = group["momentum"]
            dampening = group["dampening"]
            nesterov = group["nesterov"]

            for p in group["params"]:
                if p.grad is None:
                    continue
                d_p = p.grad.data
                if momentum != 0:
                    param_state = self.state[p]
                    if "momentum_buffer" not in param_state:
                        buf = param_state["momentum_buffer"] = torch.clone(d_p).detach()
                    else:
                        buf = param_state["momentum_buffer"]
                        buf.mul_(momentum).add_(1 - dampening, d_p)
                    if nesterov:
                        d_p = d_p.add(momentum, buf)
                    else:
                        d_p = buf
                # Apply weight decay. THE ONLY DIFFERENCE IS HERE
                if group["weight_decay"] != 0:
                    p.data.mul_(1 - group["lr"] * group["weight_decay"])
                # Apply momentum
                p.data.add_(-group["lr"], d_p)
        return loss 
Example 5
def clone_parameters(param_list):
    return [p.clone() for p in param_list] 
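
Because clone() is differentiable, the copies returned by clone_parameters still backpropagate into the original parameters. A minimal usage sketch of that behaviour:

import torch

params = [torch.randn(2, 2, requires_grad=True)]
copies = clone_parameters(params)
copies[0].sum().backward()
print(params[0].grad)   # gradients land on the original parameters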
Example 6
def detach_module(module):
    """

    [[Source]](https://github.com/learnables/learn2learn/blob/master/learn2learn/utils.py)

    **Description**

    Detaches all parameters/buffers of a previously cloned module from its computational graph.

    Note: detach works in-place, so it does not return a copy.

    **Arguments**

    * **module** (Module) - Module to be detached.

    **Example**

    ~~~python
    net = nn.Sequential(Linear(20, 10), nn.ReLU(), nn.Linear(10, 2))
    clone = clone_module(net)
    detach_module(clone)
    error = loss(clone(X), y)
    error.backward()  # Gradients are back-propagated on clone, not net.
    ~~~
    """
    if not isinstance(module, torch.nn.Module):
        return
    # First, re-write all parameters
    for param_key in module._parameters:
        if module._parameters[param_key] is not None:
            detached = module._parameters[param_key].detach_()

    # Second, handle the buffers if necessary
    for buffer_key in module._buffers:
        if module._buffers[buffer_key] is not None and \
                module._buffers[buffer_key].requires_grad:
            module._buffers[buffer_key] = module._buffers[buffer_key].detach_()

    # Then, recurse for each submodule
    for module_key in module._modules:
        detach_module(module._modules[module_key]) 
Example 7
def clone_distribution(dist):
    # TODO: This function was never tested.
    clone = copy.deepcopy(dist)

    for param_key in clone.__dict__:
        item = clone.__dict__[param_key]
        if isinstance(item, th.Tensor):
            if item.requires_grad:
                clone.__dict__[param_key] = dist.__dict__[param_key].clone()
        elif isinstance(item, th.nn.Module):
            clone.__dict__[param_key] = clone_module(dist.__dict__[param_key])
        elif isinstance(item, th.Distribution):
            clone.__dict__[param_key] = clone_distribution(dist.__dict__[param_key])

    return clone 
Example 8
def nnef_copy_n(x, times):
    # type: (torch.Tensor, int)->List[torch.Tensor]
    return [x.clone() for _ in range(times)] 
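
The clones returned by nnef_copy_n do not share storage with the input or with each other, so an in-place edit of one copy leaves the rest untouched. A small sketch:

import torch

x = torch.zeros(2)
a, b = nnef_copy_n(x, 2)
a.fill_(1.0)
print(x, b)   # both still tensor([0., 0.])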
Example 9
def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """

        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']
            # uncertainty = group['uncertaintsy']

            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad.data

                if weight_decay != 0:
                    d_p.add_(weight_decay, p.data)
                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        buf = param_state['momentum_buffer'] = torch.clone(d_p).detach()
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(1 - dampening, d_p)
                    if nesterov:
                        d_p = d_p.add(momentum, buf)
                    else:
                        d_p = buf

                if isinstance(group['lr'], torch.Tensor):
                    p.data = p.data + torch.mul(-group['lr'].data, d_p)
                else:
                    p.data.add_(-group['lr'], d_p)

        return loss 
Example 10
def __factory_like(a, dtype, split, factory, device, comm, order="C", **kwargs):
    """
    Abstracted '...-like' factory function for HeAT tensor initialization

    Parameters
    ----------
    a : object
        The shape and data-type of 'a' define these same attributes of the returned array.
    dtype : ht.dtype
        The desired HeAT data type for the array, defaults to ht.float32.
    split: int, optional
        The axis along which the array is split and distributed, defaults to None (no distribution).
    factory : function
        Function that creates a HeAT tensor.
    device : str or None
        Specifies the device the tensor shall be allocated on, defaults to None (i.e. globally set default device).
    comm: Communication
        Handle to the nodes holding distributed parts or copies of this tensor.
    order: str, optional
        Options: 'C' or 'F'. Specifies the memory layout of the newly created tensor. Default is order='C', meaning the array
        will be stored in row-major order (C-like). If order='F', the array will be stored in column-major order (Fortran-like).
        Raises NotImplementedError for NumPy options 'K' and 'A'.
        #TODO: implement 'K' option when torch.clone() fix to preserve memory layout is released.


    Returns
    -------
    out : ht.DNDarray
        Array created by the given factory with the shape, data type and node distribution of 'a', unless explicitly overridden.
    """
    # determine the global shape of the object to create
    # attempt in this order: shape property, length of object or default shape (1,)
    try:
        shape = a.shape
    except AttributeError:
        try:
            shape = (len(a),)
        except TypeError:
            shape = (1,)

    # infer the data type, otherwise default to float32
    if dtype is None:
        try:
            dtype = types.heat_type_of(a)
        except TypeError:
            dtype = types.float32

    # infer split axis
    if split is None:
        try:
            split = a.split if not isinstance(a, str) else None
        except AttributeError:
            # do not split at all
            pass

    # use the default communicator, if not set
    comm = sanitize_comm(comm)

    return factory(shape, dtype=dtype, split=split, device=device, comm=comm, order=order, **kwargs) 
Example 11
def ones(shape, dtype=types.float32, split=None, device=None, comm=None, order="C"):
    """
    Returns a new array of given shape and data type filled with one values. May be allocated split up across multiple
    nodes along the specified axis.

    Parameters
    ----------
    shape : int or sequence of ints
        Desired shape of the output array, e.g. 1 or (1, 2, 3,).
    dtype : ht.dtype
        The desired HeAT data type for the array, defaults to ht.float32.
    split : int, optional
        The axis along which the array is split and distributed, defaults to None (no distribution).
    device : str, ht.Device or None, optional
        Specifies the device the tensor shall be allocated on, defaults to None (i.e. globally set default device).
    comm : Communication, optional
        Handle to the nodes holding distributed parts or copies of this tensor.
    order: str, optional
        Options: 'C' or 'F'. Specifies the memory layout of the newly created tensor. Default is order='C', meaning the array
        will be stored in row-major order (C-like). If order='F', the array will be stored in column-major order (Fortran-like).
        Raises NotImplementedError for NumPy options 'K' and 'A'.
        #TODO: implement 'K' option when torch.clone() fix to preserve memory layout is released.


    Returns
    -------
    out : ht.DNDarray
        Array of ones with given shape, data type and node distribution.

    Examples
    --------
    >>> ht.ones(3)
    tensor([1., 1., 1.])

    >>> ht.ones(3, dtype=ht.int)
    tensor([1, 1, 1])

    >>> ht.ones((2, 3,))
    tensor([[1., 1., 1.],
            [1., 1., 1.]])
    """
    return __factory(shape, dtype, split, torch.ones, device, comm, order) 
Example 12
def zeros(shape, dtype=types.float32, split=None, device=None, comm=None, order="C"):
    """
    Returns a new array of given shape and data type filled with zero values. May be allocated split up across multiple
    nodes along the specified axis.

    Parameters
    ----------
    shape : int or sequence of ints
        Desired shape of the output array, e.g. 1 or (1, 2, 3,).
    dtype : ht.dtype
        The desired HeAT data type for the array, defaults to ht.float32.
    split: int, optional
        The axis along which the array is split and distributed, defaults to None (no distribution).
    device : str, ht.Device or None, optional
        Specifies the device the tensor shall be allocated on, defaults to None (i.e. globally set default device).
    comm: Communication, optional
        Handle to the nodes holding distributed parts or copies of this tensor.
    order: str, optional
        Options: 'C' or 'F'. Specifies the memory layout of the newly created tensor. Default is order='C', meaning the array
        will be stored in row-major order (C-like). If order='F', the array will be stored in column-major order (Fortran-like).
        Raises NotImplementedError for NumPy options 'K' and 'A'.
        #TODO: implement 'K' option when torch.clone() fix to preserve memory layout is released.


    Returns
    -------
    out : ht.DNDarray
        Array of zeros with given shape, data type and node distribution.

    Examples
    --------
    >>> ht.zeros(3)
    tensor([0., 0., 0.])

    >>> ht.zeros(3, dtype=ht.int)
    tensor([0, 0, 0])

    >>> ht.zeros((2, 3,))
    tensor([[0., 0., 0.],
            [0., 0., 0.]])
    """
    return __factory(shape, dtype, split, torch.zeros, device, comm, order=order) 
Example 13
def sanitize_memory_layout(x, order="C"):
    """
    Return the given object with memory layout as defined below. The default memory distribution is assumed.

    Parameters
    -----------

    x: torch.tensor
        Input data

    order: str, optional.
        Default is 'C' as in C-like (row-major) memory layout. The array is stored first dimension first (rows first if ndim=2).
        Alternative is 'F', as in Fortran-like (column-major) memory layout. The array is stored last dimension first (columns first if ndim=2).
    """
    if order == "K":
        raise NotImplementedError(
            "Internal usage of torch.clone() means losing original memory layout for now. \n Please specify order='C' for row-major, order='F' for column-major layout."
        )
    if x.ndim < 2 or x.numel() == 0:
        # do nothing
        return x
    dims = list(range(x.ndim))
    stride = torch.tensor(x.stride())
    # since strides can get a bit wonky with operations like transpose
    #   we should assume that the tensors are row major or are distributed the default way
    sdiff = stride[1:] - stride[:-1]
    column_major = all(sdiff >= 0)
    row_major = not column_major
    if (order == "C" and row_major) or (order == "F" and column_major):
        # do nothing
        return x
    elif (order == "C" and column_major) or (order == "F" and row_major):
        dims = tuple(reversed(dims))
        y = torch.empty_like(x)
        permutation = x.permute(dims).contiguous()
        y = y.set_(
            permutation.storage(),
            x.storage_offset(),
            x.shape,
            tuple(reversed(permutation.stride())),
        )
        return y
    else:
        raise ValueError(
            "combination of order and layout not permitted, order: {} column major: {} row major: {}".format(
                order, column_major, row_major
            )
        ) 
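
The row-/column-major decision above comes down to comparing strides: torch allocates tensors row-major by default (strides decrease from left to right), while a Fortran-ordered tensor of the same shape has increasing strides. A small stride-only sketch of that idea, using nothing beyond the public torch API:

import torch

x = torch.empty(2, 3)          # row-major by default
print(x.stride())              # (3, 1) -- last dimension varies fastest

x_f = x.t().contiguous().t()   # same shape, column-major storage
print(x_f.shape, x_f.stride()) # torch.Size([2, 3]) (1, 2)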
Example 14
def step(self, grad_quantizer, grad_clip, closure=None):
        """Performs a single optimization step.
        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad.data

                if weight_decay != 0:
                    d_p.add_(weight_decay, p.data)
                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        buf = param_state['momentum_buffer'] = torch.clone(d_p).detach()
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(1 - dampening, d_p)
                    if nesterov:
                        d_p = d_p.add(momentum, buf)
                    else:
                        d_p = buf

                # quantize gradient for both weights and biases
                d_p = grad_quantizer(d_p, group['lr'])
                p.data.add_(-d_p)
                p.data = grad_clip(p.data)
                # p.data.add_(-group['lr'], d_p)

        return loss 
Example 15
def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad.data
                if weight_decay != 0:
                    d_p.add_(weight_decay, p.data)
                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        buf = param_state['momentum_buffer'] = torch.clone(d_p).detach()
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(1 - dampening, d_p)
                    if nesterov:
                        d_p = d_p.add(momentum, buf)
                    else:
                        d_p = buf

                # LARS
                p_norm = p.data.pow(2).sum().sqrt()
                update_norm = d_p.pow(2).sum().sqrt()
                # Compute the local LR
                if p_norm == 0 or update_norm == 0:
                    local_lr = 1
                else:
                    local_lr = p_norm / update_norm

                p.data.add_(-group['lr'] * local_lr, d_p)

        return loss 
Example 16
def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model
                and returns the loss.
        """
        loss = None
        if closure is not None:
            loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad.data
                if weight_decay != 0:
                    d_p.add_(weight_decay, p.data)
                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        buf = param_state['momentum_buffer'] = torch.clone(d_p).detach()
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(1 - dampening, d_p)
                    if nesterov:
                        d_p = d_p.add(momentum, buf)
                    else:
                        d_p = buf

                # LARS
                p_norm = p.data.pow(2).sum().sqrt()
                update_norm = d_p.pow(2).sum().sqrt()
                # Compute the local LR
                if p_norm == 0 or update_norm == 0:
                    local_lr = 1
                else:
                    local_lr = p_norm / update_norm

                p.data.add_(-group['lr'] * local_lr, d_p)

        return loss 
Example 17
def wpe_v6(Y, taps=10, delay=3, iterations=3, psd_context=0, statistics_mode='full'):
    """
    Shortened version of wpe_v7 with no external references.
    Applicable in for-loops.

    >>> T = np.random.randint(100, 120)
    >>> D = np.random.randint(2, 8)
    >>> K = np.random.randint(3, 5)
    >>> delay = np.random.randint(0, 2)
    
    # Real test:
    >>> Y = np.random.normal(size=(D, T))
    >>> from nara_wpe import wpe as np_wpe
    >>> desired = np_wpe.wpe_v6(Y, K, delay, statistics_mode='full')
    >>> actual = wpe_v6(torch.tensor(Y), K, delay, statistics_mode='full').numpy()
    >>> np.testing.assert_allclose(actual, desired, atol=1e-6)

    # Complex test:
    >>> Y = np.random.normal(size=(D, T)) + 1j * np.random.normal(size=(D, T))
    >>> from nara_wpe import wpe as np_wpe
    >>> desired = np_wpe.wpe_v6(Y, K, delay, statistics_mode='full')
    >>> actual = wpe_v6(torch.tensor(Y), K, delay, statistics_mode='full').numpy()
    >>> np.testing.assert_allclose(actual, desired, atol=1e-6)
    """

    if statistics_mode == 'full':
        s = Ellipsis
    elif statistics_mode == 'valid':
        s = (Ellipsis, slice(delay + taps - 1, None))
    else:
        raise ValueError(statistics_mode)

    X = torch.clone(Y)
    Y_tilde = build_y_tilde(Y, taps, delay)
    for iteration in range(iterations):
        inverse_power = get_power_inverse(X, psd_context=psd_context)
        Y_tilde_inverse_power = Y_tilde * inverse_power[..., None, :]
        R = torch.matmul(Y_tilde_inverse_power[s], hermite(Y_tilde[s]))
        P = torch.matmul(Y_tilde_inverse_power[s], hermite(Y[s]))
        # G = _stable_solve(R, P)
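        # NOTE: torch.solve(B, A) is deprecated in newer PyTorch releases;
        # torch.linalg.solve(R, P) computes the same G.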
        G, _ = torch.solve(P, R)
        X = Y - torch.matmul(hermite(G), Y_tilde)

    return X 
Example 18
def step(self, closure=None):
        """Performs a single optimization step.

        Arguments:
            closure (callable, optional): A closure that reevaluates the model and returns the loss.
        """
        loss = None
        if closure is not None:
            with torch.enable_grad():
                loss = closure()

        for group in self.param_groups:
            weight_decay = group['weight_decay']
            momentum = group['momentum']
            dampening = group['dampening']
            nesterov = group['nesterov']

            for p in group['params']:
                if p.grad is None:
                    continue
                d_p = p.grad.data
                if weight_decay != 0:
                    d_p.add_(p.data, alpha=weight_decay)
                if momentum != 0:
                    param_state = self.state[p]
                    if 'momentum_buffer' not in param_state:
                        buf = param_state['momentum_buffer'] = torch.clone(d_p).detach()
                    else:
                        buf = param_state['momentum_buffer']
                        buf.mul_(momentum).add_(d_p, alpha=1 - dampening)
                    if nesterov:
                        d_p = d_p.add(buf, alpha=momentum)
                    else:
                        d_p = buf

                # LARS
                p_norm = p.data.pow(2).sum().sqrt()
                update_norm = d_p.pow(2).sum().sqrt()
                # Compute the local LR
                if p_norm == 0 or update_norm == 0:
                    local_lr = 1
                else:
                    local_lr = p_norm / update_norm

                p.data.add_(d_p, alpha=-group['lr'] * local_lr)

        return loss