提问者:小点点

-1和1之间的随机数加和为0


对于R,如何生成n随机数x_1<位于-11之间的code>x_n,以及与0之和?

推广到另一个和和另一个范围怎么样?


共2个答案

匿名用户

这是第一个问题的简单解决方案。模拟介于 -1 和 1 之间的u_1、...、u_n。然后设置 x_1 = (u_1-u_n)/2, x_2 = (u_2-u_1)/2, x_3 = (u_3-u_2)/2, ..., x_n = (u_n-u_{n-1}/2

n <- 10L
u <- runif(n, -1, 1)
x <- c(u[1L]-u[n], diff(u)) / 2
sum(x)
summary(x)

像以前一样继续,但减去u_is/n,然后将 s/n 添加到x_i

s <- 3
n <- 10L
u <- runif(n, -1, 1) - s/n
x <- c(u[1L]-u[n], diff(u))/2 + s/n
sum(x)
summary(x)

前面的方法可以推广到情况a=-b

a <- -4; b <- 4; s <- 10
n <- 10L
u <- runif(n, -(b-a)/2, (b-a)/2) - s/n
x <- c(u[1L]-u[n], diff(u))/2 + s/n
sum(x)
summary(x)

Dirichlet重缩放算法解决了任何一对数字a的最后一个问题

此外,模拟向量< code>(x_1,...,x_n)在< code>(n-1)维流形上具有均匀分布< code>{sum x_i = s},< code>a

这是一个R实现,改编自Roger Stafford编写的Matlab实现(代码中给出了参考)。

# adapted from Roger Stafford's Matlab implementation
# Roger Stafford (2023). Random Vectors with Fixed Sum (https://www.mathworks.com/matlabcentral/fileexchange/9700-random-vectors-with-fixed-sum), MATLAB Central File Exchange. Retrieved March 23, 2023.
DRS <- function(n, s, a, b) {
  n <- as.integer(n)
  if(s < n*a || s > n*b || a >= b) {
    stop("Invalid parameters.")
  }
  # Rescale to a unit cube: 0 <= x(i) <= 1
  s <- (s - n*a) / (b - a)
  # Construct the transition probability table, t.
  #   t(i,j) will be used only in the region where j <= i + 1.
  k <- max(min(as.integer(floor(s)), n-1L), 0L) # Must have 0 <= k <= n-1
  s <- max(min(s, k+1L), k)                     # Must have k <= s <= k+1
  s1 <- s - (k:(k-n+1L))   # s1 will never be negative
  s2 <- ((k+n):(k+1L)) - s # s2 will never be negative
  w <- matrix(0, nrow = n, ncol = n+1L)
  w[1L, 2L] <- .Machine$double.xmax # Scale for full 'double' range
  t <- matrix(0, nrow = n-1L, ncol = n)
  tiny <- .Machine$double.eps # The smallest positive 'double' 
  for(i in 2L:n) {
    tmp1 <- w[i-1L, 2L:(i+1L)] * s1[1L:i] / i
    tmp2 <- w[i-1L, 1L:i] * s2[(n-i+1L):n] / i
    w[i, 2L:(i+1L)] <- tmp1 + tmp2
    tmp3 <- w[i, 2L:(i+1L)] + tiny               # In case tmp1 & tmp2 are both 0,
    tmp4 <- as.double(s2[(n-i+1L):n] > s1[1L:i]) #   then t is 0 on left & 1 on right
    t[i-1L, 1L:i] <- (tmp2 / tmp3) * tmp4 + (1 - tmp1 / tmp3) * (1 - tmp4)
  }
  # Derive the polytope volume v from the appropriate element in the bottom row of w.
  v <- n^(3/2) * (w[n, k+2L] / .Machine$double.xmax) * (b - a)^(n - 1L)
  # Now construct the vector x.
  x <- numeric(n)
  rt <- runif(n - 1L) # For random selection of simplex type
  rs <- runif(n - 1L) # For random location within a simplex
  j <- k + 1L # For indexing in the t table
  sm <- 0 # Start with sum zero
  pr <- 1 # Start with product 1
  for(i in (n-1L):1L) { # Work backwards in the t table
    e <- as.double(rt[n-i] <= t[i, j]) # Use rt to choose a transition
    sx <- rs[n-i] ^ (1L/i)             # Use rs to compute next simplex coordinate
    sm <- sm + (1 - sx) * pr * s / (i + 1L) # Update sum
    pr <- sx * pr                           # Update product
    x[n-i] <- sm + pr * e # Calculate x using simplex coordinates
    s <- s - e
    j <- j - e # Transition adjustment
  }
  x[n] <- sm + pr * s # Compute the last x
  # Randomly permute the order in x and rescale
  p <- order(runif(n)) # it is a random permutation
  a + (b - a) * x[p]
}

示例:

x <- DRS(n = 10L, s = 14, a = 1, b = 2)
sum(x)
summary(x)

有问题:这不是DRS - 请参阅给定链接中的幻灯片。DRS 通常允许x_i的不同边界。

我刚刚发现这个方法是在R包代理中实现的。

匿名用户

遵循与上一个解决方案相同的递归思想,我们可以通过首先生成带有更多边界约束的r来避免重复,这可能会加快速度并提高效率

f <- function(s, n, a, b) {
  if (s < n * a || s > n * b) {
    stop("Invalid parameters.")
  }
  if (n == 1) {
    return(s)
  }
  r <- runif(1, max(a, s - (n - 1) * b), min(b, s - (n - 1) * a))
  c(r, Recall(s - r, n - 1, a, b))
}

我们可以看到

> (v <- f(s = 60, n = 30, a = 1, b = 3))
 [1] 1.544962 1.229845 2.013064 1.510149 2.933672 1.782947 1.650229 2.700521
 [9] 1.151468 1.758759 2.035019 1.355591 2.731922 2.918394 2.288166 2.198345
[17] 1.313646 2.312720 1.232810 1.591426 1.020105 2.788073 1.208734 2.929171
[25] 1.397976 2.044319 1.593190 2.961647 2.849886 2.953244

> summary(v)
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
  1.002   1.387   2.126   2.000   2.585   2.892

> length(v)
[1] 30

> sum(v)
[1] 60

我想你可以尝试一些递归方法,如下所示(我认为我的方法没有优化,只是给你一些提示)

(警告:对于较大的ns,这可能会非常低效)

f <- function(s, n, a = 1, b = 2) {
  if (s < n * a || s > n * b) {
    stop("Invalid parameters.")
  }
  if (n == 1) {
    return(s)
  }
  repeat {
    r <- runif(1, a, b)
    if (s - r >= (n - 1) * a && s - r <= (n - 1) * b) break
  }
  c(r, f(s - r, n - 1))
}

例如,使用s=25n=20,我们可以获得

> (v <- f(s = 25, n = 20))
 [1] 1.901342 1.153576 1.280439 1.920860 1.401054 1.245442 1.227995 1.340637
 [9] 1.051620 1.958662 1.360496 1.001955 1.087513 1.006621 1.002153 1.008432
[17] 1.033762 1.004273 1.009684 1.003484

> length(v)
[1] 20

> sum(v)
[1] 25