SOCR ≫ TCIU Website ≫ TCIU GitHub ≫

1 Mathematical Foundation

\[Y_{j,k} = S(t_k, \theta_{j,k}) + \epsilon_{j,k} = \sum_n f_n(t_k)e^{in\theta} + \epsilon,\] where \(\theta_{j,k} \sim \phi(·|t_k), \epsilon \sim N(0, \sigma^2)\).

The Goal is to reconstruct the kime-surface (2D manifold) from a batch of observed (repeated measurement) time-courses under strictly controlled environmental and experimental conditions

\[S(t_k, \theta) = \sum_n f_n(t_k)e^{in\theta}\] from observed \(Y_{j,k}\), i.e., predict \(f_n(t_k), \phi(\theta|t_k), \sigma^2.\)

In practice, we will use the following Data Generation protocol for tesing/validation of the KPT reconstruction algorithm:

  • Number of Harmonic coefficient: 3
  • Sample Size : 50
  • Time Steps: 20
  • T Grid: 0-1, length.out = 20
  • \(\theta\) Grid: \(-\pi \to \pi\), length.out = 50

1.1 KPT Algorithm

Using expectation maximization (EM algorithm), we iteratively estimate the parameters.

  • Initialization

    • \(f_n\): We first set all \(f_n\) equals to 0
    • \(\phi_0\): Let \(\phi\) be uniform distribution
  • E Step

    • For each \(t_k\), sample L angles \(\theta^{(l)}\) from current distribution \(\phi^{(old)}(\theta|t_k)\)

    • Compute post distribution

    \[p(\theta|Y_{j,k}, t_k) \propto p(Y_{j,k}|\theta, t_k) \phi^{(old)}(\theta|t_k)\]

    • Calculate \(p(Y_{j,k}|\theta, t_k)\)

    \[\hat S_k^{(l)} = \sum_n \hat f_n^{(l)}(t_k) e^{in\theta^{(l)}}\]

    \[p(Y_{j,k}|\theta^{(l)},t_k) = \frac{1}{\sqrt{2\pi \sigma^2}}e^{-\frac{(Y_{j,k} - R(\hat S^{(l)}(t_k, \theta^{(l)})))^2}{2\sigma^2}}\]

    • Calculate Weight for each observation

    \[w_{j,k}^{(l)} \propto e^{-\frac{1}{2\sigma^2}(Y_{j,k} - \hat S(t_k, \theta^{(l)}))^2}\]

  • M step

    • Update \(\hat f_n\): Ridge regression + weighted OLS is used in the estimation

    \[\min_{f(t_k)} \sum_{j = 1}^N \sum_{l = 1}^L w_{j,k}^{(l)}\left(Y_{j,k} - R\left (\sum_n f_n(t_k)e^{in\theta^{(l)}}\right)\right)^2 + \lambda \sum_n \left (f_n(t_k)\right)^2\]

    • Update \(\phi\): Nonparametric Estimation and time smoothing

      • We got from E step L angles \(\theta_{j,k}^{(l)}\) from the old distribution \(\phi^{(old)}\)

      • For each angel we have a weight \(w_{j,k}^{(l)}\)

    • Build fixed \(\theta\) grid and update \(\phi\)

    \[{\phi}^{new}(\theta_m|t_k) = \frac{\sum_{j,l}w_{j,k}^{(l)} 1({\theta_{j,k}^{(l)} \in bin_m})}{\sum_{j,l}w_{j,k}^{(l)}} \frac{1}{d\theta}\]

    • Smoothing along time axis \(y_k = \log \tilde{\phi}(\theta_m|t_k)\)

    \[\min_g \sum_{k = 1}^{Tg}(g_k - y_k)^2 + \lambda_t g^TD^TDg\]

  • Normalization

    \[\phi(\theta_m, t_k) = \frac{\exp(g_{m,k})}{\sum_{m'}\exp(g_{m',k}) d\theta}\]

  • Update \(\sigma^2\)

    \[Q(\sigma^2) = \sum_{k = 1}^{T_g} \sum_{j = 1}^{N_g}\sum_{l = 1}^L w_{j,k}^{(l)}log(\frac{1}{\sqrt{2\pi}\sigma}e^{- \frac{(Y_{j,k} - R(S(t_k, \theta_{j,k}^{(l)}))^2}{2\sigma^2}})\]

  • Set derivative \(= 0\)

    \[\sigma^{2(new)} =\frac{\sum_{j,k,l}w_{j,k}^{(l)}(Y_{j,k} - \mu_{jkl})^2}{\sum_{j,k,l}w_{j,k}^{(l)}}.\]

  • Repeat, until convergence criterion is met.

2 KPT Algorithm Implementation and Experimental Validation

2.1 Core functions

# non-parametric phi updation + time smoothing
update_phi_nonparam_smooth <- function(theta_samples, weights, Tg, M_theta=50, lambda_t=10) {
  theta_grid <- seq(0, 2*pi, length.out=M_theta)
  dtheta <- theta_grid[2] - theta_grid[1]
  
  D <- diff(diag(Tg), differences=1)
  DtD <- crossprod(D)
  
  emp_phi <- matrix(0, nrow=M_theta, ncol=Tg)
  for (k in 1:Tg) {
    for (l in 1:length(theta_samples[[k]])) {
      idx <- which.min(abs(theta_grid - theta_samples[[k]][l]))
      emp_phi[idx, k] <- emp_phi[idx, k] + weights[[k]][l]
    }
    emp_phi[, k] <- pmax(emp_phi[, k], 1e-8)
    emp_phi[, k] <- emp_phi[, k] / sum(emp_phi[, k]) / dtheta
  }
  
  log_emp <- log(emp_phi)
  log_phi <- matrix(0, M_theta, Tg)
  A <- diag(Tg) + lambda_t * DtD
  for (m in 1:M_theta) {
    log_phi[m, ] <- solve(A, log_emp[m, ])
  }
  
  phi <- exp(log_phi)
  for (k in 1:Tg) {
    phi[, k] <- phi[, k] / sum(phi[, k]) / dtheta
  }
  
  list(phi=phi, theta_grid=theta_grid)
}

#f_n(t_k) update (with L2  regularization)
update_f <- function(Y_obs, theta_samples, weights, N_harm, lambda_f=0.01) {
  Tg <- ncol(Y_obs)
  f_hat <- matrix(0, nrow=N_harm, ncol=Tg)
  for (k in 1:Tg) {
    X <- outer(theta_samples[[k]], 0:(N_harm-1), function(th, n) exp(1i * n * th))
    if (k %% 10==0) cat("k=", k, " dim(X)=", dim(X), " length(weights)=", length(weights[[k]]), "\n")

    W <- diag(weights[[k]])
    A <- Conj(t(X)) %*% W %*% X + lambda_f * diag(N_harm)
    b <- Conj(t(X)) %*% W %*% Y_obs[, k]
    f_hat[, k] <- Re(solve(A, b))
  }
  f_hat
}

# update_f <- function(Y_obs, theta_samples, weights, N_harm, lambda_f=0.01) {
#   Tg <- ncol(Y_obs)
#   f_hat <- matrix(0, nrow=N_harm, ncol=Tg)
#   for (k in 1:Tg) {
#     th_vec <- as.numeric(theta_samples[[k]])
#     w_vec <- as.numeric(weights[[k]])
#     X <- outer(th_vec, 0:(N_harm-1), function(th, n) exp(1i * n * th))
#     W <- diag(w_vec)
#     A <- Conj(t(X)) %*% W %*% X + lambda_f * diag(N_harm)
#     b <- Conj(t(X)) %*% W %*% Y_obs[, k]
#     f_hat[, k] <- Re(solve(A, b))
#   }
#   f_hat
# }



# weight calculation , given f_hat, calculate weight for each theta
compute_weights <- function(Y_obs, theta_samples, f_hat, sigma2) {
  Tg <- ncol(Y_obs)
  N_harm <- nrow(f_hat)
  weights <- vector("list", Tg)
  for (k in 1:Tg) {
    S_hat <- sapply(theta_samples[[k]], function(th) sum(f_hat[, k] * exp(1i * (0:(N_harm-1)) * th)))
    w <- exp(-(Y_obs[, k] - Re(S_hat))^2 / (2 * sigma2))
    w <- w / sum(w)
    weights[[k]] <- w
  }
  weights
}

# prediction surfaces vs true surfaces
plot_surface_comparison <- function(t_grid, theta_grid, f_hat, Y_obs, sigma2) {
  N_harm <- nrow(f_hat)
  S_hat <- matrix(0, nrow=length(theta_grid), ncol=length(t_grid))
  for (k in 1:length(t_grid)) {
    for (i in 1:length(theta_grid)) {
      S_hat[i, k] <- Re(sum(f_hat[, k] * exp(1i * (0:(N_harm-1)) * theta_grid[i])))
    }
  }
  
  # true surface:simple kernel smoother
  Y_surface <- matrix(0, nrow=length(theta_grid), ncol=length(t_grid))
  for (k in 1:length(t_grid)) {
    for (i in 1:length(theta_grid)) {
      kern <- dnorm(theta_grid[i] - runif(nrow(Y_obs), 0, 2*pi), sd=0.3)
      Y_surface[i, k] <- mean(Y_obs[, k]) # simple version
    }
  }
  
  p1 <- plot_ly(x=t_grid, y=theta_grid, z=S_hat,
                type="surface", colorscale="Viridis", opacity = 0.5) %>%
    layout(scene = list(
      xaxis = list(title = "time"),
      yaxis = list(title = "θ"),
      zaxis = list(title = "Kimesurface Intensity")
    ))
  
  p2 <- plot_ly(x=t_grid, y=theta_grid, z=Y_surface,
                type="surface", colorscale="Plasma") %>%
    layout(scene = list(
      xaxis = list(title = "time"),
      yaxis = list(title = "θ"),
      zaxis = list(title = "Kimesurface Intensity")
    ))
  
  subplot(p1, p2, nrows=1) %>% 
    layout(scene = list(domain = list(x = c(0, 0.5), y = c(0, 1))),
           scene2 = list(domain = list(x = c(0.5, 1.0), y = c(0, 1))),
           width = 1200,  # 50% larger than default 600
           height = 900) # 50% larger than default 400
    # %>% layout(width = 1200, height = 600)
}

# OLD version
# plot_surface_comparison <- function(t_grid, theta_grid, f_hat, Y_obs, sigma2) {
#   N_harm <- nrow(f_hat)
#   S_hat <- matrix(0, nrow=length(theta_grid), ncol=length(t_grid))
#   for (k in 1:length(t_grid)) {
#     for (i in 1:length(theta_grid)) {
#       S_hat[i, k] <- Re(sum(f_hat[, k] * exp(1i * (0:(N_harm-1)) * theta_grid[i])))
#     }
#   }
#   
#   # true surface:simple kernel smoother
#   Y_surface <- matrix(0, nrow=length(theta_grid), ncol=length(t_grid))
#   for (k in 1:length(t_grid)) {
#     for (i in 1:length(theta_grid)) {
#       kern <- dnorm(theta_grid[i] - runif(nrow(Y_obs), 0, 2*pi), sd=0.3)
#       Y_surface[i, k] <- mean(Y_obs[, k]) # simple version
#     }
#   }
#   
#   p1 <- plot_ly(x=t_grid, y=theta_grid, z=S_hat,
#                 type="surface", colorscale="Viridis") %>%
#     layout(title= "S_hat(t, theta)")
#   
#   p2 <- plot_ly(x=t_grid, y=theta_grid, z=Y_surface,
#                 type="surface", colorscale="Plasma") %>%
#     layout(title= "Y(t,theta)")
#   
#   subplot(p1, p2, nrows=1) %>% 
#      layout(width = 1200, height = 600)
# 
# }

set.seed(123)
Tg <- 20
Ng <- 50
N_harm <- 3
sigma2 <- 0.05

t_grid <- seq(0, 1, length.out=Tg)
theta_grid <- seq(0, 2*pi, length.out=50)
dtheta <- theta_grid[2] - theta_grid[1]

# 1. true y and true observations
f_true <- matrix(0, nrow=N_harm, ncol=Tg)
for (n in 1:N_harm) {
  f_true[n, ] <- sin(2*pi*n*t_grid) * 0.5^(n-1)
}

Y_obs <- matrix(0, nrow=Ng, ncol=Tg)
for (k in 1:Tg) {
  theta_samples_true <- runif(Ng, 0, 2*pi)
  S_val <- sapply(theta_samples_true, function(th) sum(f_true[, k] * exp(1i * (0:(N_harm-1)) * th)))
  Y_obs[, k] <- Re(S_val) + rnorm(Ng, 0, sqrt(sigma2))
}

# 2. initialization of the  parameters
f_hat <- matrix(0, nrow=N_harm, ncol=Tg)
phi <- matrix(1/(2*pi), nrow=length(theta_grid), ncol=Tg)  # uniform

L <- Ng # number of samples

# 3. sample theta from non-parametric distribution phi 
sample_theta_from_phi <- function(phi, theta_grid, L) {
  apply(phi, 2, function(p) sample(theta_grid, size=L, replace=TRUE, prob=p))
}

# 4. EM iteration
n_iter <- 15
theta_samples <- sample_theta_from_phi(phi, theta_grid, L)
weights <- lapply(1:Tg, function(k) rep(1/L, L))

for (it in 1:n_iter) {
  if (it %% 10==0) cat("Iter:", it, "\n")
  
  # E step:first sample theta,then calculate weight
  theta_samples <- sample_theta_from_phi(phi, theta_grid, L)
  theta_samples_list <- lapply(1:Tg, function(k) theta_samples[, k])
  weights <- compute_weights(Y_obs, theta_samples_list, f_hat, sigma2)
  
  # M step:update parameters
  f_hat <- update_f(Y_obs, theta_samples_list, weights, N_harm, lambda_f=0.01)
  phi_res <- update_phi_nonparam_smooth(theta_samples_list, weights, Tg, M_theta=length(theta_grid), lambda_t=5)
  phi <- phi_res$phi
  theta_grid <- phi_res$theta_grid
}
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50 
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50 
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50 
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50 
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50 
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50 
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50 
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50 
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50 
## Iter: 10 
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50 
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50 
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50 
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50 
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50 
## k= 10  dim(X)= 50 3  length(weights)= 50 
## k= 20  dim(X)= 50 3  length(weights)= 50

2.2 3D Display functions

plot_surface_comparison(t_grid, theta_grid, f_hat, Y_obs, sigma2)
# New example (fn + Von mises distribution)
# Initialization of True Surface

set.seed(123)
Tg <- 50
Ng <- 50
N_harm <- 3
sigma2 <- 0.05

t_grid <- seq(0, 2, length.out=Tg)
theta_grid <- seq(0, 2*pi, length.out=50)
dtheta <- theta_grid[2] - theta_grid[1]

# 1. true f and observations
f_true <- matrix(0, nrow=N_harm, ncol=Tg)
# A_t <-  0.6 + 0.4*cos(2*pi*t_grid)
for (n in 1:N_harm) {
  # f_true[n, ] <- sin(2*pi*n*t_grid) * 0.5^(n-1)
  f_true[n,] <- 0.6^(n-1)*cos(2*pi*n*t_grid + n*pi/6)
}

Y_obs <- matrix(0, nrow=Ng, ncol=Tg)
for (k in 1:Tg) {
  theta_samples_true <- runif(Ng, 0, 2*pi)
  S_val <- sapply(theta_samples_true, function(th) sum(f_true[, k] * exp(1i * (0:(N_harm-1)) * th)))
  Y_obs[, k] <- Re(S_val) + rnorm(Ng, 0, sqrt(sigma2))
}

# N_harm <- nrow(f_hat)
# S_hat <- matrix(0, nrow=length(theta_grid), ncol=length(t_grid))
# for (k in 1:length(t_grid)) {
#   for (i in 1:length(theta_grid)) {
#     S_hat[i, k] <- Re(sum(f_hat[, k] * exp(1i * (0:(N_harm-1)) * theta_grid[i])))
#   }
# }

# true surface
Y_surface <- matrix(0, nrow=length(theta_grid), ncol=length(t_grid))
for (k in 1:length(t_grid)) {
  for (i in 1:length(theta_grid)) {
    kern <- dnorm(theta_grid[i] - runif(nrow(Y_obs), 0, 2*pi), sd=0.3)
    Y_surface[i, k] <- mean(Y_obs[, k]) # simple version
  }
}

p2 <- plot_ly(x=t_grid, y=theta_grid, z=Y_surface, opacity = 0.5,
                type="surface", colorscale="Plasma") %>%
    layout(title="Y(t,θ)",
           scene = list(xaxis = list(title = "time"),
                        yaxis = list(title = "θ"),
                        zaxis = list(title = "Kimesurface Intensity")
    )) %>% 
    layout(width = 1200, height = 600)
p2

2.3 Experiment 1

# sample theta
library(circular)

Y_obs_1 <- matrix(0, nrow=Ng, ncol=Tg)
thetas <- matrix(0, nrow=Ng, ncol=Tg)
for (k in 1:Tg) {
  mu <- circular(pi/4)
  kappa <- 5
  theta_samples <- rvonmises(Ng, mu = mu, kappa = kappa)
  theta_samples_pi <- conversion.circular(theta_samples, modulo = "pi")
  theta_samples_true <- as.numeric(theta_samples_pi)

  #theta_samples_true <- as.numeric(theta_samples)
  # theta_samples_true <- runif(Ng, 0, 2*pi)
  S_val <- sapply(theta_samples_true, 
                  function(th) sum(f_true[, k] * exp(1i * (0:(N_harm-1)) * th)))
  Y_obs_1[, k] <- Re(S_val) + rnorm(Ng, 0, sqrt(sigma2))
  thetas[,  k] <- theta_samples_true
}

mu <- circular(pi/4)
kappa <- 5
theta_samples <- rvonmises(Ng, mu = mu, kappa = kappa)
theta_samples_pi <- conversion.circular(theta_samples, modulo = "pi")
theta_samples_true <- as.numeric(theta_samples_pi)
max(theta_samples_true)
## [1] 3.096735
n_lines <- 5

for (i in 1:n_lines) {
  theta_seq <- thetas[i, ]
  t_seq <- t_grid
  z_seq <- Y_obs_1[i,]
  
  p2 <- p2 %>%
    add_trace(
      x = t_seq,
      y = theta_seq,
      z = z_seq,
      type = "scatter3d",
      mode = "lines",
      line = list(width = 4, color = i),
      name = paste0("line_", i)
    )
}

p2
x <- 1:ncol(Y_obs_1)

# Display original observed (repeated measurement) time-series
# Create the plot
p <- plot_ly()

# Add a trace for each row in Y_obs_1
for (i in 1:nrow(Y_obs_1)) {
  p <- p %>% add_trace(
    x = x,
    y = Y_obs_1[i, ],
    type = 'scatter',
    mode = 'lines',
    name = paste("Trace", i),
    line = list(color = rainbow(nrow(Y_obs_1))[i])
  )
}

# Customize the layout
p %>% layout(
  title = "Y_obs",
  xaxis = list(title = "time"),
  yaxis = list(title = "Y_obs"),
  showlegend = TRUE
)