Tutorial on multiRL • multiRL

0. Data Structure

Subject	Block	Trial	L_choice	R_choice	L_reward	R_reward	Sub_Choose	-
1	1	1	A	B	36	40	A	…
1	1	2	B	A	0	36	B	…
1	1	3	C	D	-36	-40	C	…
1	1	4	D	C	0	-36	D	…
…	…	…	…	…	…	…	…	…

data = multiRL::TAB
colnames = list(
  subid = "Subject", block = "Block", trial = "Trial",
  object = c("L_choice", "R_choice"), 
  reward = c("L_reward", "R_reward"),
  action = "Sub_Choose",
  exinfo = c("...", "...", "...")
)
behrule = list(
  cue = c("A", "B", "C", "D"),
  rsp = c("A", "B", "C", "D")
)

Note

The number of object and reward can be multiple (> 2), but they must have a strict one-to-one correspondence.
An object can consist of multiple elements; please separate them with “_” (e.g., ‘context_target_behavior’).
An action can point to either an entire object or a specific part of it. For details, see this
Any additional information (column names) should be passed to exinfo as a character vector.

1. Run Model

Create a function with ONLY ONE argument, params

Model <- function(params){
  
############################# [Modify Here] ####################################
  params <- list(free = list(alpha = params[1], beta = params[2]))
############################# [Modify Here] ####################################

  multiRL.model <- multiRL::run_m(
    data = data,
    behrule = behrule,
    colnames = colnames,
    params = params,
    funcs = funcs,
    priors = priors,
    settings = settings
  )
  
  assign(x = "multiRL.model", value = multiRL.model, envir = multiRL.env)
  return(.return_result(multiRL.model))
}

How to define a new model?

Customize the following six core functions.
Figure out the free parameters within your model.
Declare these parameters as free when defining the object function above.

Learning Rate (α)

print(multiRL::func_alpha)

func_alpha <- function(
    qvalue,
    reward,
    params,
    system,
    ...
){
  
  list2env(list(...), envir = environment())
  
  # If you need extra information(...)
  # Column names may be lost(C++), indexes are recommended
  # e.g.
  # Trial  <- idinfo[3]
  # Frame  <- exinfo[1]
  # Action <- behave[1]
  
  alpha     <-  params[["alpha"]]
  alphaN    <-  params[["alphaN"]]
  alphaP    <-  params[["alphaP"]]
  
  # Determine the model currently in use based on which parameters are free.
  if (
    system == "RL" && !(is.null(alpha)) && is.null(alphaN) && is.null(alphaP)
  ) {
    model <- "TD"
  } else if (
    system == "RL" && is.null(alpha) && !(is.null(alphaN)) && !(is.null(alphaP))
  ) {
    model <- "RSTD"
  } else if (
    system == "WM"
  ) {
    model <- "WM"
    alpha <- 1
  } else {
    stop("Unknown Model! Plase modify your learning rate function")
  }

  # TD
  if (model == "TD") {
    update <- qvalue + alpha  * (reward - qvalue)
  # RSTD
  } else if (model == "RSTD" && reward < qvalue) {
    update <- qvalue + alphaN * (reward - qvalue)
  } else if (model == "RSTD" && reward >= qvalue) {
    update <- qvalue + alphaP * (reward - qvalue)
  # WM
  } else if (model == "WM") {
    update <- qvalue + alpha  * (reward - qvalue)
  }

  return(update)
}

Soft-Max (β)

print(multiRL::func_beta)

func_beta <- function(
    qvalue, 
    explor,
    params,
    system,
    ...
){
  
  list2env(list(...), envir = environment())
  
  # If you need extra information(...)
  # Column names may be lost(C++), indexes are recommended
  # e.g.
  # Trial  <- idinfo[3]
  # Frame  <- exinfo[1]
  # Action <- behave[1]
  
  beta     <- params[["beta"]]
  lapse    <- params[["lapse"]]
  weight   <- params[["weight"]]
  capacity <- params[["capacity"]]

  index     <- which(!is.na(qvalue[[1]]))
  n_shown   <- length(index)
  n_system  <- length(qvalue)
  n_options <- length(qvalue[[1]])
  
  # Assign weights to different systems
  if (length(weight) == 1L) {weight <- c(weight, 1 - weight)}
  weight <- weight / sum(weight)
  if (n_system == 1) {weight <- weight[1]}
  
  # Compute the probabilities estimated by different systems
  prob_mat <- matrix(0, nrow = n_options, ncol = n_system)
  
  if (explor == 1) {
    prob_mat[index, ] <- 1 / n_shown
  } else {
    for (s in seq_len(n_system)) {
      sub_qvalue <- qvalue[[s]]
      exp_stable <- exp(beta * (sub_qvalue - max(sub_qvalue, na.rm = TRUE)))
      prob_mat[, s] <- exp_stable / sum(exp_stable, na.rm = TRUE)
    }
  }
  
  # Weighted average
  prob <- as.vector(prob_mat %*% weight)
  
  # lapse
  prob <- (1 - lapse * n_shown) * prob + lapse
  
  return(prob)
}

Utility Function (γ)

print(multiRL::func_gamma)

func_gamma <- function(
    reward,
    params,
    ...
){
  
  list2env(list(...), envir = environment())
  
  # If you need extra information(...)
  # Column names may be lost(C++), indexes are recommended
  # e.g.
  # Trial  <- idinfo[3]
  # Frame  <- exinfo[1]
  # Action <- behave[1]

  gamma     <-  params[["gamma"]]

  # Stevens' Power Law
  utility <- sign(reward) * (abs(reward) ^ gamma)
  
  return(utility)
}

Upper-Confidence-Bound (δ)

print(multiRL::func_delta)

func_delta <- function(
    count,
    params,
    ...
){
  
  list2env(list(...), envir = environment())
  
  # If you need extra information(...)
  # Column names may be lost(C++), indexes are recommended
  # e.g.
  # Trial  <- idinfo[3]
  # Frame  <- exinfo[1]
  # Action <- behave[1]
  latent <- behave[2]
  position <- as.numeric(cue %in% latent)
  
  delta     <- params[["delta"]]
  sticky    <- params[["sticky"]]
  
  # Upper-Confidence-Bound
  bias <- delta * sqrt(log(count + exp(1)) / (count + 1e-10)) + 
    # Sticky to the same response
    sticky * position
  
  return(bias)
}

Epsilon Related (ε)

print(multiRL::func_epsilon)

func_epsilon <- function(
    rownum,
    params,
    ...
){
  
  list2env(list(...), envir = environment())
  
  # If you need extra information(...)
  # Column names may be lost(C++), indexes are recommended
  # e.g.
  # Trial  <- idinfo[3]
  # Frame  <- exinfo[1]
  # Action <- behave[1]
  
  epsilon   <-  params[["epsilon"]]
  threshold <-  params[["threshold"]]
  
  # Determine the model currently in use based on which parameters are free.
  if (is.na(epsilon) && threshold > 0) {
    model <- "first"
  } else if (!(is.na(epsilon)) && threshold == 0) {
    model <- "decreasing"
  } else if (!(is.na(epsilon)) && threshold == 1) {
    model <- "greedy"
  } else {
    stop("Unknown Model! Plase modify your learning rate function")
  }
  
  set.seed(rownum)
  # Epsilon-First: 
  if (rownum <= threshold) {
    try <- 1
  } else if (rownum > threshold && model == "first") {
    try <- 0
  # Epsilon-Greedy:
  } else if (rownum > threshold && model == "greedy"){
    try <- sample(
      c(1, 0),
      prob = c(epsilon, 1 - epsilon),
      size = 1
    )
  # Epsilon-Decreasing: 
  } else if (rownum > threshold && model == "decreasing") {
    try <- sample(
      c(1, 0),
      prob = c(
        1 / (1 + epsilon * rownum),
        epsilon * rownum / (1 + epsilon * rownum)
      ),
      size = 1
    )
  }
  
  return(try)
}

Working Memory (ζ)

print(multiRL::func_zeta)

func_zeta <- function(
    value0, 
    values,
    reward,
    params,
    system,
    ...
){
  
  list2env(list(...), envir = environment())
  
  # If you need extra information(...)
  # Column names may be lost(C++), indexes are recommended
  # e.g.
  # Trial  <- idinfo[3]
  # Frame  <- exinfo[1]
  # Action <- behave[1]
  
  zeta       <-  params[["zeta"]]
  bonus      <-  params[["bonus"]]
  
  if (reward == 0) {
    decay <- values + zeta * (value0 - values)
  } else if (reward < 0) {
    decay <- values + zeta * (value0 - values) + bonus
  } else if (reward > 0) {
    decay <- values + zeta * (value0 - values) - bonus
  }

  return(decay)
}

2. Recovery

Randomly generate free parameters for Model A and use them to produce simulated data. Subsequently, fit all candidate models to this simulated data. This step involves parameter recovery and model recovery for each candidate model. If a model meets the following two criteria, it can be considered robust and suitable for fitting real data and for model comparison.

If Model A can successfully recover its true generative parameters from the simulated data, it indicates strong parameter recovery for the model.

If Model A yields the best fit indices compared to all other candidate models when fitted to the data it generated, it demonstrates robust model recovery.

Note

if a researcher skips this step and directly fits the candidate models to real data and conducts model comparison, then when one model emerges as the winner, it is impossible to determine whether this is because the model is genuinely superior or because the other models are not robust at all and thus are not even qualified to be included in a model comparison.

recovery.MLE <- multiRL::rcv_d(
  estimate = "MLE",

  data = data,
  colnames = colnames,
  behrule = behrule,

  models = list(multiRL::TD, multiRL::RSTD, multiRL::Utility),
  priors = list(
    list(
      alpha = function(x) {stats::rbeta(n = 1, shape1 = 2, shape2 = 2)}, 
      beta = function(x) {stats::rexp(n = 1, rate = 1)}
    ),
    list(
      alphaN = function(x) {stats::rbeta(n = 1, shape1 = 2, shape2 = 2)}, 
      alphaP = function(x) {stats::rbeta(n = 1, shape1 = 2, shape2 = 2)}, 
      beta = function(x) {stats::rexp(n = 1, rate = 1)}
    ),
    list(
      alpha = function(x) {stats::rbeta(n = 1, shape1 = 2, shape2 = 2)}, 
      beta = function(x) {stats::rexp(n = 1, rate = 1)},
      gamma = function(x) {stats::rbeta(n = 1, shape1 = 2, shape2 = 2)}
    )
  ),
  settings = list(name = c("TD", "RSTD", "Utility")),
  
  algorithm = "NLOPT_GN_MLSL",
  lowers = list(c(0, 0), c(0, 0, 0), c(0, 0, 0)),
  uppers = list(c(1, 5), c(1, 1, 5), c(1, 5, 1)),
  control = list(core = 10, sample = 100, iter = 100)
)

Recovery TD

Simulating...

Fitting TD
  |==================                                                           | 25%

3. Fit Real Data

A model that achieves good parameter recovery in Step 2 indicates that it has reliability. A model that achieves good model recovery in Step 2 indicates that it is distinguishable from other models.

Only models that possess both reliability and distinguishability should be used to fit real data. The optimal model is then determined based on how similar the experimental effects reproduced by the model are to those observed in human participants.

fitting.MLE <- multiRL::fit_p(
  estimate = "MLE",

  data = data,
  colnames = colnames,
  behrule = behrule,

  models = list(multiRL::TD, multiRL::RSTD, multiRL::Utility),
  settings = list(name = c("TD", "RSTD", "Utility")),
  
  algorithm = "NLOPT_GN_MLSL",
  lowers = list(c(0, 0), c(0, 0, 0), c(0, 0, 0)),
  uppers = list(c(1, 5), c(1, 1, 5), c(1, 5, 1)),
  control = list(core = 10, iter = 100)
)

Fitting TD
  |==================                                                           | 25%

4. Replay the Experiment

The results from recovery(rcv_d) and fitting(fit_p) can be passed to the rpl_e function to reproduce the behavioral patterns generated by re-incorporating these parameters into the model.

replay.recovery <- multiRL::rpl_e(
  result = recovery.MLE,

  data = data,
  colnames = colnames,
  behrule = behrule,
  
  models = list(multiRL::TD, multiRL::RSTD, multiRL::Utility),
  settings = list(name = c("TD", "RSTD", "Utility")),

  omit = c("data", "funcs")
)
  
plot(x = replay.recovery)

TD_alpha TD_beta

RSTD_alphaN RSTD_alphaP RSTD_beta

Utility_alpha Utility_beta Utility_gamma

replay.fitting <- multiRL::rpl_e(
  result = fitting.MLE,

  data = data,
  colnames = colnames,
  behrule = behrule,
  
  models = list(multiRL::TD, multiRL::RSTD, multiRL::Utility),
  settings = list(name = c("TD", "RSTD", "Utility")),
  
  omit = c("funcs")
)

plot(x = replay.fitting)

Block_Ratio

Subject	Block	Trial	L_choice	R_choice	L_reward	R_reward	Sub_Choose	-
1	1	1	A	B	36	40	A	…
1	1	2	B	A	0	36	B	…
1	1	3	C	D	-36	-40	C	…
1	1	4	D	C	0	-36	D	…
…	…	…	…	…	…	…	…	…

Subject	Block	Trial	L_choice	R_choice	L_reward	R_reward	Sub_Choose	-
1	1	1	A	B	36	40	A	…
1	1	2	B	A	0	36	B	…
1	1	3	C	D	-36	-40	C	…
1	1	4	D	C	0	-36	D	…
…	…	…	…	…	…	…	…	…

Subject	Block	Trial	L_choice	R_choice	L_reward	R_reward	Sub_Choose	-
1	1	1	A	B	36	40	A	…
1	1	2	B	A	0	36	B	…
1	1	3	C	D	-36	-40	C	…
1	1	4	D	C	0	-36	D	…
…	…	…	…	…	…	…	…	…