Skip to contents

0. Data Structure

Subject Block Trial L_choice R_choice L_reward R_reward Sub_Choose -
1 1 1 A B 36 40 A
1 1 2 B A 0 36 B
1 1 3 C D -36 -40 C
1 1 4 D C 0 -36 D
data = multiRL::TAB
colnames = list(
  subid = "Subject", block = "Block", trial = "Trial",
  object = c("L_choice", "R_choice"), 
  reward = c("L_reward", "R_reward"),
  action = "Sub_Choose",
  exinfo = c("...", "...", "...")
)
behrule = list(
  cue = c("A", "B", "C", "D"),
  rsp = c("A", "B", "C", "D")
)

Note

  1. The number of object and reward can be multiple (> 2), but they must have a strict one-to-one correspondence.
  2. An object can consist of multiple elements; please separate them with “_” (e.g., ‘context_target_behavior’).
  3. An action can point to either an entire object or a specific part of it. For details, see this
  4. Any additional information (column names) should be passed to exinfo as a character vector.

1. Run Model

Create a function with ONLY ONE argument, params

Model <- function(params){
  
############################# [Modify Here] ####################################
  params <- list(free = list(alpha = params[1], beta = params[2]))
############################# [Modify Here] ####################################

  multiRL.model <- multiRL::run_m(
    data = data,
    behrule = behrule,
    colnames = colnames,
    params = params,
    funcs = funcs,
    priors = priors,
    settings = settings
  )
  
  assign(x = "multiRL.model", value = multiRL.model, envir = multiRL.env)
  return(.return_result(multiRL.model))
}

How to define a new model?

  1. Customize the following six core functions.
  2. Figure out the free parameters within your model.
  3. Declare these parameters as free when defining the object function above.
Learning Rate (α)
print(multiRL::func_alpha)
func_alpha <- function(
    qvalue,
    reward,
    params,
    ...
){
  # if you need extra information
  # e.g.
  # Trial <- idinfo["Trial"]
  # Frame <- exinfo["Frame"]
  
  alpha     <-  get_param(params, "alpha")
  alphaN    <-  get_param(params, "alphaN")
  alphaP    <-  get_param(params, "alphaP")
  
  if (
    !(is.na(alpha)) && is.na(alphaN) && is.na(alphaP)
  ) {
    model <- "TD"
  } else if (
    is.na(alpha) && !(is.na(alphaN)) && !(is.na(alphaP))
  ) {
    model <- "RSTD"
  } else {
    stop("Unknown Model! Plase modify your learning rate function")
  }

  # TD
  if (model == "TD") {
    update <- qvalue + alpha   * (reward - qvalue)
  # RSTD
  } else if (model == "RSTD" && reward < qvalue) {
    update <- qvalue + alphaN * (reward - qvalue)
  } else if (model == "RSTD" && reward >= qvalue) {
    update <- qvalue + alphaP * (reward - qvalue)
  }

  return(update)
}
Soft-Max (β)
print(binaryRL::func_tau)
func_beta <- function(
    qvalue, 
    explor,
    params,
    ...
){
  # if you need extra information
  # e.g.
  # Trial <- idinfo["Trial"]
  # Frame <- exinfo["Frame"]
  
  beta      <-  get_param(params, "beta")
  lapse     <-  get_param(params, "lapse")
  
  n_options <- length(qvalue)
  prob      <- rep(x = NA_real_, times = n_options)
  index     <- which(!is.na(qvalue))
  n_shown   <- length(index)
  
  if (explor == 1) {
    # Exploration
    prob[index] <- 1 / n_shown
  } else {
    # Exploitation
    exp_stable <- exp(beta * (qvalue - max(qvalue, na.rm = TRUE)))
    prob <- exp_stable / sum(exp_stable, na.rm = TRUE)
  }
  
  # lapse
  prob <- (1 - lapse * n_shown) * prob + lapse
  
  return(prob)
}
Utility Function (γ)
print(binaryRL::func_gamma)
func_gamma <- function(
    reward,
    params,
    ...
){
  # if you need extra information
  # e.g.
  # Trial <- idinfo["Trial"]
  # Frame <- exinfo["Frame"]

  gamma     <-  get_param(params, "gamma")

  utility <- sign(reward) * (abs(reward) ^ gamma)
}
Upper-Confidence-Bound (δ)
print(binaryRL::func_pi)
func_delta <- function(
    count,
    params,
    ...
){
  # if you need extra information
  # e.g.
  # Trial <- idinfo["Trial"]
  # Frame <- exinfo["Frame"]
  
  delta     <-  get_param(params, "delta")

  bias <- delta * sqrt(log(count + exp(1)) / (count + 1e-10))
  
  return(bias)
}
Epsilon Related (ε)
print(binaryRL::func_epsilon)
func_epsilon <- function(
    rownum,
    params,
    ...
){
  # if you need extra information
  # e.g.
  # Trial <- idinfo["Trial"]
  # Frame <- exinfo["Frame"]
  
  epsilon   <-  get_param(params, "epsilon")
  threshold <-  get_param(params, "threshold")
  
  if (is.na(epsilon) && threshold > 0) {
    model <- "first"
  } else if (!(is.na(epsilon)) && threshold == 0) {
    model <- "decreasing"
  } else if (!(is.na(epsilon)) && threshold == 1) {
    model <- "greedy"
  } else {
    stop("Unknown Model! Plase modify your learning rate function")
  }
  
  set.seed(rownum)
  # Epsilon-First: 
  if (rownum <= threshold) {
    try <- 1
  } else if (rownum > threshold && model == "first") {
    try <- 0
  # Epsilon-Greedy:
  } else if (rownum > threshold && model == "greedy"){
    try <- sample(
      c(1, 0),
      prob = c(epsilon, 1 - epsilon),
      size = 1
    )
  # Epsilon-Decreasing: 
  } else if (rownum > threshold && model == "decreasing") {
    try <- sample(
      c(1, 0),
      prob = c(
        1 / (1 + epsilon * rownum),
        epsilon * rownum / (1 + epsilon * rownum)
      ),
      size = 1
    )
  }
  
  return(try)
}
Working Memory (ζ)
print(binaryRL::func_zeta)
func_zeta <- function(
    value0, 
    values,
    reward,
    params,
    ...
){
  # if you need extra information
  # e.g.
  # Trial <- idinfo["Trial"]
  # Frame <- exinfo["Frame"]
  
  zeta       <-  get_param(params, "zeta")
  bonus      <-  get_param(params, "bonus")
  
  if (reward == 0) {
    decay <- values + zeta * (value0 - values)
  } else if (reward < 0) {
    decay <- values + zeta * (value0 - values) + bonus
  } else if (reward > 0) {
    decay <- values + zeta * (value0 - values) - bonus
  }

  return(decay)
}

2. Recovery

Randomly generate free parameters for Model A and use them to produce simulated data. Subsequently, fit all candidate models to this simulated data. This step involves parameter recovery and model recovery for each candidate model. If a model meets the following two criteria, it can be considered robust and suitable for fitting real data and for model comparison.

  1. If Model A can successfully recover its true generative parameters from the simulated data, it indicates strong parameter recovery for the model.
  2. If Model A yields the best fit indices compared to all other candidate models when fitted to the data it generated, it demonstrates robust model recovery.

Note

if a researcher skips this step and directly fits the candidate models to real data and conducts model comparison, then when one model emerges as the winner, it is impossible to determine whether this is because the model is genuinely superior or because the other models are not robust at all and thus are not even qualified to be included in a model comparison.

recovery.MLE <- multiRL::rcv_d(
  estimate = "MLE",

  data = data,
  colnames = colnames,
  behrule = behrule,

  models = list(multiRL::TD, multiRL::RSTD, multiRL::Utility),
  priors = list(
    list(
      alpha = function(x) {stats::rbeta(n = 1, shape1 = 2, shape2 = 2)}, 
      beta = function(x) {stats::rexp(n = 1, rate = 1)}
    ),
    list(
      alphaN = function(x) {stats::rbeta(n = 1, shape1 = 2, shape2 = 2)}, 
      alphaP = function(x) {stats::rbeta(n = 1, shape1 = 2, shape2 = 2)}, 
      beta = function(x) {stats::rexp(n = 1, rate = 1)}
    ),
    list(
      alpha = function(x) {stats::rbeta(n = 1, shape1 = 2, shape2 = 2)}, 
      beta = function(x) {stats::rexp(n = 1, rate = 1)},
      gamma = function(x) {stats::rbeta(n = 1, shape1 = 2, shape2 = 2)}
    )
  ),
  settings = list(name = c("TD", "RSTD", "Utility")),
  
  algorithm = "NLOPT_GN_MLSL",
  lowers = list(c(0, 0), c(0, 0, 0), c(0, 0, 0)),
  uppers = list(c(1, 5), c(1, 1, 5), c(1, 5, 1)),
  control = list(core = 10, sample = 100, iter = 100)
)
Recovery TD

Simulating...

Fitting TD
  |==================                                                           | 25%

3. Fit Real Data

A model that achieves good parameter recovery in Step 2 indicates that it has reliability. A model that achieves good model recovery in Step 2 indicates that it is distinguishable from other models.

Only models that possess both reliability and distinguishability should be used to fit real data. The optimal model is then determined based on how similar the experimental effects reproduced by the model are to those observed in human participants.

fitting.MLE <- multiRL::fit_p(
  estimate = "MLE",

  data = data,
  colnames = colnames,
  behrule = behrule,

  models = list(multiRL::TD, multiRL::RSTD, multiRL::Utility),
  settings = list(name = c("TD", "RSTD", "Utility")),
  
  algorithm = "NLOPT_GN_MLSL",
  lowers = list(c(0, 0), c(0, 0, 0), c(0, 0, 0)),
  uppers = list(c(1, 5), c(1, 1, 5), c(1, 5, 1)),
  control = list(core = 10, iter = 100)
)
Fitting TD
  |==================                                                           | 25%

4. Replay the Experiment

The results from recovery(rcv_d) and fitting(fit_p) can be passed to the rpl_e function to reproduce the behavioral patterns generated by re-incorporating these parameters into the model.

replay.recovery <- multiRL::rpl_e(
  result = recovery.MLE,

  data = data,
  colnames = colnames,
  behrule = behrule,
  
  models = list(multiRL::TD, multiRL::RSTD, multiRL::Utility),
  settings = list(name = c("TD", "RSTD", "Utility")),

  omit = c("data", "funcs")
)
  
plot(x = replay.recovery)

TD_alphaTD_beta

RSTD_alphaNRSTD_alphaPRSTD_beta

Utility_alphaUtility_betaUtility_gamma

replay.fitting <- multiRL::rpl_e(
  result = fitting.MLE,

  data = data,
  colnames = colnames,
  behrule = behrule,
  
  models = list(multiRL::TD, multiRL::RSTD, multiRL::Utility),
  settings = list(name = c("TD", "RSTD", "Utility")),
  
  omit = c("funcs")
)

plot(x = replay.fitting)

Block_Ratio