# PROBLEM 1 - Rescorla-Wagner Model¶

Several classical conditioning experiments can be qualitatively reproduced by the Rescorla-Wagner model (see Chapter 9.1-9.2 of Dayan & Abbott). To explain these experimental observations, we assume that the objective of an animal is to predict the presence of certain events such as a food reward. Let us denote the presence or absence of such a reward (also called unconditioned stimulus or UCS) by $r = 1$ or $r = 0$, respectively. Other events such as stimuli (also called conditioned stimuli or CS) may or may not predict the occurrence of this reward, and we denote their presence by $u = 1$ and their absence by $u = 0$. The organism's task is then to predict if a reward is present, depending on whether the stimulus was present. We denote the animal's prediction by $v$ and write

$$v = wu \qquad \text{(1)}$$

where $w$ is a free parameter that the animal needs to learn.

After every trial of a conditioning experiment, this parameter is learned or updated using the Rescorla-Wagner learning rule:

$$w → w + εδu \qquad \text{(2)}$$

where

• $ε$ is the learning rate
• $δ = r − v$ is the prediction error
• and $u$ is the stimulus
In [201]:
# /!\ I'm using Python 3

import plotly
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go

import matplotlib.pyplot as plt
import scipy.io as sio
from scipy import stats
%matplotlib inline

# Render plotly figures inline in the notebook without a Chart Studio account.
# NOTE(review): `plotly.plotly` (imported as `py` above) was moved to the
# separate chart-studio package in plotly v4 — this notebook presumably
# targets plotly 3.x; confirm before upgrading.
plotly.offline.init_notebook_mode(connected=True)
from IPython.core.display import display, HTML, Markdown
# The polling here is to ensure that plotly.js has already been loaded before
# setting display alignment in order to avoid a race condition.
display(HTML(
'<script>'
'var waitForPlotly = setInterval( function() {'
'if( typeof(window.Plotly) !== "undefined" ){'
'MathJax.Hub.Config({ SVG: { font: "STIX-Web" }, displayAlign: "center" });'
'MathJax.Hub.Queue(["setRenderer", MathJax.Hub, "SVG"]);'
'clearInterval(waitForPlotly);'
'}}, 250 );'
'</script>'
))

# Colorscales
# Colorscales
def colorscale_list(cmap, number_colors, return_rgb_only=False):
    """Sample `number_colors` colors from a matplotlib colormap.

    cmap            -- matplotlib colormap name (e.g. 'Greens').
    number_colors   -- how many evenly spaced colors to sample.
    return_rgb_only -- if True, return a flat list of 'rgb(...)' strings;
                       otherwise return a plotly discrete colorscale, i.e. a
                       list of [normalized position, color] pairs.

    NOTE: the body's indentation was lost in the notebook export; it is
    restored here with the original statements unchanged.
    """
    cm = plt.get_cmap(cmap)
    colors = [np.array(cm(i/number_colors)) for i in range(1, number_colors+1)]
    rgb_colors_plotly = []
    rgb_colors_only = []
    for i, c in enumerate(colors):
        # Drop the alpha channel and scale the RGB components to 0-255.
        col = 'rgb{}'.format(tuple(255*c[:-1]))
        rgb_colors_only.append(col)
        # Two entries per color at consecutive positions make the plotly
        # colorscale stepwise (discrete) rather than interpolated.
        rgb_colors_plotly.append([i/number_colors, col])
        rgb_colors_plotly.append([(i+1)/number_colors, col])
    return rgb_colors_only if return_rgb_only else rgb_colors_plotly

## (a) - Generate a set of stimuli $u_i$ and rewards $r_i$. We use the index $i$ to number the trials. Assume that during the first $25$ trials, both stimuli and reward are present, and during the next $25$ trials, only the stimulus is present. Plot the rewards and stimuli as a function of the trial index.¶

In [265]:
# Experiment (a): 50 trials — stimulus + reward for the first 25 trials
# (acquisition), stimulus alone for the last 25 (extinction).
nb_trials = 50
index_change = 25

u = np.ones(nb_trials)   # stimulus present on every trial
r = np.ones(nb_trials)
r[index_change:] = 0     # reward absent from trial 26 onwards

# Shared plot layout, reused by the later cells of this notebook.
layout= go.Layout(
title= 'Trials of a conditioning experiment',
hovermode= 'closest',
xaxis= dict(
title= 'Trial number...',
ticklen= 5,
zeroline= False,
gridwidth= 2,
),
yaxis=dict(
title= 'Value',
ticklen= 5,
gridwidth= 2,
),
showlegend= True
)

# Stimulus as markers, reward as a dashed line, over the trial index.
trace_stimulus = go.Scatter(
x = list(range(1, nb_trials+1)),
y = u,
mode = 'markers',
name = 'Stimulus u'
)

trace_reward = go.Scatter(
x = list(range(1, nb_trials+1)),
y = r,
mode = 'lines',
name = 'Reward r',
line = dict(
dash = 'dash',
width = 3
)
)

plotly.offline.iplot(
go.Figure(
data=[trace_stimulus, trace_reward],
layout=layout)
)

## (b) - Use the learning rule above to adapt $w$ during the experiment. Choose the learning rate $ε = 0.1$. Plot $v_i$ as a function of the trial.¶

In [203]:
epsilon = 0.1

def gradient_descent(epsilon=0.1, stimuli=None, rewards=None):
    """Run the Rescorla-Wagner rule w <- w + eps*(r - w*u)*u over all trials.

    epsilon -- learning rate.
    stimuli -- 1-D array of stimulus presences u_i; defaults to the
               notebook-global `u`.
    rewards -- 1-D array of rewards r_i; defaults to the notebook-global `r`.
    Returns the array of weights w_i after each trial (the prediction is
    v_i = w_i * u_i).

    NOTE: the export had this loop at module level with a stray `return`
    (a SyntaxError) — it was evidently a function; restored as one.
    """
    uu = stimuli if stimuli is not None else u
    rr = rewards if rewards is not None else r
    n = len(uu)
    w = np.zeros(n)
    for i in range(n):
        # On the first trial w[i-1] reads w[-1], which is still 0, so
        # learning effectively starts from w = 0.
        w[i] = w[i-1] + epsilon * (rr[i] - w[i-1] * uu[i]) * uu[i]
    return w
In [266]:
# Part (b): learn w with epsilon = 0.1 and plot the prediction v_i = w_i*u_i.
# The exported cell lost the `y` data of this trace; the Rescorla-Wagner
# update is recomputed inline here to restore it.
eps_b = 0.1
w = np.zeros(nb_trials)
for t in range(nb_trials):
    # w[-1] is still 0 on the first pass, so learning starts from w = 0.
    w[t] = w[t-1] + eps_b * (r[t] - w[t-1] * u[t]) * u[t]

trace_w = go.Scatter(
    x = list(range(1, nb_trials+1)),
    y = w * u,  # prediction v = w*u (u is all ones here, so v == w)
    mode = 'lines+markers',
    name = 'Prediction v',
    line = dict(
        width = 3
    )
)

plotly.offline.iplot(
    go.Figure(
        data=[trace_stimulus, trace_reward, trace_w],
        layout=layout)
)

## (c) - Change the learning parameter $ε$. What happens?¶

In [267]:
display(Markdown("### For different values of epsilon $∈ [0.1, 1]$"))

number_lines = 10
legend_every = 1

# Ten learning rates evenly spaced in [0.1, 1].
value_min, value_max = .1, 1.
delta_val = (value_max-value_min)/(number_lines-1)

colors = colorscale_list('Greens', number_lines+3, return_rgb_only=True)

# Plotting the evolution of the prediction v for each learning rate.
traces_eps = []

for i, epsilon in enumerate([round(value_min + i*delta_val, 2) for i in range(number_lines)]):
    # Rescorla-Wagner update for this epsilon (the exported cell lost the
    # loop body and the trace's y data; both are restored here).
    w = np.zeros(nb_trials)
    for t in range(nb_trials):
        w[t] = w[t-1] + epsilon * (r[t] - w[t-1] * u[t]) * u[t]
    traces_eps.append(
        go.Scatter(
            x = list(range(1, nb_trials+1)),
            y = w * u,  # prediction v = w*u
            mode = 'lines',
            name = 'Prediction with epsilon = {}'.format(epsilon),
            line = dict(
                width = 1,
                color = colors[i+2],
                shape = 'spline',
                dash = 'solid'
            ),
            hoverlabel = dict(
                namelength = -1
            ),
            showlegend = (i % legend_every == 0)
        )
    )

plotly.offline.iplot(go.Figure(data=[trace_stimulus, trace_reward]+traces_eps, layout=layout))

### For different values of epsilon $∈ [0.1, 1]$¶

In [268]:
display(Markdown("### For different values of epsilon $∈ [1, 2[$"))

number_lines = 10
legend_every = 1

# Ten learning rates evenly spaced in [1, 1.9].
value_min, value_max = 1, 1.9
delta_val = (value_max-value_min)/(number_lines-1)

colors = colorscale_list('Greens', number_lines+3, return_rgb_only=True)

# Plotting the evolution of the prediction v for each learning rate.
traces_eps = []

for i, epsilon in enumerate([round(value_min + i*delta_val, 2) for i in range(number_lines)]):
    # Rescorla-Wagner update for this epsilon (the exported cell lost the
    # loop body and the trace's y data; both are restored here). For
    # 1 < eps < 2 the weight overshoots but still converges, oscillating.
    w = np.zeros(nb_trials)
    for t in range(nb_trials):
        w[t] = w[t-1] + epsilon * (r[t] - w[t-1] * u[t]) * u[t]
    traces_eps.append(
        go.Scatter(
            x = list(range(1, nb_trials+1)),
            y = w * u,  # prediction v = w*u
            mode = 'lines',
            name = 'Prediction with epsilon = {}'.format(epsilon),
            line = dict(
                width = 1,
                color = colors[i+2],
                shape = 'spline',
                dash = 'solid'
            ),
            hoverlabel = dict(
                namelength = -1
            ),
            showlegend = (i % legend_every == 0)
        )
    )

plotly.offline.iplot(go.Figure(data=[trace_stimulus, trace_reward]+traces_eps, layout=layout))

### For different values of epsilon $∈ [1, 2[$¶

In [271]:
display(Markdown("### For different values of epsilon $≥ 2$"))

number_lines = 5
legend_every = 1

# Five learning rates just at/above 2.
value_min, value_max = 2, 2.05
delta_val = (value_max-value_min)/(number_lines-1)

colors = colorscale_list('Greens', number_lines+3, return_rgb_only=True)

# Plotting the evolution of the prediction v for each learning rate.
traces_eps = []

for i, epsilon in enumerate([round(value_min + i*delta_val, 2) for i in range(number_lines)]):
    # Rescorla-Wagner update for this epsilon (the exported cell lost the
    # loop body and the trace's y data; both are restored here). Since each
    # step maps w -> (1 - eps)*w + eps*r, |1 - eps| >= 1 for eps >= 2, so
    # the weight no longer converges.
    w = np.zeros(nb_trials)
    for t in range(nb_trials):
        w[t] = w[t-1] + epsilon * (r[t] - w[t-1] * u[t]) * u[t]
    traces_eps.append(
        go.Scatter(
            x = list(range(1, nb_trials+1)),
            y = w * u,  # prediction v = w*u
            mode = 'lines',
            name = 'Prediction with epsilon = {}'.format(epsilon),
            line = dict(
                width = 1,
                color = colors[i+2],
                shape = 'spline',
                dash = 'solid'
            ),
            hoverlabel = dict(
                namelength = -1
            ),
            showlegend = (i % legend_every == 0)
        )
    )

# Fixed-range axes: the non-converging predictions leave the [0, 1] band.
layout2 = go.Layout(
    title= 'Trials of a conditioning experiment',
    hovermode= 'closest',
    xaxis= dict(
        range=[10, 50],
        title= 'Trial number...',
        ticklen= 5,
        zeroline= False,
        gridwidth= 2,
    ),
    yaxis=dict(
        range=[-10, 10],
        title= 'Value',
        ticklen= 5,
        gridwidth= 2,
    ),
    showlegend= True
)

plotly.offline.iplot(go.Figure(data=[trace_stimulus, trace_reward]+traces_eps, layout=layout2))

## (d) - Partial conditioning: In each trial, the stimulus is present, but the presence of the reward is a random event with probability $0.4$. Plot the presence/absence of the reward as a function of the trial index. Redo the simulation from (b). What happens?¶

In [272]:
# Part (d), partial conditioning: the stimulus is always present but the
# reward is a Bernoulli(0.4) random event on each trial.
proba = 0.4

np.random.seed(1)  # fixed seed so the random reward sequence is reproducible

# 0/1 reward indicators: 1 with probability `proba`, 0 otherwise.
r = (np.random.rand(nb_trials)<proba).astype(int)

trace_reward = go.Scatter(
x = list(range(1, nb_trials+1)),
y = r,
mode = 'lines',
name = 'Random reward r with proba p={}'.format(proba),
line = dict(
dash = 'dash'
)
)

plotly.offline.iplot(
go.Figure(
data=[trace_stimulus, trace_reward],
layout=layout)
)
In [273]:
# Prediction under partial reinforcement with epsilon = 0.1: v fluctuates
# around the reward probability. The exported cell lost the `y` data of this
# trace; the Rescorla-Wagner update is recomputed inline here to restore it.
eps_d = 0.1
w = np.zeros(nb_trials)
for t in range(nb_trials):
    w[t] = w[t-1] + eps_d * (r[t] - w[t-1] * u[t]) * u[t]

trace_w = go.Scatter(
    x = list(range(1, nb_trials+1)),
    y = w * u,  # prediction v = w*u
    mode = 'lines+markers',
    name = 'Prediction with epsilon=0.1',
    line = dict(
        width = 3
    )
)

plotly.offline.iplot(
    go.Figure(
        data=[trace_stimulus, trace_reward, trace_w],
        layout=layout)
)
In [274]:
display(Markdown("### For different values of epsilon $∈ [0.1, 1]$"))

number_lines = 10
legend_every = 1

# Ten learning rates evenly spaced in [0.1, 1].
value_min, value_max = .1, 1
delta_val = (value_max-value_min)/(number_lines-1)

colors = colorscale_list('Greens', number_lines+3, return_rgb_only=True)

# Plotting the evolution of the prediction v for each learning rate.
traces_eps = []

for i, epsilon in enumerate([round(value_min + i*delta_val, 2) for i in range(number_lines)]):
    # Rescorla-Wagner update for this epsilon with the random rewards r
    # (the exported cell lost the loop body and the trace's y data).
    w = np.zeros(nb_trials)
    for t in range(nb_trials):
        w[t] = w[t-1] + epsilon * (r[t] - w[t-1] * u[t]) * u[t]
    traces_eps.append(
        go.Scatter(
            x = list(range(1, nb_trials+1)),
            y = w * u,  # prediction v = w*u
            mode = 'lines',
            name = 'Prediction with epsilon = {}'.format(epsilon),
            line = dict(
                width = 1,
                color = colors[i+2],
                shape = 'spline',
                dash = 'solid'
            ),
            hoverlabel = dict(
                namelength = -1
            ),
            showlegend = (i % legend_every == 0)
        )
    )

plotly.offline.iplot(go.Figure(data=[trace_stimulus, trace_reward]+traces_eps, layout=layout))

### For different values of epsilon $∈ [0.1, 1]$¶

In [275]:
display(Markdown("### For different values of epsilon $∈ [1, 2[$"))

number_lines = 10
legend_every = 1

# Ten learning rates evenly spaced in [1, 1.9].
value_min, value_max = 1, 1.9
delta_val = (value_max-value_min)/(number_lines-1)

colors = colorscale_list('Greens', number_lines+3, return_rgb_only=True)

# Plotting the evolution of the prediction v for each learning rate.
traces_eps = []

for i, epsilon in enumerate([round(value_min + i*delta_val, 2) for i in range(number_lines)]):
    # Rescorla-Wagner update for this epsilon with the random rewards r
    # (the exported cell lost the loop body and the trace's y data).
    w = np.zeros(nb_trials)
    for t in range(nb_trials):
        w[t] = w[t-1] + epsilon * (r[t] - w[t-1] * u[t]) * u[t]
    traces_eps.append(
        go.Scatter(
            x = list(range(1, nb_trials+1)),
            y = w * u,  # prediction v = w*u
            mode = 'lines',
            name = 'Prediction with epsilon = {}'.format(epsilon),
            line = dict(
                width = 1,
                color = colors[i+2],
                shape = 'spline',
                dash = 'solid'
            ),
            hoverlabel = dict(
                namelength = -1
            ),
            showlegend = (i % legend_every == 0)
        )
    )

plotly.offline.iplot(go.Figure(data=[trace_stimulus, trace_reward]+traces_eps, layout=layout))

### For different values of epsilon $∈ [1, 2[$¶

In [277]:
display(Markdown("### For different values of epsilon $≥2$"))

number_lines = 5
legend_every = 1

# Five learning rates just at/above 2.
value_min, value_max = 2, 2.05
delta_val = (value_max-value_min)/(number_lines-1)

colors = colorscale_list('Greens', number_lines+3, return_rgb_only=True)

# Plotting the evolution of the prediction v for each learning rate.
traces_eps = []

for i, epsilon in enumerate([round(value_min + i*delta_val, 2) for i in range(number_lines)]):
    # Rescorla-Wagner update for this epsilon with the random rewards r
    # (the exported cell lost the loop body and the trace's y data). Each
    # step maps w -> (1 - eps)*w + eps*r, so for eps >= 2 the weight no
    # longer converges.
    w = np.zeros(nb_trials)
    for t in range(nb_trials):
        w[t] = w[t-1] + epsilon * (r[t] - w[t-1] * u[t]) * u[t]
    traces_eps.append(
        go.Scatter(
            x = list(range(1, nb_trials+1)),
            y = w * u,  # prediction v = w*u
            mode = 'lines',
            name = 'Prediction with epsilon = {}'.format(epsilon),
            line = dict(
                width = 1,
                color = colors[i+2],
                shape = 'spline',
                dash = 'solid'
            ),
            hoverlabel = dict(
                namelength = -1
            ),
            showlegend = (i % legend_every == 0)
        )
    )

# Fixed-range axes: the non-converging predictions leave the [0, 1] band.
layout3 = go.Layout(
    title= 'Trials of a conditioning experiment',
    hovermode= 'closest',
    xaxis= dict(
        range=[0, 50],
        title= 'Trial number...',
        ticklen= 5,
        zeroline= False,
        gridwidth= 2,
    ),
    yaxis=dict(
        range=[-10, 10],
        title= 'Value',
        ticklen= 5,
        gridwidth= 2,
    ),
    showlegend= True
)

plotly.offline.iplot(go.Figure(data=[trace_stimulus, trace_reward]+traces_eps, layout=layout3))

## (e) - Blocking: Assume that there are two stimuli, $u_1$ and $u_2$, and two parameters, $w_1$ and $w_2$ to learn, and that the animal's prediction is given by $v = w_1 u_1 + w_2 u_2$. During the first $25$ trials, only one stimulus and the reward are present, during the next $25$ trials, both stimuli and the reward are present. What happens?¶

In [298]:
# Part (e), blocking — step 1: a single stimulus paired with the reward on
# every trial, as in part (b).
u = np.ones(nb_trials)
r = np.ones(nb_trials)

# Learn w for the single stimulus with epsilon = 0.1 (the exported cell lost
# the `y` data of trace_w_1D; the computation is restored inline here —
# TODO confirm epsilon = 0.1 against the original notebook).
w_1D = np.zeros(nb_trials)
for t in range(nb_trials):
    w_1D[t] = w_1D[t-1] + 0.1 * (r[t] - w_1D[t-1] * u[t]) * u[t]

trace_reward = go.Scatter(
    x = list(range(1, nb_trials+1)),
    y = r,
    mode = 'lines',
    name = 'Reward r',
    line = dict(
        dash = 'dash',
        width = 3
    )
)

trace_w_1D = go.Scatter(
    x = list(range(1, nb_trials+1)),
    y = w_1D * u,  # prediction v = w*u
    mode = 'lines',
    name = 'Prediction v based on one (the first) stimulus only',
    line = dict(
        width = 1
    )
)

plotly.offline.iplot(
    go.Figure(
        data=[trace_stimulus, trace_reward, trace_w_1D],
        layout=layout)
)
In [299]:
w = np.zeros((nb_trials, 2))
eps = np.array(epsilons)

for i in range(nb_trials):
w[i] = w[i-1]+(r[i]-w[i-1].dot(u[i]))*eps*u[i]

return np.array(w)

nb_trials = 50
index_change = 25

u = np.ones((nb_trials, 2))
r = np.ones(nb_trials)
u[:index_change, -1] = 0

trace_stimulus_2D = []
trace_stimulus_2D.append(go.Scatter(
x = list(range(1, nb_trials+1)),
y = u[:,0],
mode = 'markers',
name = 'Stimulus 1',
marker = dict(
size = 9
)
))

trace_stimulus_2D.append(go.Scatter(
x = list(range(1, nb_trials+1)),
y = u[:,1],
mode = 'markers',
name = 'Stimulus 2',
marker = dict(
symbol = 'diamond',
color = 'yellow'
),
))

trace_reward = go.Scatter(
x = list(range(1, nb_trials+1)),
y = r,
mode = 'lines',
name = 'Reward r',
line = dict(
dash = 'dash',
width = 4,
color = 'orange'
)
)
In [300]:
# Prediction v_i = w_i . u_i over the blocking experiment.
# NOTE(review): `gradient_descent_2D` is not defined as a function in this
# export — presumably the two-stimulus Rescorla-Wagner loop of the previous
# cell; confirm against the original notebook.
trace_w = go.Scatter(
x = list(range(1, nb_trials+1)),
# The comprehension variable `w` shadows any global `w` inside this line.
y = np.array([w.dot(ui) for w, ui in zip(gradient_descent_2D(), u)]),
mode = 'lines',
name = 'Prediction v for the two stimuli',
line = dict(
width = 3,
dash = 'dash',
color = 'green'
)
)

plotly.offline.iplot(
go.Figure(
data=trace_stimulus_2D+[trace_reward, trace_w, trace_w_1D],
layout=layout)
)
In [301]:
Out[301]:
array([[ 0.1       ,  0.        ],
[ 0.19      ,  0.        ],
[ 0.271     ,  0.        ],
[ 0.3439    ,  0.        ],
[ 0.40951   ,  0.        ],
[ 0.468559  ,  0.        ],
[ 0.5217031 ,  0.        ],
[ 0.56953279,  0.        ],
[ 0.61257951,  0.        ],
[ 0.65132156,  0.        ],
[ 0.6861894 ,  0.        ],
[ 0.71757046,  0.        ],
[ 0.74581342,  0.        ],
[ 0.77123208,  0.        ],
[ 0.79410887,  0.        ],
[ 0.81469798,  0.        ],
[ 0.83322818,  0.        ],
[ 0.84990536,  0.        ],
[ 0.86491483,  0.        ],
[ 0.87842335,  0.        ],
[ 0.89058101,  0.        ],
[ 0.90152291,  0.        ],
[ 0.91137062,  0.        ],
[ 0.92023356,  0.        ],
[ 0.9282102 ,  0.        ],
[ 0.93538918,  0.00717898],
[ 0.94113237,  0.01292216],
[ 0.94572691,  0.01751671],
[ 0.94940255,  0.02119235],
[ 0.95234306,  0.02413286],
[ 0.95469547,  0.02648527],
[ 0.95657739,  0.02836719],
[ 0.95808294,  0.02987273],
[ 0.95928737,  0.03107717],
[ 0.96025092,  0.03204071],
[ 0.96102175,  0.03281155],
[ 0.96163842,  0.03342822],
[ 0.96213176,  0.03392156],
[ 0.96252643,  0.03431623],
[ 0.96284216,  0.03463196],
[ 0.96309475,  0.03488455],
[ 0.96329682,  0.03508662],
[ 0.96345848,  0.03524827],
[ 0.9635878 ,  0.0353776 ],
[ 0.96369126,  0.03548106],
[ 0.96377403,  0.03556383],
[ 0.96384024,  0.03563004],
[ 0.96389321,  0.03568301],
[ 0.96393559,  0.03572539],
[ 0.96396949,  0.03575929]])

## (f) - Overshadowing: Assume again that there are two stimuli and two parameters to learn. However, now both stimuli are present from the beginning, as is the reward. What happens if one of the learning rates is larger (e.g. $ε = 0.2$ for one stimulus vs. $ε = 0.1$ for the other)?¶

In [302]:
# Part (f), overshadowing: both stimuli are present on every trial from the
# start (r is unchanged from the previous cell, i.e. reward on every trial).
u = np.ones((nb_trials, 2))

trace_stimulus_2D = []
trace_stimulus_2D.append(go.Scatter(
x = list(range(1, nb_trials+1)),
y = u[:,0],
mode = 'markers',
name = 'Stimulus 1',
marker = dict(
size = 9
)
))

trace_stimulus_2D.append(go.Scatter(
x = list(range(1, nb_trials+1)),
y = u[:,1],
mode = 'markers',
name = 'Stimulus 2',
marker = dict(
symbol = 'diamond',
color = 'yellow'
),
))

trace_reward = go.Scatter(
x = list(range(1, nb_trials+1)),
y = r,
mode = 'lines',
name = 'Reward r',
line = dict(
dash = 'dash',
width = 4,
color = 'orange'
)
)
In [305]:
# Prediction v_i = w_i . u_i with unequal learning rates (0.2 vs 0.1) —
# the faster-learning stimulus overshadows the slower one.
# NOTE(review): `gradient_descent_2D` is not defined as a function in this
# export — presumably the two-stimulus Rescorla-Wagner loop from the
# blocking section; confirm against the original notebook.
trace_w = go.Scatter(
x = list(range(1, nb_trials+1)),
# The comprehension variable `w` shadows any global `w` inside this line.
y = np.array([w.dot(ui) for w, ui in zip(gradient_descent_2D(epsilons=(0.2, 0.1)), u)]),
mode = 'lines',
name = 'Prediction v for the two stimuli',
line = dict(
width = 2,
dash = 'dash',
color = 'green'
)
)

plotly.offline.iplot(
go.Figure(
data=trace_stimulus_2D+[trace_reward, trace_w, trace_w_1D],
layout=layout)
)
In [306]:
Out[306]:
array([[ 0.2       ,  0.1       ],
[ 0.34      ,  0.17      ],
[ 0.438     ,  0.219     ],
[ 0.5066    ,  0.2533    ],
[ 0.55462   ,  0.27731   ],
[ 0.588234  ,  0.294117  ],
[ 0.6117638 ,  0.3058819 ],
[ 0.62823466,  0.31411733],
[ 0.63976426,  0.31988213],
[ 0.64783498,  0.32391749],
[ 0.65348449,  0.32674224],
[ 0.65743914,  0.32871957],
[ 0.6602074 ,  0.3301037 ],
[ 0.66214518,  0.33107259],
[ 0.66350163,  0.33175081],
[ 0.66445114,  0.33222557],
[ 0.6651158 ,  0.3325579 ],
[ 0.66558106,  0.33279053],
[ 0.66590674,  0.33295337],
[ 0.66613472,  0.33306736],
[ 0.6662943 ,  0.33314715],
[ 0.66640601,  0.33320301],
[ 0.66648421,  0.3332421 ],
[ 0.66653895,  0.33326947],
[ 0.66657726,  0.33328863],
[ 0.66660408,  0.33330204],
[ 0.66662286,  0.33331143],
[ 0.666636  ,  0.333318  ],
[ 0.6666452 ,  0.3333226 ],
[ 0.66665164,  0.33332582],
[ 0.66665615,  0.33332807],
[ 0.6666593 ,  0.33332965],
[ 0.66666151,  0.33333076],
[ 0.66666306,  0.33333153],
[ 0.66666414,  0.33333207],
[ 0.6666649 ,  0.33333245],
[ 0.66666543,  0.33333271],
[ 0.6666658 ,  0.3333329 ],
[ 0.66666606,  0.33333303],
[ 0.66666624,  0.33333312],
[ 0.66666637,  0.33333318],
[ 0.66666646,  0.33333323],
[ 0.66666652,  0.33333326],
[ 0.66666656,  0.33333328],
[ 0.6666666 ,  0.3333333 ],
[ 0.66666662,  0.33333331],
[ 0.66666663,  0.33333332],
[ 0.66666664,  0.33333332],
[ 0.66666665,  0.33333332],
[ 0.66666665,  0.33333333]])