We assume a bee is collecting nectar from yellow and blue flowers. During one day the bee can sample $100$ flowers. Let us observe its behavior during two days.
During the first day, blue flowers carry a reward of $r_b = 8$, and yellow flowers a reward of $r_y = 2$. During the second day, rewards are reversed, so that $r_b = 2$ and $r_y = 8$.
We assume that in each trial the bee decides to land either on a blue or yellow flower. The bee bases its decision on estimates of the nectar reward that it receives on blue or yellow flowers. Let us denote the bee's internal estimate of these nectar rewards as $m_b$ (blue flowers) and $m_y$ (yellow flowers). Then the bee will choose a blue flower with probability
$$p_b = \frac{1}{1 + \exp(β (m_y - m_b))} \qquad (1)$$
where we call $β$ the “exploitation-exploration trade-off” parameter.
# /!\ I'm using Python 3
import plotly
import numpy as np
import plotly.plotly as py
import plotly.graph_objs as go
from plotly import tools
import matplotlib.pyplot as plt
import scipy.io as sio
from scipy import stats
%matplotlib inline
plotly.offline.init_notebook_mode(connected=True)
from IPython.core.display import display, HTML, Markdown
# The polling here is to ensure that plotly.js has already been loaded before
# setting display alignment in order to avoid a race condition.
display(HTML(
'<script>'
'var waitForPlotly = setInterval( function() {'
'if( typeof(window.Plotly) !== "undefined" ){'
'MathJax.Hub.Config({ SVG: { font: "STIX-Web" }, displayAlign: "center" });'
'MathJax.Hub.Queue(["setRenderer", MathJax.Hub, "SVG"]);'
'clearInterval(waitForPlotly);'
'}}, 250 );'
'</script>'
))
# Colorscales
def colorscale_list(cmap, number_colors, return_rgb_only=False):
cm = plt.get_cmap(cmap)
colors = [np.array(cm(i/number_colors)) for i in range(1, number_colors+1)]
rgb_colors_plotly = []
rgb_colors_only = []
for i, c in enumerate(colors):
col = 'rgb{}'.format(tuple(255*c[:-1]))
rgb_colors_only.append(col)
rgb_colors_plotly.append([i/number_colors, col])
rgb_colors_plotly.append([(i+1)/number_colors, col])
return rgb_colors_only if return_rgb_only else rgb_colors_plotly
Note: the above decision-making strategy is sometimes called a “softmax”-strategy.
def sigmoid(x, beta=1):
return 1/(1+np.exp(-beta * x))
sigmoid = np.vectorize(sigmoid)
display(Markdown("### For different values of β"))
d_min, d_max = -10, 10
layout = go.Layout(
title= 'Probability of choosing the blue flower depending on the expected reward difference',
hovermode= 'closest',
xaxis= dict(
title= '$m_y - m_b$',
ticklen= 5,
zeroline= False,
gridwidth= 2,
),
yaxis=dict(
title= '$p_b = \\frac{1}{1 + \exp(β (m_y - m_b))}$',
ticklen= 5,
gridwidth= 2,
),
showlegend= True
)
legend_every = 1
values = list(np.arange(-0.75, 1, 0.25)) + [10]
colors = colorscale_list('Blues', len(values)+3, return_rgb_only=True)
# Plotting the evolution
traces_beta = []
for i, beta in enumerate(values):
if not (1 <= beta <= 4):
traces_beta.append(
go.Scatter(
x = np.linspace(d_min, d_max+1, 50),
y = sigmoid(-np.linspace(d_min, d_max+1, 50), beta=beta),
mode = 'lines+markers',
name = 'probability with β = {}'.format(beta),
line = dict(
width = 3,
color = colors[i+2],
shape = 'spline',
dash = 'solid'
),
hoverlabel = dict(
namelength = -1
),
showlegend = (i % legend_every == 0)
)
)
plotly.offline.iplot(go.Figure(data=traces_beta, layout=layout))
display(Markdown("### For different values of $m_y - m_b$"))
beta_min, beta_max = -3, 3
layout2 = go.Layout(
title= 'Probability of choosing the blue flower depending on the “exploitation-exploration trade-off” parameter',
hovermode= 'closest',
xaxis= dict(
title= '$β$',
ticklen= 5,
zeroline= False,
gridwidth= 2,
),
yaxis=dict(
title= '$p_b = \\frac{1}{1 + \exp(β (m_y - m_b))}$',
ticklen= 5,
gridwidth= 2,
),
showlegend= True
)
legend_every = 2
values = list(np.arange(-10, 11, 1))
colors = colorscale_list('Reds', len(values)+3, return_rgb_only=True)
# Plotting the evolution
traces_d = []
for i, d in enumerate(values):
traces_d.append(
go.Scatter(
x = np.linspace(beta_min, beta_max, 20),
y = [sigmoid(-d, beta=beta) for beta in np.linspace(beta_min, beta_max, 20)],
mode = 'lines+markers',
name = '$\\qquad \\quad \\text{proba. with } m_y-m_b = ' + '{}$'.format(d),
line = dict(
width = 3,
color = colors[i+2],
shape = 'spline',
dash = 'solid'
),
hoverlabel = dict(
namelength = -1
),
showlegend = (i % legend_every == 0)
)
)
plotly.offline.iplot(go.Figure(data=traces_d, layout=layout2))
nb_trials = 200
index_change_day = 100
rb = 8*np.ones(nb_trials)
ry = 2*np.ones(nb_trials)
rb[index_change_day:] = 2
ry[index_change_day:] = 8
mb = np.zeros(nb_trials)
my = 5*np.ones(nb_trials)
trace_rb = go.Scatter(
x = list(range(1, nb_trials+1)),
y = rb,
mode = 'lines',
name = 'Blue flower reward',
line = dict(
width = 2,
color = 'blue'
)
)
trace_ry = go.Scatter(
x = list(range(1, nb_trials+1)),
y = ry,
mode = 'lines',
name = 'Yellow flower reward',
line = dict(
width = 2,
color = 'gold'
)
)
trace_mb = go.Scatter(
x = list(range(1, nb_trials+1)),
y = mb,
mode = 'markers',
name = 'Estimate of blue flower reward',
marker = dict(
symbol = 'diamond',
color = 'blue'
),
)
trace_my = go.Scatter(
x = list(range(1, nb_trials+1)),
y = my,
mode = 'markers',
name = 'Estimate of yellow flower reward',
marker = dict(
symbol = 'diamond',
color = 'gold'
),
)
for beta in [0, 0.5, 0.8]:
display(Markdown("### Dumb bee, for $β = {}$".format(beta)))
trace_pb = go.Bar(
x = list(range(1, nb_trials+1)),
y = sigmoid(-(my-mb), beta=beta),
base = 0,
marker = dict(
color = 'blue'
),
name = 'Proba. of choosing the blue flower'
)
trace_py = go.Bar(
x = list(range(1, nb_trials+1)),
y = 1-sigmoid(-(my-mb), beta=beta),
base = -(1-sigmoid(-(my-mb), beta=beta)),
marker = dict(
color = 'gold'
),
name = 'Proba. of choosing the yellow flower'
)
fig = tools.make_subplots(rows=3, cols=1, print_grid=False,\
subplot_titles=('Reward and Estimate of reward for the blue flower',\
'Probability of landing on the two types of flowers',\
'Reward and Estimate of reward for the yellow flower'))
fig.append_trace(trace_rb, 1, 1)
fig.append_trace(trace_mb, 1, 1)
fig.append_trace(trace_pb, 2, 1)
fig.append_trace(trace_py, 2, 1)
fig.append_trace(trace_ry, 3, 1)
fig.append_trace(trace_my, 3, 1)
fig['layout'].update(height=900, width=1050, title='Choices made by the dumb bee for β={}'.format(beta))
fig['layout']['xaxis1'].update(title='Trial number...')
fig['layout']['xaxis2'].update(title='Trial number...')
fig['layout']['xaxis3'].update(title='Trial number...')
fig['layout']['yaxis1'].update(title='Value')
fig['layout']['yaxis2'].update(title='Probability', tickvals=[-1, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 1],\
ticktext=['1', '0.7', '0.5', '0.3', '0.1', '0.1', '0.3', '0.5', '0.7', '1'])
fig['layout']['yaxis3'].update(title='Value')
plotly.offline.iplot(fig)
$$\begin{align*} m_b &→ m_b + ε(r_b − m_b) && (4) \\ m_y &→m_y +ε(r_y − m_y) && (5) \end{align*}$$
nb_trials = 200
index_change_day = 100
rb = 8*np.ones(nb_trials)
ry = 2*np.ones(nb_trials)
rb[index_change_day:] = 2
ry[index_change_day:] = 8
def online_update(beta, epsilon=0.2):
mb, my = np.zeros(nb_trials), np.zeros(nb_trials)
my[0] = 5
for i in range(1, nb_trials):
p_b = sigmoid(-(my[i-1]-mb[i-1]), beta=beta)
if np.random.rand() <= p_b:
mb[i] = mb[i-1]+epsilon*(rb[i]-mb[i-1])
my[i] = my[i-1]
else:
my[i] = my[i-1]+epsilon*(ry[i]-my[i-1])
mb[i] = mb[i-1]
return mb, my
for beta in [0, 0.5, 1]:
display(Markdown("### Smart bee, for $β = {}$".format(beta)))
mb, my = online_update(beta)
trace_rb = go.Scatter(
x = list(range(1, nb_trials+1)),
y = rb,
mode = 'lines',
name = 'Blue flower reward',
line = dict(
width = 2,
color = 'blue'
)
)
trace_ry = go.Scatter(
x = list(range(1, nb_trials+1)),
y = ry,
mode = 'lines',
name = 'Yellow flower reward',
line = dict(
width = 2,
color = 'gold'
)
)
trace_mb = go.Scatter(
x = list(range(1, nb_trials+1)),
y = mb,
mode = 'markers',
name = 'Estimate of blue flower reward',
marker = dict(
symbol = 'diamond',
color = 'blue'
),
)
trace_my = go.Scatter(
x = list(range(1, nb_trials+1)),
y = my,
mode = 'markers',
name = 'Estimate of yellow flower reward',
marker = dict(
symbol = 'diamond',
color = 'gold'
),
)
trace_pb = go.Bar(
x = list(range(1, nb_trials+1)),
y = sigmoid(-(my-mb), beta=beta),
base = 0,
marker = dict(
color = 'blue'
),
name = 'Proba. of choosing the blue flower'
)
trace_py = go.Bar(
x = list(range(1, nb_trials+1)),
y = 1-sigmoid(-(my-mb), beta=beta),
base = -(1-sigmoid(-(my-mb), beta=beta)),
marker = dict(
color = 'gold'
),
name = 'Proba. of choosing the yellow flower'
)
fig = tools.make_subplots(rows=3, cols=1, print_grid=False,\
subplot_titles=('Reward and Estimate of reward for the blue flower',\
'Probability of landing on the two types of flowers',\
'Reward and Estimate of reward for the yellow flower'))
fig.append_trace(trace_rb, 1, 1)
fig.append_trace(trace_mb, 1, 1)
fig.append_trace(trace_pb, 2, 1)
fig.append_trace(trace_py, 2, 1)
fig.append_trace(trace_ry, 3, 1)
fig.append_trace(trace_my, 3, 1)
fig['layout'].update(height=900, width=1050, title='Choices made by the smart bee for ε=0.2 and β={}'.format(beta))
fig['layout']['xaxis1'].update(title='Trial number...')
fig['layout']['xaxis2'].update(title='Trial number...')
fig['layout']['xaxis3'].update(title='Trial number...')
fig['layout']['yaxis1'].update(title='Value')
fig['layout']['yaxis2'].update(title='Probability', tickvals=[-1, -0.7, -0.5, -0.3, -0.1, 0.1, 0.3, 0.5, 0.7, 1],\
ticktext=['1', '0.7', '0.5', '0.3', '0.1', '0.1', '0.3', '0.5', '0.7', '1'])
fig['layout']['yaxis3'].update(title='Value')
plotly.offline.iplot(fig)