viewof sequences = {
  // render the parsed Vega spec into a placeholder div and expose the view
  const div = document.createElement("div");
div.value = new vega.View(parsedSpec).initialize(div).run();
return div;
}
Psychology as a Science
Today we’ll learn about the sampling distribution
But before we can do that we need to know what distributions are, where they come from, and how to describe them
The binomial distribution
The normal distribution
Processes that produce normal distributions
Processes that don’t produce normal distributions
Describing normal distributions
Describing departures from the normal distribution
Distributions and samples
The Standard Error of the Mean
The binomial distribution is one of the simplest distributions you’ll come across
To see where it comes from, we’ll just build one!
We can build one by flipping a coin (multiple times) and counting up the number of heads that we get
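To make that concrete, here is a minimal sketch (separate from the interactive figure below; the names nFlips and nRepeats are just illustrative) that repeats the flip-and-count experiment many times and tallies the number of heads:
binomial_sketch = {
  const nFlips = 4; // hypothetical number of coin flips per experiment
  const nRepeats = 10000; // how many times we repeat the whole experiment
  const counts = new Array(nFlips + 1).fill(0); // counts[k] = times we saw k heads
  for (let i = 0; i < nRepeats; i++) {
    let heads = 0;
    for (let j = 0; j < nFlips; j++) {
      if (Math.random() < 0.5) heads++; // a fair coin: heads with probability 0.5
    }
    counts[heads]++;
  }
  return counts; // roughly proportional to the binomial distribution for nFlips coins
}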
viewof coins = htl.html`<input style="width:300px" type="range" id="coins" min="1" max="7" value="1" class="form-range">`
coins_label = htl.html`<label for="coins" class= "form-label" width="100%">Number of coin flips: ${
coins - 1
}</label>`
In Figure 1 we can see the possible sequences of events that can happen when we flip a coin (⚈ = heads and ⚆ = tails). Figure 2 might not look very interesting at the moment.
In Figure 2 we just count up the number of sequences that lead to 0 heads, 1 head, 2 heads, etc
As we flip more coins the distribution of number of heads takes on a characteristic shape
This is the binomial distribution
The binomial distribution is just an idealised representation of the process that generates sequences of heads and tails when we flip a coin
It’s an idealisation, but natural processes do give rise to binomial distributions
In the bean machine (Figure 3) balls fall from the top and bounce off pegs as they fall
Most of the balls collect near the middle, and fewer balls are found at the edges
Flipping coins might seem a long way from anything you might want to study in psychology, but the shape of the binomial distribution might be familiar to you
But there are a few key differences:
The binomial distribution is bounded at 0 and n (number of coins)
The binomial distribution is discrete (0, 1, 2, 3 etc, but no 2.5)
The normal distribution is a mathematical abstraction, but we can use it as a model of real-life populations that are produced by certain kinds of natural processes
To see how a natural process can give rise to a normal distribution, let’s play a board game!
There’s only 1 rule: You roll the dice n times (number of rounds), add up all the values, and move that many spaces. That is your score
We can play any number of rounds
And we’ll play with friends, because you can’t get a distribution of scores if you play by yourself!
If we have enough players who play enough rounds then the distribution of scores across all the players will take on a characteristic shape
A player’s score in the dice game is determined by adding up the values of each roll
So after each roll their score can increase by some amount
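Here is a minimal sketch of the additive game (the player and round counts are made up for illustration; the real simulation behind the figure uses the notebook’s own helpers):
dice_game_sketch = {
  const nPlayers = 10000; // hypothetical number of players
  const nRounds = 10; // hypothetical number of rounds
  const scores = [];
  for (let p = 0; p < nPlayers; p++) {
    let score = 0;
    for (let r = 0; r < nRounds; r++) {
      score += 1 + Math.floor(Math.random() * 6); // one roll of a six-sided die
    }
    scores.push(score);
  }
  return scores; // a histogram of these scores is roughly bell-shaped
}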
The dice game might look artificial, but maybe it isn’t that different to some natural processes
For example, developmental processes might look pretty similar to the dice game
Think about height:
At each point in time some value can be added (growth) to a person’s current height
So if we looked at the distribution of heights in the population then we might find something that looks similar to a normal distribution
A key factor that results in the normal distribution shape is this adding up of values
Let’s change the rules of the game
Instead of adding up the values of each roll, we’ll multiply them (e.g., roll a 1, 2, and 4 and your score is 8)
The distribution is skewed, with most players having low scores and a few players having very high scores
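Here is the same sketch with only that one rule changed, from adding to multiplying (again, purely illustrative numbers):
multiplicative_game_sketch = {
  const nPlayers = 10000; // hypothetical number of players
  const nRounds = 10; // hypothetical number of rounds
  const scores = [];
  for (let p = 0; p < nPlayers; p++) {
    let score = 1;
    for (let r = 0; r < nRounds; r++) {
      score *= 1 + Math.floor(Math.random() * 6); // multiply instead of add
    }
    scores.push(score);
  }
  return scores; // most scores are small, a handful are enormous: a right-skewed distribution
}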
Can you think of a process that operates like this in the real world?
How about interest or returns on investments?
Maybe this explains the shape of real world wealth distributions…
The normal distribution has a characteristic bell shape but not all normal distributions are identical
They can vary in terms of where they are centered and how spread out they are
Changing \(\mu\) and \(\sigma\) changes the absolute position of points on the plot, but not the relative positions measured in units of \(\sigma\)
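One way to see this is to convert a point’s position into units of \(\sigma\) (a z-score). A minimal sketch with made-up values:
z_score_sketch = {
  const z = (x, mu, sigma) => (x - mu) / sigma; // position measured in units of sigma
  return [
    z(115, 100, 15), // 115 on a distribution with mu = 100, sigma = 15 -> 1
    z(1, 0, 1) // 1 on a distribution with mu = 0, sigma = 1 -> also 1
  ];
}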
normal_plot_output = Plot.plot({
x: {
grid: true,
domain: [-4, 4]
},
y: {
grid: true,
domain: [0, 0.8]
},
marks: [
    // curve segment between mean ± s·sd, shown filled when the toggle is on
    Plot.line(
normal_plot(
mean_value - sd_value * s > -4 ? mean_value - sd_value * s : -4,
mean_value + sd_value * s < 4 ? mean_value + sd_value * s : 4,
mean_value,
sd_value
),
{
x: "x",
y: "y",
strokeWidth: 1,
fill: "blue",
opacity: show ? 0.5 : 0
}
),
    // vertical edges and baseline that close the shaded region
    Plot.line(fill_limits(s), {
x: "x",
y: "y",
fill: "blue",
strokeWidth: 1,
opacity: show ? 0.5 : 0
}),
    // the full normal curve from -4 to 4
    Plot.line(normal_plot(-4, 4, mean_value, sd_value), {
x: "x",
y: "y",
strokeWidth: 4
}),
Plot.ruleY([0])
]
})
normal_sliders = htl.html`<div style="font:smaller">${mean_value_slider}${sd_value_slider}</div>`
viewof show = Inputs.toggle({ label: "Show", value: false })
texmd`The plot shows a normal distribution with a mean ($\mu$) of ${mean_value} and a standard deviation ($\sigma$) of ${sd_value}.`
When we looked at the distribution of scores from the second dice game, we saw that it was skewed
Another way to deviate from the normal distribution is to have either fatter or skinnier tails
The tailedness of a distribution is given by its kurtosis.
The kurtosis of a distribution is often specified relative to that of the normal distribution; this is called excess kurtosis.
kurtosis_desc = md`- This distribution has an excess kurtosis of ${
kurtosis_values[kurtosis_value]
}. ${
kurtosis_values[kurtosis_value] === 0
? "It is a **Mesokurtic distribution**."
: kurtosis_values[kurtosis_value] > 0
? "It is a **Leptokurtic distribution**."
: "It is a **Platykurtic distribution**."
}`
kurtosis_plot = Plot.plot({
x: {
grid: true,
domain: [-3, 3]
},
y: {
grid: true,
domain: [0, 0.4]
},
marks: [
    // grey reference curve: the standard normal
    Plot.line(dists.standard_normal, {
      x: "x",
      y: "y",
      stroke: "grey",
      strokeWidth: 1
    }),
Plot.line(dists[Object.keys(kurtosis)[kurtosis_value]], {
x: "x",
y: "y",
strokeWidth: 4
}),
Plot.ruleY([0])
]
})
We’ve seen that whenever we look at the distribution of values that are produced by adding up numbers, we get something that looks like a normal distribution
In Lecture 6, we saw that the formula for the sample mean was as shown in Equation 1, below:
\[\bar{x}={\displaystyle\sum^{N}_{i=1}{\frac{x_i}{N}}} \qquad(1)\]
So to calculate a sample mean, we just add up a bunch of numbers
Let’s say I take lots of samples from a population.
And for each sample, I calculate the sample mean.
If we had to plot these sample means, then what would the distribution look like?
We can try it out.
Let’s say that I have a population with a mean of 100
And a standard deviation of 15.
From this population I can draw samples of 25 values
I’ll do this 100,000 times and plot the results in Figure 5
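If you want to try something like this yourself, here is a minimal sketch of the simulation (with fewer repeats than the 100,000 used for Figure 5, and a plain Box-Muller generator instead of the notebook’s helpers):
sampling_sketch = {
  const popMean = 100, popSD = 15, sampleSize = 25, nSamples = 10000;
  // one draw from a normal distribution via the Box-Muller transform
  const rnorm = (mean, sd) => {
    const u = 1 - Math.random(), v = Math.random();
    return mean + sd * Math.sqrt(-2 * Math.log(u)) * Math.cos(2 * Math.PI * v);
  };
  const sampleMeans = [];
  for (let i = 0; i < nSamples; i++) {
    let sum = 0;
    for (let j = 0; j < sampleSize; j++) sum += rnorm(popMean, popSD);
    sampleMeans.push(sum / sampleSize); // the mean of one sample of 25 values
  }
  // the standard deviation of the sample means should be close to 15 / sqrt(25) = 3
  const grandMean = sampleMeans.reduce((a, b) => a + b) / nSamples;
  const sdOfMeans = Math.sqrt(
    sampleMeans.reduce((a, b) => a + (b - grandMean) ** 2, 0) / (nSamples - 1)
  );
  return { grandMean, sdOfMeans };
}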
The standard deviation of the sampling distribution of the mean (the plot in Figure 5) has a special name:
It’s called the standard error of the mean!
Before we move on to how to calculate the standard error of the mean I want to assure you of something
You might think that the sampling distribution of the mean in Figure 5 is normally distributed because the population is normally distributed
But this is not the case. As your sample size increases, the sampling distribution of the mean will tend towards a normal distribution
And this will happen even if the population is not normally distributed
I showed you Figure 8 (above) where the average deviation of sample means from the population mean was either small (A) or large (B)
I asked you to imagine two scenarios where the average deviation of sample means from the population mean would be small (or zero): one that was a feature of the population and one that was a feature of the samples.
If you managed, then great! But if not, then here they are:
If the average (squared) deviation in the population is 0 then the average deviation of sample means from the population mean would be 0
Because all members of the population would be the same, so all samples would be the same, so all sample means would be the same
Conversely, if the average (squared) deviation in the population were larger, then the average deviation of sample means from the population mean would be larger
If the sample size was large (so large that it included the entire population) then the average deviation of sample means from the population mean would be 0
Because every sample would be identical to the population, so every sample mean would be identical to the population mean
Conversely, if the sample size was smaller, then the average deviation of sample means from the population mean would be larger
Let’s put these two ideas together to try to come up with a formula for the average (squared) deviation of the sample means from the population mean
Our formula will include:
\(n\): the sample size
\(\sigma^2\): the average (squared) deviations in the population (aka the variance of the population)
And we’ll call our result \(\sigma_{\bar{x}}^2\)
The only way to combine \(n\) and \(\sigma^2\) so that:
when \(n\) is very big \(\sigma_{\bar{x}}^2\) will be small (and vice versa) and
when \(\sigma^2\) is very small \(\sigma_{\bar{x}}^2\) will be small (and vice versa)
is the formula shown in Equation 2, below:
\[\sigma_{\bar{x}}^2=\frac{\sigma^2}{n} \qquad(2)\]
But remember, we don’t actually know the true \(\sigma^2\) (the variance of the population); we only know \(s^2\) (the sample variance, which is our estimate of the variance in the population). So we’ll make a slight change to the formula, as in Equation 3
\[s_{\bar{x}}^2=\frac{s^2}{n} \qquad(3)\]
There’s one final step to get to the formula for the standard error of the mean.
The formula in Equation 3 is framed in terms of the average (squared) deviations of sample means from the population mean, that is, in terms of variance.
But the standard error of the mean is the standard deviation of the sampling distribution
The standard deviation is just the square root of the variance, so we just need to take the square root of both sides of Equation 3 to get Equation 4, below:
\[s_{\bar{x}}=\frac{s}{\sqrt{n}} \qquad(4)\]
More commonly, however, you’ll see \(s_{\bar{x}}\) just written as \(\mathrm{SEM}\) for Standard Error of the Mean
And that is the formula for the standard error of the mean, and where it comes from
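As a quick check of Equation 4, here is a minimal sketch that computes the SEM for a small made-up sample:
sem_sketch = {
  const sample = [98, 104, 101, 95, 110, 99, 102, 97]; // hypothetical data
  const n = sample.length;
  const mean = sample.reduce((a, b) => a + b) / n;
  // sample variance s^2, using the n - 1 denominator
  const s2 = sample.reduce((a, b) => a + (b - mean) ** 2, 0) / (n - 1);
  const sem = Math.sqrt(s2) / Math.sqrt(n); // Equation 4: s / sqrt(n)
  return { mean, sem };
}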
This was, admittedly, a fairly long-winded way to get to what is essentially a very simple formula
However, as I have alluded to several times, the standard error of the mean is a fairly misunderstood concept
I hope that getting there the long way has helped you to build a better intuition of what the standard error of the mean actually is
I dislike talking about misconceptions because I think it can sometimes create them
But it is worth talking about one prominent one
Misconception
The SEM tells you how far away the sample mean is likely to be from the actual population mean
But it doesn’t tell you anything about the sample mean… at least not your sample mean that you have calculated for your particular sample
The standard error of the mean is just what we’ve defined it as:
The standard deviation of the sampling distribution
So what does this tell you?
It tells you how far, on average, sample means (not your sample mean) will be from the population mean
Your sample mean might be close to the population mean, or it might be far away from the population mean. But the SEM doesn’t quantify this
Your sample mean is either close to or far from the population mean
The SEM tells you something about the consequences of a sampling process
Not something about your sample
So why is it even useful? More on that next week!
// counts of sequences giving 0, 1, 2, ... heads:
// the binomial pmf times the total number of sequences (2^number of flips)
n_heads = jstat(0, coins - 1, coins + 1 - 1)[0].map((v) => {
return {
x: v,
y: jstat.binomial.pdf(v, coins - 1, 0.5) * 2 ** (coins - 1)
};
})
// binary tree of coin-flip outcomes (H = black, T = white) used by the Vega tree diagram
coin_data = [
{ name: "START", id: 1, parent: "", color: "red" },
...d3.range(2, 2 ** coins).map((i) => {
return {
name: i % 2 ? "T" : "H",
id: i,
parent: Math.floor(i / 2)
};
})
].map((x) => {
let colors = { H: "black", T: "white", START: "red" };
x.color = colors[x.name];
return x;
})
// Vega spec for the coin-flip sequence tree
spec3 = {
return {
$schema: "https://vega.github.io/schema/vega/v5.0.json",
padding: 0,
width: 500,
height: 100,
layout: {
padding: 0,
columns: 1
},
marks: [
{
type: "group",
encode: {
update: {
width: {
value: 1000
},
height: {
value: 130
}
}
},
data: [
{
name: "tree",
values: coin_data,
transform: [
{
type: "stratify",
key: "id",
parentKey: "parent"
},
{
type: "tree",
method: "tidy",
size: [500, 200],
as: ["x", "y", "depth", "children"]
}
]
},
{
name: "links",
source: "tree",
transform: [
{
type: "treelinks",
key: "id"
},
{
type: "linkpath",
orient: "horizontal",
shape: "line"
}
]
}
],
scales: [
{
name: "color",
domain: [0, 1, 2, 3, 4, 5],
type: "sequential",
range: "ramp"
}
],
marks: [
{
type: "path",
from: {
data: "links"
},
encode: {
update: {
path: {
field: "path"
},
stroke: {
value: "black"
}
}
}
},
{
type: "symbol",
from: {
data: "tree"
},
encode: {
enter: {
size: {
value: 50
},
stroke: {
value: "black"
}
},
update: {
x: {
field: "x"
},
y: {
field: "y"
},
fill: {
field: "color"
}
}
}
},
{
type: "text",
from: {
data: "tree"
},
encode: {
enter: {
text: {
field: "name"
},
fontSize: {
value: 0
},
baseline: {
value: "bottom"
}
},
update: {
x: {
field: "x"
},
y: {
field: "y"
}
}
}
}
]
}
]
};
}
// counts of total scores across players in the (additive) dice game
dicedata = {
return d3.sort(
dist.six_dice_roll_histogram(n_dice, n_players).counts,
(d) => d.value
);
}
// one score per player for the multiplicative dice game (product of the rolls)
d = {
return Array(n_players_mult)
.fill(0)
.map((x) => {
return {
x: Number(
Array.from(dist.six_dice_roll(1, n_dice_mult)).reduce(
(state, item) => state * item
)
)
};
});
}
normal_plot = (min, max, mean, sd) => {
// jStat.normal.pdf(x, mean, sd)
return d3.ticks(min, max, 501).map((v) => {
return {
x: v,
y: dnorm(v, mean, sd)
};
});
}
fill_limits = (mult) => {
let s = sd_value * mult;
return [
{ x: mean_value - s > -4 ? mean_value - s : -4, y: 0 },
normal_plot(
mean_value - s > -4 ? mean_value - s : -4,
mean_value - s > -4 ? mean_value - s : -4,
mean_value,
sd_value
)[0],
normal_plot(
mean_value + s < 4 ? mean_value + s : 4,
mean_value + s < 4 ? mean_value + s : 4,
mean_value,
sd_value
)[0],
{ x: mean_value + s < 4 ? mean_value + s : 4, y: 0 }
];
}
skew_normal_plot = (min, max, alpha) => {
  // skew-normal density evaluated via dsn(x, alpha)
return d3.ticks(min, max, 201).map((v) => {
return {
x: v,
y: dsn(v, alpha)
};
});
}
dsn = (x, alpha) => {
  // defaults: location xi = 0, scale omega = 1, tau = 0 (standard skew-normal)
const xi = 0;
const omega = 1;
const tau = 0;
let z = (x - xi) / omega;
let logN = -Math.log(Math.sqrt(2 * Math.PI)) - 0 - Math.pow(z, 2) / 2;
let logS = Math.log(
jStat.normal.cdf(tau * Math.sqrt(1 + Math.pow(alpha, 2)) + alpha * z, 0, 1)
);
let logPDF = logN + logS - Math.log(jStat.normal.cdf(tau, 0, 1));
return Math.exp(logPDF);
}
kurtosis = {
return {
uniform: -(6 / 5),
raised_cosine: (6 * (90 - Math.PI ** 4)) / (5 * (Math.PI ** 2 - 6) ** 2),
standard_normal: 0,
t_dist30: 6 / (30 - 4),
t_dist20: 6 / (20 - 4),
t_dist10: 6 / (10 - 4),
    t_dist7: 6 / (7 - 4), // 6 / (df - 4), matching the other t entries
t_dist5: 6 / (5 - 4)
};
}
dists = {
return {
raised_cosine: d3.ticks(-3, 3, 500).map((v) => {
return {
x: v,
y: dist.raised_cosine(v, 0, 2.5)
};
}),
standard_normal: d3.ticks(-3, 3, 500).map((v) => {
return {
x: v,
y: dnorm(v, 0, 1)
};
}),
t_dist30: d3.ticks(-3, 3, 500).map((v) => {
return {
x: v,
y: dist.dt(v, 30)
};
}),
t_dist20: d3.ticks(-3, 3, 500).map((v) => {
return {
x: v,
y: dist.dt(v, 20)
};
}),
t_dist10: d3.ticks(-3, 3, 500).map((v) => {
return {
x: v,
y: dist.dt(v, 10)
};
}),
t_dist7: d3.ticks(-3, 3, 500).map((v) => {
return {
x: v,
y: dist.dt(v, 7)
};
}),
t_dist5: d3.ticks(-3, 3, 500).map((v) => {
return {
x: v,
y: dist.dt(v, 5)
};
}),
uniform: d3.ticks(-2.1, 2.1, 500).map((v) => {
return {
x: v,
y: dist.dunif(v, -2, 2)
};
})
};
}