In [ ]:
# Baby Sleep Analysis
# By Parker Jones
# Released under the MIT License
# This script produces multiple analyses and visualizations of changes in infant sleep data over time.
# It focuses on the following metrics:
# 1. The length of babies' longest sleep periods
# 2. Daytime and nighttime sleep distributions
# 3. Total sleep amounts
# 4. How early into the evening nighttime sleep begins
# Install required packages
!python3 -m pip install pandas numpy seaborn matplotlib statsmodels scipy
# Imports
import time
script_start_time = time.time()
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm
# Set style for all plots
plt.style.use('default') # Using default style instead of seaborn
sns.set_theme() # This will apply seaborn styling
sns.set_palette("husl")
# Create directories if they don't exist
import os
os.makedirs('Data_Output', exist_ok=True)
os.makedirs('Visualizations', exist_ok=True)
# Part 1: Creating and Updating a Sleep Data Table
data_for_author = False
if data_for_author == True:
df_personal_data = pd.read_csv('personal_variables.csv', index_col = 'Variable')
dob = pd.to_datetime(df_personal_data.loc['dob', 'Value'])
path_to_data = df_personal_data.loc['path_to_data', 'Value']
max_night_start_date_limit = True
max_night_start_date = pd.to_datetime(df_personal_data.loc['max_night_start_date', 'Value']).date()
data_output_folder = df_personal_data.loc['data_and_chart_save_path', 'Value']
visualizations_folder = df_personal_data.loc['data_and_chart_save_path', 'Value']
else:
dob = pd.to_datetime('2024-06-16')
path_to_data = 'sleep_dataset.csv'
data_output_folder = 'Data_Output/'
visualizations_folder = 'Visualizations/'
max_night_start_date_limit = False
max_night_start_date = pd.to_datetime('2024-07-13').date()
# Define nighttime start and end hours
night_start_hour = 19
night_end_hour = 7
Requirement already satisfied: pandas in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (2.2.3) Requirement already satisfied: numpy in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (2.2.2) Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (0.13.2) Requirement already satisfied: matplotlib in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (3.10.0) Requirement already satisfied: statsmodels in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (0.14.4) Requirement already satisfied: scipy in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (1.15.1) Requirement already satisfied: python-dateutil>=2.8.2 in /Users/pjones/Library/Python/3.11/lib/python/site-packages (from pandas) (2.9.0.post0) Requirement already satisfied: pytz>=2020.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pandas) (2024.2) Requirement already satisfied: tzdata>=2022.7 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pandas) (2025.1) Requirement already satisfied: contourpy>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib) (1.3.1) Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib) (0.12.1) Requirement already satisfied: fonttools>=4.22.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib) (4.55.8) Requirement already satisfied: kiwisolver>=1.3.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib) (1.4.8) Requirement already satisfied: packaging>=20.0 in /Users/pjones/Library/Python/3.11/lib/python/site-packages (from matplotlib) (24.2) Requirement already satisfied: pillow>=8 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib) (11.1.0) Requirement already satisfied: pyparsing>=2.3.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib) (3.2.1) Requirement already satisfied: patsy>=0.5.6 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from statsmodels) (1.0.1) Requirement already satisfied: six>=1.5 in /Users/pjones/Library/Python/3.11/lib/python/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0) [notice] A new release of pip is available: 23.2.1 -> 25.0 [notice] To update, run: python3 -m pip install --upgrade pip
In [11]:
# Read in the sleep data
df = pd.read_csv(path_to_data)
print("Initial data shape:", df.shape)
print("\nInitial columns:", df.columns.tolist())
# Filter only sleep entries
df = df[df['Type'] == 'Sleep'].copy()
print("\nShape after filtering sleep entries:", df.shape)
# Convert date columns to datetime with error handling
print("\nConverting datetime columns...")
print("Sample 'Start' value:", df['Start'].iloc[0] if not df.empty else "No data")
print("Sample 'End' value:", df['End'].iloc[0] if not df.empty else "No data")
# Convert date columns to datetime with error handling
df['Sleep Start'] = pd.to_datetime(df['Start'])
df['Sleep End'] = pd.to_datetime(df['End'])
print("\nAfter datetime conversion:")
print("Sleep Start dtype:", df['Sleep Start'].dtype)
print("Sleep End dtype:", df['Sleep End'].dtype)
# Convert duration strings to hours
def duration_to_hours(duration_str):
if pd.isna(duration_str):
return None
try:
hours, minutes = map(int, duration_str.split(':'))
return hours + minutes/60
except (ValueError, AttributeError) as e:
print(f"Error converting duration: {duration_str}")
return None
df['Sleep Duration'] = df['Duration'].apply(duration_to_hours)
# Calculate age in days (modified approach)
df['Days Old'] = (df['Sleep Start'] - pd.Timestamp(dob)).dt.total_seconds() / (24 * 60 * 60)
df['Days Old'] = df['Days Old'].astype(int)
df['Weeks Old'] = df['Days Old'] // 7
# Add columns for time of day
df['Start Hour'] = df['Sleep Start'].dt.hour
df['End Hour'] = df['Sleep End'].dt.hour
# Determine if each sleep period is during day or night
def is_night_sleep(row):
start_hour = row['Start Hour']
end_hour = row['End Hour']
# Night sleep starts between night_start_hour and 23:59
# or ends between 00:00 and night_end_hour
return (start_hour >= night_start_hour) or (end_hour <= night_end_hour)
df['Is Night Sleep'] = df.apply(is_night_sleep, axis=1)
# Group by date to find longest sleep period for each day
df_longest_sleep_periods = df.loc[df.groupby(df['Sleep Start'].dt.date)['Sleep Duration'].idxmax()]
df_longest_sleep_periods_by_day = df_longest_sleep_periods[['Days Old', 'Sleep Duration']].rename(
columns={'Sleep Duration': 'Hours'})
print("\nFinal dataset info:")
print(df.info())
print("\nSample of final processed data:")
print(df[['Sleep Start', 'Sleep End', 'Sleep Duration', 'Days Old', 'Weeks Old', 'Is Night Sleep']].head())
Initial data shape: (2776, 8)
Initial columns: ['Type', 'Start', 'End', 'Duration', 'Start Condition', 'Start Location', 'End Condition', 'Notes']
Shape after filtering sleep entries: (667, 8)
Converting datetime columns...
Sample 'Start' value: 2025-01-08 02:00
Sample 'End' value: 2025-01-08 05:50
After datetime conversion:
Sleep Start dtype: datetime64[ns]
Sleep End dtype: datetime64[ns]
Final dataset info:
<class 'pandas.core.frame.DataFrame'>
Index: 667 entries, 4 to 2750
Data columns (total 16 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Type 667 non-null object
1 Start 667 non-null object
2 End 667 non-null object
3 Duration 667 non-null object
4 Start Condition 4 non-null object
5 Start Location 5 non-null object
6 End Condition 4 non-null object
7 Notes 2 non-null object
8 Sleep Start 667 non-null datetime64[ns]
9 Sleep End 667 non-null datetime64[ns]
10 Sleep Duration 667 non-null float64
11 Days Old 667 non-null int64
12 Weeks Old 667 non-null int64
13 Start Hour 667 non-null int32
14 End Hour 667 non-null int32
15 Is Night Sleep 667 non-null bool
dtypes: bool(1), datetime64[ns](2), float64(1), int32(2), int64(2), object(8)
memory usage: 95.0+ KB
None
Sample of final processed data:
Sleep Start Sleep End Sleep Duration Days Old \
4 2025-01-08 02:00:00 2025-01-08 05:50:00 3.833333 206
6 2025-01-07 20:56:00 2025-01-08 01:25:00 4.483333 205
7 2025-01-07 19:38:00 2025-01-07 20:13:00 0.583333 205
10 2025-01-07 16:19:00 2025-01-07 16:54:00 0.583333 205
15 2025-01-07 13:19:00 2025-01-07 13:45:00 0.416667 205
Weeks Old Is Night Sleep
4 29 True
6 29 True
7 29 True
10 29 False
15 29 False
In [15]:
# Calculate daily total sleep hours, split by day/night
daily_sleep = df.groupby([df['Sleep Start'].dt.date, 'Is Night Sleep'])['Sleep Duration'].sum().unstack()
daily_sleep.columns = ['Day Sleep', 'Night Sleep']
daily_sleep['Total Sleep'] = daily_sleep['Day Sleep'] + daily_sleep['Night Sleep']
# Add age information to daily sleep data
daily_sleep.index = pd.to_datetime(daily_sleep.index) # Convert index to datetime
daily_sleep['Days Old'] = (daily_sleep.index - pd.Timestamp(dob)).days # Remove .dt
daily_sleep['Weeks Old'] = daily_sleep['Days Old'] // 7
# Create stacked area plot for day/night sleep distribution
plt.figure(figsize=(12, 6))
plt.fill_between(daily_sleep['Days Old'], 0, daily_sleep['Night Sleep'],
alpha=0.5, label='Night Sleep', color='navy')
plt.fill_between(daily_sleep['Days Old'], daily_sleep['Night Sleep'],
daily_sleep['Total Sleep'], alpha=0.5, label='Day Sleep', color='skyblue')
plt.title('Daily Sleep Distribution: Day vs Night Sleep')
plt.xlabel('Days Old')
plt.ylabel('Hours of Sleep')
plt.legend()
plt.savefig(f'{visualizations_folder}day_night_sleep_distribution.png',
dpi=300,
bbox_inches='tight')
plt.show()
# Calculate weekly averages
weekly_sleep = daily_sleep.groupby('Weeks Old')[['Day Sleep', 'Night Sleep', 'Total Sleep']].agg(
['mean', 'std']).reset_index()
# Create line plot for weekly sleep averages
plt.figure(figsize=(12, 6))
for sleep_type in ['Day Sleep', 'Night Sleep', 'Total Sleep']:
mean_col = (sleep_type, 'mean')
std_col = (sleep_type, 'std')
plt.plot(weekly_sleep['Weeks Old'],
weekly_sleep[mean_col],
label=sleep_type,
linewidth=2)
plt.fill_between(weekly_sleep['Weeks Old'],
weekly_sleep[mean_col] - weekly_sleep[std_col],
weekly_sleep[mean_col] + weekly_sleep[std_col],
alpha=0.2)
plt.title('Weekly Sleep Averages')
plt.xlabel('Weeks Old')
plt.ylabel('Hours of Sleep')
plt.legend()
plt.savefig(f'{visualizations_folder}weekly_sleep_averages.png',
dpi=300,
bbox_inches='tight')
plt.show()
# Create box plots for day/night sleep distribution by week
sleep_long = pd.melt(daily_sleep.reset_index(),
id_vars=['Days Old', 'Weeks Old'],
value_vars=['Day Sleep', 'Night Sleep'],
var_name='Sleep Type',
value_name='Hours')
plt.figure(figsize=(15, 6))
sns.boxplot(data=sleep_long, x='Weeks Old', y='Hours', hue='Sleep Type')
plt.title('Distribution of Day/Night Sleep by Week')
plt.xticks(rotation=45)
plt.savefig(f'{visualizations_folder}sleep_distribution_boxplot.png',
dpi=300,
bbox_inches='tight')
plt.show()
In [16]:
# Statistical Analysis of Sleep Patterns
# This section provides:
# Linear regression analysis of longest sleep periods, showing the trend over time with statistical significance
# Monthly statistics including count, mean, standard deviation, min, and max sleep durations
# Box plots showing the distribution of sleep durations by month
# Week-over-week changes analysis to identify significant changes in sleep patterns
# The code includes:
# Trend analysis with R² values and p-values
# Monthly breakdowns of sleep patterns
# Visualization of distributions
# Analysis of changes between consecutive weeks
# 1. Linear regression for sleep duration trends
from scipy import stats
# Analyze trend in longest sleep periods
slope, intercept, r_value, p_value, std_err = stats.linregress(
df_longest_sleep_periods_by_day['Days Old'],
df_longest_sleep_periods_by_day['Hours']
)
# Create figure for regression analysis
plt.figure(figsize=(12, 6))
plt.scatter(df_longest_sleep_periods_by_day['Days Old'],
df_longest_sleep_periods_by_day['Hours'],
alpha=0.5)
plt.plot(df_longest_sleep_periods_by_day['Days Old'],
intercept + slope * df_longest_sleep_periods_by_day['Days Old'],
'r', label=f'Trend line (R² = {r_value**2:.3f})')
plt.title('Longest Sleep Duration Trend Analysis')
plt.xlabel('Days Old')
plt.ylabel('Hours of Sleep')
plt.legend()
# Add statistical information to plot
stats_text = (f'Slope: {slope:.3f} hours/day\n'
f'P-value: {p_value:.3f}\n'
f'R²: {r_value**2:.3f}')
plt.text(0.05, 0.95, stats_text,
transform=plt.gca().transAxes,
bbox=dict(facecolor='white', alpha=0.8))
plt.savefig(f'{visualizations_folder}sleep_duration_trend.png',
dpi=300,
bbox_inches='tight')
plt.show()
# 2. Monthly statistics
df['Month'] = df['Sleep Start'].dt.to_period('M')
monthly_stats = df.groupby('Month')['Sleep Duration'].agg([
'count',
'mean',
'std',
'min',
'max'
]).round(2)
print("\nMonthly Sleep Statistics:")
print(monthly_stats)
# Visualize monthly distributions
plt.figure(figsize=(15, 6))
sns.boxplot(data=df, x=df['Sleep Start'].dt.strftime('%Y-%m'), y='Sleep Duration')
plt.xticks(rotation=45)
plt.title('Monthly Sleep Duration Distributions')
plt.xlabel('Month')
plt.ylabel('Hours of Sleep')
plt.savefig(f'{visualizations_folder}monthly_sleep_distributions.png',
dpi=300,
bbox_inches='tight')
plt.show()
# 3. Week-over-week changes
weekly_avg = df.groupby('Weeks Old')['Sleep Duration'].mean()
week_over_week_change = weekly_avg.diff()
plt.figure(figsize=(12, 6))
plt.bar(week_over_week_change.index, week_over_week_change.values)
plt.axhline(y=0, color='r', linestyle='-', alpha=0.3)
plt.title('Week-over-Week Changes in Average Sleep Duration')
plt.xlabel('Week Number')
plt.ylabel('Change in Average Sleep Duration (Hours)')
plt.savefig(f'{visualizations_folder}week_over_week_changes.png',
dpi=300,
bbox_inches='tight')
plt.show()
# Print summary statistics
print("\nSummary Statistics:")
print(f"Average change in sleep duration between weeks: {week_over_week_change.mean():.2f} hours")
print(f"Largest increase: {week_over_week_change.max():.2f} hours (Week {week_over_week_change.idxmax()})")
print(f"Largest decrease: {week_over_week_change.min():.2f} hours (Week {week_over_week_change.idxmin()})")
Monthly Sleep Statistics:
count mean std min max
Month
2024-08 15 1.71 0.52 0.35 2.42
2024-09 23 1.96 1.35 0.25 4.97
2024-10 200 2.07 1.45 0.18 8.13
2024-11 182 2.40 2.17 0.15 10.50
2024-12 198 2.19 2.12 0.08 10.85
2025-01 49 1.88 1.71 0.28 6.23
Summary Statistics: Average change in sleep duration between weeks: 0.01 hours Largest increase: 1.65 hours (Week 14) Largest decrease: -0.94 hours (Week 12)
In [17]:
# Sleep Onset Time Analysis
# Scatter plot of evening sleep onset times with trend line
# Weekly average sleep onset times and their variability
# Box plots showing the distribution of onset times by week
# Analysis of sleep onset consistency (variability) over time
# Key features:
# Filters for evening sleep periods (after 6 PM)
# Shows actual clock times instead of decimal hours
# Calculates trend in minutes per day
# Measures consistency of sleep onset timing
# Provides weekly statistics for onset times and durations
# Convert sleep start times to hours since start of day (for easier plotting)
df['Sleep Start Time'] = df['Sleep Start'].dt.hour + df['Sleep Start'].dt.minute / 60
# Filter for evening sleep onset (between 6 PM and midnight)
evening_sleep = df[
(df['Sleep Start Time'] >= 18) &
(df['Sleep Start Time'] <= 24) &
(df['Sleep Duration'] >= 3) # Filter for longer sleep periods
].copy()
# Create scatter plot of evening sleep onset times
plt.figure(figsize=(12, 6))
plt.scatter(evening_sleep['Days Old'],
evening_sleep['Sleep Start Time'],
alpha=0.5)
# Add trend line
slope, intercept, r_value, p_value, std_err = stats.linregress(
evening_sleep['Days Old'],
evening_sleep['Sleep Start Time']
)
x = evening_sleep['Days Old']
plt.plot(x, intercept + slope * x, 'r',
label=f'Trend line (R² = {r_value**2:.3f})')
# Customize y-axis to show actual times
plt.yticks(range(18, 25),
[f'{i:02d}:00' for i in range(18, 25)])
plt.title('Evening Sleep Onset Times')
plt.xlabel('Days Old')
plt.ylabel('Time of Day')
plt.legend()
# Add statistical information
stats_text = (f'Trend: {slope*60:.1f} minutes per day\n'
f'P-value: {p_value:.3f}')
plt.text(0.05, 0.95, stats_text,
transform=plt.gca().transAxes,
bbox=dict(facecolor='white', alpha=0.8))
plt.savefig(f'{visualizations_folder}sleep_onset_trend.png',
dpi=300,
bbox_inches='tight')
plt.show()
# Calculate weekly average sleep onset times
weekly_onset = evening_sleep.groupby('Weeks Old').agg({
'Sleep Start Time': ['mean', 'std', 'count'],
'Sleep Duration': 'mean'
}).round(2)
print("\nWeekly Sleep Onset Statistics:")
print(weekly_onset)
# Visualize distribution of sleep onset times by week
plt.figure(figsize=(15, 6))
sns.boxplot(data=evening_sleep,
x='Weeks Old',
y='Sleep Start Time')
plt.yticks(range(18, 25),
[f'{i:02d}:00' for i in range(18, 25)])
plt.title('Distribution of Evening Sleep Onset Times by Week')
plt.xlabel('Weeks Old')
plt.ylabel('Time of Day')
plt.savefig(f'{visualizations_folder}sleep_onset_distribution.png',
dpi=300,
bbox_inches='tight')
plt.show()
# Calculate consistency metrics
evening_sleep['Minutes Past 6PM'] = (evening_sleep['Sleep Start Time'] - 18) * 60
# Calculate weekly variability in sleep onset
weekly_variability = evening_sleep.groupby('Weeks Old')['Minutes Past 6PM'].agg([
'std',
'mean',
'count'
]).round(2)
print("\nWeekly Sleep Onset Variability (in minutes):")
print(weekly_variability)
# Visualize onset time variability
plt.figure(figsize=(12, 6))
plt.bar(weekly_variability.index,
weekly_variability['std'],
alpha=0.6)
plt.title('Weekly Variability in Sleep Onset Time')
plt.xlabel('Weeks Old')
plt.ylabel('Standard Deviation (minutes)')
plt.savefig(f'{visualizations_folder}sleep_onset_variability.png',
dpi=300,
bbox_inches='tight')
plt.show()
Weekly Sleep Onset Statistics:
Sleep Start Time Sleep Duration
mean std count mean
Weeks Old
14 21.83 NaN 1 4.97
15 21.15 0.88 7 4.35
16 21.28 0.71 4 5.45
17 22.57 0.97 5 4.45
18 22.25 0.65 7 5.34
19 21.69 0.62 7 4.74
20 20.16 1.35 8 5.44
21 20.45 0.50 6 7.36
22 20.75 0.47 7 5.98
23 20.70 0.60 7 6.80
24 20.56 0.36 7 6.07
25 20.32 1.07 7 6.51
26 20.61 0.64 6 6.48
27 20.88 1.08 7 5.58
28 19.90 0.39 6 5.64
29 21.14 0.53 3 4.99
Weekly Sleep Onset Variability (in minutes):
std mean count
Weeks Old
14 NaN 230.00 1
15 52.81 189.29 7
16 42.53 196.75 4
17 58.44 274.20 5
18 39.14 254.86 7
19 37.17 221.57 7
20 80.90 129.62 8
21 30.10 146.83 6
22 28.19 165.00 7
23 35.89 161.71 7
24 21.74 153.86 7
25 63.99 139.00 7
26 38.26 156.33 6
27 64.65 173.00 7
28 23.65 113.83 6
29 31.94 188.67 3
In [19]:
# Sleep Consistency and Variability Analysis
# 1. Daily sleep pattern consistency
def calculate_daily_pattern_metrics(group):
return pd.Series({
'number_of_naps': len(group[~group['Is Night Sleep']]),
'total_nap_hours': group[~group['Is Night Sleep']]['Sleep Duration'].sum(),
'night_sleep_hours': group[group['Is Night Sleep']]['Sleep Duration'].sum(),
'sleep_episodes': len(group),
'avg_episode_length': group['Sleep Duration'].mean()
})
daily_patterns = df.groupby([df['Sleep Start'].dt.date]).apply(calculate_daily_pattern_metrics)
daily_patterns['Days Old'] = (pd.to_datetime(daily_patterns.index) - pd.Timestamp(dob)).days
daily_patterns['Weeks Old'] = daily_patterns['Days Old'] // 7 # Add this line
# Visualize number of naps over time
plt.figure(figsize=(12, 6))
plt.scatter(daily_patterns['Days Old'], daily_patterns['number_of_naps'], alpha=0.5)
plt.title('Number of Naps per Day')
plt.xlabel('Days Old')
plt.ylabel('Number of Naps')
# Add trend line
slope, intercept, r_value, p_value, std_err = stats.linregress(
daily_patterns['Days Old'],
daily_patterns['number_of_naps']
)
x = daily_patterns['Days Old']
plt.plot(x, intercept + slope * x, 'r',
label=f'Trend (R² = {r_value**2:.3f})')
plt.legend()
plt.savefig(f'{visualizations_folder}naps_per_day.png',
dpi=300,
bbox_inches='tight')
plt.show()
# 2. Sleep episode timing consistency
plt.figure(figsize=(12, 6))
plt.scatter(df['Days Old'], df['Sleep Start Time'], alpha=0.3)
plt.title('Sleep Episode Start Times')
plt.xlabel('Days Old')
plt.ylabel('Time of Day')
plt.yticks(range(0, 24), [f'{i:02d}:00' for i in range(0, 24)])
plt.savefig(f'{visualizations_folder}sleep_timing_scatter.png',
dpi=300,
bbox_inches='tight')
plt.show()
# 3. Weekly sleep pattern stability
weekly_patterns = daily_patterns.groupby('Weeks Old').agg({ # Changed this line
'number_of_naps': ['mean', 'std'],
'total_nap_hours': ['mean', 'std'],
'night_sleep_hours': ['mean', 'std'],
'sleep_episodes': ['mean', 'std'],
'avg_episode_length': ['mean', 'std']
}).round(2)
print("\nWeekly Sleep Pattern Statistics:")
print(weekly_patterns)
# 4. Visualize sleep pattern stability
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Weekly Sleep Pattern Stability', fontsize=16)
# Plot 1: Nap consistency
axes[0, 0].errorbar(weekly_patterns.index,
weekly_patterns[('number_of_naps', 'mean')],
yerr=weekly_patterns[('number_of_naps', 'std')],
fmt='o-')
axes[0, 0].set_title('Number of Naps')
axes[0, 0].set_xlabel('Weeks Old')
axes[0, 0].set_ylabel('Average Naps per Day')
# Plot 2: Nap hours
axes[0, 1].errorbar(weekly_patterns.index,
weekly_patterns[('total_nap_hours', 'mean')],
yerr=weekly_patterns[('total_nap_hours', 'std')],
fmt='o-')
axes[0, 1].set_title('Daily Nap Hours')
axes[0, 1].set_xlabel('Weeks Old')
axes[0, 1].set_ylabel('Hours')
# Plot 3: Night sleep
axes[1, 0].errorbar(weekly_patterns.index,
weekly_patterns[('night_sleep_hours', 'mean')],
yerr=weekly_patterns[('night_sleep_hours', 'std')],
fmt='o-')
axes[1, 0].set_title('Night Sleep Hours')
axes[1, 0].set_xlabel('Weeks Old')
axes[1, 0].set_ylabel('Hours')
# Plot 4: Average episode length
axes[1, 1].errorbar(weekly_patterns.index,
weekly_patterns[('avg_episode_length', 'mean')],
yerr=weekly_patterns[('avg_episode_length', 'std')],
fmt='o-')
axes[1, 1].set_title('Average Sleep Episode Length')
axes[1, 1].set_xlabel('Weeks Old')
axes[1, 1].set_ylabel('Hours')
plt.tight_layout()
plt.savefig(f'{visualizations_folder}sleep_pattern_stability.png',
dpi=300,
bbox_inches='tight')
plt.show()
# Calculate and print overall consistency metrics
print("\nOverall Sleep Pattern Consistency Metrics:")
print(f"Average number of naps per day: {daily_patterns['number_of_naps'].mean():.2f} ± {daily_patterns['number_of_naps'].std():.2f}")
print(f"Average total nap hours: {daily_patterns['total_nap_hours'].mean():.2f} ± {daily_patterns['total_nap_hours'].std():.2f}")
print(f"Average night sleep hours: {daily_patterns['night_sleep_hours'].mean():.2f} ± {daily_patterns['night_sleep_hours'].std():.2f}")
Weekly Sleep Pattern Statistics:
number_of_naps total_nap_hours night_sleep_hours \
mean std mean std mean std
Weeks Old
9 3.33 0.58 5.47 1.92 2.39 2.50
10 1.00 NaN 2.00 NaN 0.00 NaN
11 0.00 NaN 0.00 NaN 2.92 NaN
12 1.00 NaN 0.52 NaN 0.00 NaN
14 2.00 1.41 2.93 0.75 2.48 3.51
15 4.14 0.90 5.89 1.33 8.16 1.47
16 3.43 1.72 4.90 2.59 6.90 2.61
17 3.71 1.11 6.03 2.22 7.59 2.83
18 4.14 0.38 5.99 1.56 8.32 1.25
19 4.29 0.49 6.33 1.89 7.50 2.47
20 3.57 0.98 4.87 2.14 9.51 2.50
21 3.86 1.21 4.29 1.42 10.62 2.42
22 3.57 0.98 5.40 1.26 9.15 2.09
23 4.29 0.49 5.65 1.14 8.87 1.77
24 4.86 0.90 4.58 1.67 9.77 2.38
25 3.71 0.76 4.90 1.47 9.30 2.18
26 4.43 0.98 5.04 1.13 9.13 2.03
27 4.00 0.58 3.78 1.16 9.65 1.78
28 3.57 0.53 3.25 0.37 9.72 1.49
29 3.00 2.00 2.54 1.79 7.88 3.59
sleep_episodes avg_episode_length
mean std mean std
Weeks Old
9 4.67 1.53 1.61 0.40
10 1.00 NaN 2.00 NaN
11 2.00 NaN 1.46 NaN
12 1.00 NaN 0.52 NaN
14 2.50 2.12 2.25 0.21
15 6.86 1.35 2.07 0.26
16 5.86 2.04 2.03 0.24
17 6.86 1.95 2.06 0.45
18 6.71 0.49 2.15 0.31
19 6.43 0.79 2.17 0.26
20 5.57 0.98 2.62 0.43
21 6.14 1.77 2.60 0.87
22 6.14 1.35 2.43 0.44
23 6.14 1.07 2.40 0.48
24 7.29 1.38 2.03 0.55
25 5.57 1.27 2.66 0.65
26 6.43 1.27 2.32 0.82
27 6.57 0.53 2.04 0.29
28 6.14 0.69 2.14 0.39
29 5.75 3.20 2.24 1.08
Overall Sleep Pattern Consistency Metrics: Average number of naps per day: 3.79 ± 1.15 Average total nap hours: 4.83 ± 1.90 Average night sleep hours: 8.33 ± 2.90
In [20]:
# Comparison to Typical Baby Sleep Guidelines
# Define typical sleep guidelines by age (in months)
# Source: American Academy of Pediatrics (AAP) and National Sleep Foundation
sleep_guidelines = pd.DataFrame({
'Age_Months': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
'Total_Sleep_Min': [14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13], # minimum hours
'Total_Sleep_Max': [17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15], # maximum hours
'Naps_Typical': [6, 5, 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2] # typical number of naps
})
# Calculate baby's age in months for each day
daily_patterns['Months Old'] = daily_patterns['Days Old'] // 30
# Calculate monthly averages for comparison
monthly_averages = daily_patterns.groupby('Months Old').agg({
'total_nap_hours': 'mean',
'night_sleep_hours': 'mean',
'number_of_naps': 'mean'
}).round(2)
monthly_averages['total_sleep'] = monthly_averages['total_nap_hours'] + monthly_averages['night_sleep_hours']
# Create comparison visualization
plt.figure(figsize=(12, 6))
# Plot guidelines range
plt.fill_between(sleep_guidelines['Age_Months'],
sleep_guidelines['Total_Sleep_Min'],
sleep_guidelines['Total_Sleep_Max'],
alpha=0.3,
color='gray',
label='Recommended Range')
# Plot actual total sleep
plt.plot(monthly_averages.index,
monthly_averages['total_sleep'],
'bo-',
label='Actual Total Sleep',
linewidth=2)
plt.title('Total Sleep Hours Compared to Guidelines')
plt.xlabel('Months Old')
plt.ylabel('Hours of Sleep per Day')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig(f'{visualizations_folder}sleep_guidelines_comparison.png',
dpi=300,
bbox_inches='tight')
plt.show()
# Compare number of naps
plt.figure(figsize=(12, 6))
plt.plot(sleep_guidelines['Age_Months'],
sleep_guidelines['Naps_Typical'],
'r--',
label='Typical Naps',
linewidth=2)
plt.plot(monthly_averages.index,
monthly_averages['number_of_naps'],
'bo-',
label='Actual Naps',
linewidth=2)
plt.title('Number of Naps Compared to Typical Patterns')
plt.xlabel('Months Old')
plt.ylabel('Number of Naps per Day')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig(f'{visualizations_folder}naps_guidelines_comparison.png',
dpi=300,
bbox_inches='tight')
plt.show()
# Print detailed comparison
print("\nMonthly Sleep Pattern Comparison to Guidelines:")
print("---------------------------------------------")
for month in monthly_averages.index:
if month in sleep_guidelines['Age_Months'].values:
guideline = sleep_guidelines[sleep_guidelines['Age_Months'] == month].iloc[0]
actual = monthly_averages.loc[month]
status = "WITHIN RANGE"
if actual['total_sleep'] < guideline['Total_Sleep_Min']:
status = "BELOW RANGE"
elif actual['total_sleep'] > guideline['Total_Sleep_Max']:
status = "ABOVE RANGE"
print(f"\nMonth {month}:")
print(f"Total Sleep: {actual['total_sleep']:.1f} hours (Guideline: {guideline['Total_Sleep_Min']}-{guideline['Total_Sleep_Max']} hours) - {status}")
print(f"Number of Naps: {actual['number_of_naps']:.1f} (Typical: {guideline['Naps_Typical']})")
print(f"Night Sleep: {actual['night_sleep_hours']:.1f} hours")
print(f"Nap Hours: {actual['total_nap_hours']:.1f} hours")
# Calculate percentage of days within guidelines
def within_guidelines(row):
month = row['Months Old']
if month in sleep_guidelines['Age_Months'].values:
guideline = sleep_guidelines[sleep_guidelines['Age_Months'] == month].iloc[0]
total_sleep = row['total_nap_hours'] + row['night_sleep_hours']
return guideline['Total_Sleep_Min'] <= total_sleep <= guideline['Total_Sleep_Max']
return None
daily_patterns['within_guidelines'] = daily_patterns.apply(within_guidelines, axis=1)
compliance_rate = (daily_patterns['within_guidelines'].sum() /
daily_patterns['within_guidelines'].count() * 100)
print(f"\nOverall Compliance with Sleep Guidelines: {compliance_rate:.1f}%")
Monthly Sleep Pattern Comparison to Guidelines: --------------------------------------------- Month 2: Total Sleep: 4.8 hours (Guideline: 14-17 hours) - BELOW RANGE Number of Naps: 2.0 (Typical: 4) Night Sleep: 1.7 hours Nap Hours: 3.2 hours Month 3: Total Sleep: 12.0 hours (Guideline: 14-17 hours) - BELOW RANGE Number of Naps: 3.6 (Typical: 4) Night Sleep: 6.7 hours Nap Hours: 5.2 hours Month 4: Total Sleep: 14.0 hours (Guideline: 14-17 hours) - WITHIN RANGE Number of Naps: 3.9 (Typical: 3) Night Sleep: 8.5 hours Nap Hours: 5.5 hours Month 5: Total Sleep: 14.8 hours (Guideline: 14-17 hours) - WITHIN RANGE Number of Naps: 4.1 (Typical: 3) Night Sleep: 9.7 hours Nap Hours: 5.1 hours Month 6: Total Sleep: 13.0 hours (Guideline: 14-16 hours) - BELOW RANGE Number of Naps: 3.9 (Typical: 3) Night Sleep: 9.1 hours Nap Hours: 3.9 hours Overall Compliance with Sleep Guidelines: 35.5%
In [ ]: