In [ ]:
# Baby Sleep Analysis

# By Parker Jones
# Released under the MIT License

# This script produces multiple analyses and visualizations of changes in infant sleep data over time. 
# It focuses on the following metrics:

# 1. The length of babies' longest sleep periods
# 2. Daytime and nighttime sleep distributions
# 3. Total sleep amounts
# 4. How early into the evening nighttime sleep begins

# Install required packages
!python3 -m pip install pandas numpy seaborn matplotlib statsmodels scipy

# Imports
import time
script_start_time = time.time()
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm

# Set style for all plots
plt.style.use('default')  # Using default style instead of seaborn
sns.set_theme()  # This will apply seaborn styling
sns.set_palette("husl")

# Create directories if they don't exist
import os
os.makedirs('Data_Output', exist_ok=True)
os.makedirs('Visualizations', exist_ok=True)

# Part 1: Creating and Updating a Sleep Data Table
data_for_author = False
if data_for_author == True:
    df_personal_data = pd.read_csv('personal_variables.csv', index_col = 'Variable')
    dob = pd.to_datetime(df_personal_data.loc['dob', 'Value'])
    path_to_data = df_personal_data.loc['path_to_data', 'Value']
    max_night_start_date_limit = True
    max_night_start_date = pd.to_datetime(df_personal_data.loc['max_night_start_date', 'Value']).date()
    data_output_folder = df_personal_data.loc['data_and_chart_save_path', 'Value']
    visualizations_folder = df_personal_data.loc['data_and_chart_save_path', 'Value']
else:
    dob = pd.to_datetime('2024-06-16')
    path_to_data = 'sleep_dataset.csv'
    data_output_folder = 'Data_Output/'
    visualizations_folder = 'Visualizations/'
    max_night_start_date_limit = False
    max_night_start_date = pd.to_datetime('2024-07-13').date()

# Define nighttime start and end hours
night_start_hour = 19
night_end_hour = 7
Requirement already satisfied: pandas in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (2.2.3)
Requirement already satisfied: numpy in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (2.2.2)
Requirement already satisfied: seaborn in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (0.13.2)
Requirement already satisfied: matplotlib in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (3.10.0)
Requirement already satisfied: statsmodels in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (0.14.4)
Requirement already satisfied: scipy in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (1.15.1)
Requirement already satisfied: python-dateutil>=2.8.2 in /Users/pjones/Library/Python/3.11/lib/python/site-packages (from pandas) (2.9.0.post0)
Requirement already satisfied: pytz>=2020.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pandas) (2024.2)
Requirement already satisfied: tzdata>=2022.7 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from pandas) (2025.1)
Requirement already satisfied: contourpy>=1.0.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib) (1.3.1)
Requirement already satisfied: cycler>=0.10 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib) (0.12.1)
Requirement already satisfied: fonttools>=4.22.0 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib) (4.55.8)
Requirement already satisfied: kiwisolver>=1.3.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib) (1.4.8)
Requirement already satisfied: packaging>=20.0 in /Users/pjones/Library/Python/3.11/lib/python/site-packages (from matplotlib) (24.2)
Requirement already satisfied: pillow>=8 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib) (11.1.0)
Requirement already satisfied: pyparsing>=2.3.1 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from matplotlib) (3.2.1)
Requirement already satisfied: patsy>=0.5.6 in /Library/Frameworks/Python.framework/Versions/3.11/lib/python3.11/site-packages (from statsmodels) (1.0.1)
Requirement already satisfied: six>=1.5 in /Users/pjones/Library/Python/3.11/lib/python/site-packages (from python-dateutil>=2.8.2->pandas) (1.17.0)

[notice] A new release of pip is available: 23.2.1 -> 25.0
[notice] To update, run: python3 -m pip install --upgrade pip
In [11]:
# Read in the sleep data
df = pd.read_csv(path_to_data)

print("Initial data shape:", df.shape)
print("\nInitial columns:", df.columns.tolist())

# Filter only sleep entries
df = df[df['Type'] == 'Sleep'].copy()
print("\nShape after filtering sleep entries:", df.shape)

# Convert date columns to datetime with error handling
print("\nConverting datetime columns...")
print("Sample 'Start' value:", df['Start'].iloc[0] if not df.empty else "No data")
print("Sample 'End' value:", df['End'].iloc[0] if not df.empty else "No data")

# Convert date columns to datetime with error handling
df['Sleep Start'] = pd.to_datetime(df['Start'])
df['Sleep End'] = pd.to_datetime(df['End'])

print("\nAfter datetime conversion:")
print("Sleep Start dtype:", df['Sleep Start'].dtype)
print("Sleep End dtype:", df['Sleep End'].dtype)

# Convert duration strings to hours
def duration_to_hours(duration_str):
    if pd.isna(duration_str):
        return None
    try:
        hours, minutes = map(int, duration_str.split(':'))
        return hours + minutes/60
    except (ValueError, AttributeError) as e:
        print(f"Error converting duration: {duration_str}")
        return None

df['Sleep Duration'] = df['Duration'].apply(duration_to_hours)

# Calculate age in days (modified approach)
df['Days Old'] = (df['Sleep Start'] - pd.Timestamp(dob)).dt.total_seconds() / (24 * 60 * 60)
df['Days Old'] = df['Days Old'].astype(int)
df['Weeks Old'] = df['Days Old'] // 7

# Add columns for time of day
df['Start Hour'] = df['Sleep Start'].dt.hour
df['End Hour'] = df['Sleep End'].dt.hour

# Determine if each sleep period is during day or night
def is_night_sleep(row):
    start_hour = row['Start Hour']
    end_hour = row['End Hour']
    
    # Night sleep starts between night_start_hour and 23:59
    # or ends between 00:00 and night_end_hour
    return (start_hour >= night_start_hour) or (end_hour <= night_end_hour)

df['Is Night Sleep'] = df.apply(is_night_sleep, axis=1)

# Group by date to find longest sleep period for each day
df_longest_sleep_periods = df.loc[df.groupby(df['Sleep Start'].dt.date)['Sleep Duration'].idxmax()]
df_longest_sleep_periods_by_day = df_longest_sleep_periods[['Days Old', 'Sleep Duration']].rename(
    columns={'Sleep Duration': 'Hours'})

print("\nFinal dataset info:")
print(df.info())
print("\nSample of final processed data:")
print(df[['Sleep Start', 'Sleep End', 'Sleep Duration', 'Days Old', 'Weeks Old', 'Is Night Sleep']].head())
Initial data shape: (2776, 8)

Initial columns: ['Type', 'Start', 'End', 'Duration', 'Start Condition', 'Start Location', 'End Condition', 'Notes']

Shape after filtering sleep entries: (667, 8)

Converting datetime columns...
Sample 'Start' value: 2025-01-08 02:00
Sample 'End' value: 2025-01-08 05:50

After datetime conversion:
Sleep Start dtype: datetime64[ns]
Sleep End dtype: datetime64[ns]

Final dataset info:
<class 'pandas.core.frame.DataFrame'>
Index: 667 entries, 4 to 2750
Data columns (total 16 columns):
 #   Column           Non-Null Count  Dtype         
---  ------           --------------  -----         
 0   Type             667 non-null    object        
 1   Start            667 non-null    object        
 2   End              667 non-null    object        
 3   Duration         667 non-null    object        
 4   Start Condition  4 non-null      object        
 5   Start Location   5 non-null      object        
 6   End Condition    4 non-null      object        
 7   Notes            2 non-null      object        
 8   Sleep Start      667 non-null    datetime64[ns]
 9   Sleep End        667 non-null    datetime64[ns]
 10  Sleep Duration   667 non-null    float64       
 11  Days Old         667 non-null    int64         
 12  Weeks Old        667 non-null    int64         
 13  Start Hour       667 non-null    int32         
 14  End Hour         667 non-null    int32         
 15  Is Night Sleep   667 non-null    bool          
dtypes: bool(1), datetime64[ns](2), float64(1), int32(2), int64(2), object(8)
memory usage: 95.0+ KB
None

Sample of final processed data:
           Sleep Start           Sleep End  Sleep Duration  Days Old  \
4  2025-01-08 02:00:00 2025-01-08 05:50:00        3.833333       206   
6  2025-01-07 20:56:00 2025-01-08 01:25:00        4.483333       205   
7  2025-01-07 19:38:00 2025-01-07 20:13:00        0.583333       205   
10 2025-01-07 16:19:00 2025-01-07 16:54:00        0.583333       205   
15 2025-01-07 13:19:00 2025-01-07 13:45:00        0.416667       205   

    Weeks Old  Is Night Sleep  
4          29            True  
6          29            True  
7          29            True  
10         29           False  
15         29           False  
In [15]:
# Calculate daily total sleep hours, split by day/night
daily_sleep = df.groupby([df['Sleep Start'].dt.date, 'Is Night Sleep'])['Sleep Duration'].sum().unstack()
daily_sleep.columns = ['Day Sleep', 'Night Sleep']
daily_sleep['Total Sleep'] = daily_sleep['Day Sleep'] + daily_sleep['Night Sleep']

# Add age information to daily sleep data
daily_sleep.index = pd.to_datetime(daily_sleep.index)  # Convert index to datetime
daily_sleep['Days Old'] = (daily_sleep.index - pd.Timestamp(dob)).days  # Remove .dt
daily_sleep['Weeks Old'] = daily_sleep['Days Old'] // 7

# Create stacked area plot for day/night sleep distribution
plt.figure(figsize=(12, 6))
plt.fill_between(daily_sleep['Days Old'], 0, daily_sleep['Night Sleep'], 
                 alpha=0.5, label='Night Sleep', color='navy')
plt.fill_between(daily_sleep['Days Old'], daily_sleep['Night Sleep'], 
                 daily_sleep['Total Sleep'], alpha=0.5, label='Day Sleep', color='skyblue')

plt.title('Daily Sleep Distribution: Day vs Night Sleep')
plt.xlabel('Days Old')
plt.ylabel('Hours of Sleep')
plt.legend()

plt.savefig(f'{visualizations_folder}day_night_sleep_distribution.png', 
            dpi=300, 
            bbox_inches='tight')
plt.show()

# Calculate weekly averages
weekly_sleep = daily_sleep.groupby('Weeks Old')[['Day Sleep', 'Night Sleep', 'Total Sleep']].agg(
    ['mean', 'std']).reset_index()

# Create line plot for weekly sleep averages
plt.figure(figsize=(12, 6))
for sleep_type in ['Day Sleep', 'Night Sleep', 'Total Sleep']:
    mean_col = (sleep_type, 'mean')
    std_col = (sleep_type, 'std')
    
    plt.plot(weekly_sleep['Weeks Old'], 
            weekly_sleep[mean_col], 
            label=sleep_type,
            linewidth=2)
    
    plt.fill_between(weekly_sleep['Weeks Old'],
                     weekly_sleep[mean_col] - weekly_sleep[std_col],
                     weekly_sleep[mean_col] + weekly_sleep[std_col],
                     alpha=0.2)

plt.title('Weekly Sleep Averages')
plt.xlabel('Weeks Old')
plt.ylabel('Hours of Sleep')
plt.legend()

plt.savefig(f'{visualizations_folder}weekly_sleep_averages.png', 
            dpi=300, 
            bbox_inches='tight')
plt.show()

# Create box plots for day/night sleep distribution by week
sleep_long = pd.melt(daily_sleep.reset_index(), 
                     id_vars=['Days Old', 'Weeks Old'], 
                     value_vars=['Day Sleep', 'Night Sleep'],
                     var_name='Sleep Type', 
                     value_name='Hours')

plt.figure(figsize=(15, 6))
sns.boxplot(data=sleep_long, x='Weeks Old', y='Hours', hue='Sleep Type')
plt.title('Distribution of Day/Night Sleep by Week')
plt.xticks(rotation=45)

plt.savefig(f'{visualizations_folder}sleep_distribution_boxplot.png', 
            dpi=300, 
            bbox_inches='tight')
plt.show()
No description has been provided for this image
No description has been provided for this image
No description has been provided for this image
In [16]:
# Statistical Analysis of Sleep Patterns
# This section provides:
# Linear regression analysis of longest sleep periods, showing the trend over time with statistical significance
# Monthly statistics including count, mean, standard deviation, min, and max sleep durations
# Box plots showing the distribution of sleep durations by month
# Week-over-week changes analysis to identify significant changes in sleep patterns
# The code includes:
# Trend analysis with R² values and p-values
# Monthly breakdowns of sleep patterns
# Visualization of distributions
# Analysis of changes between consecutive weeks
# 1. Linear regression for sleep duration trends
from scipy import stats

# Analyze trend in longest sleep periods
slope, intercept, r_value, p_value, std_err = stats.linregress(
    df_longest_sleep_periods_by_day['Days Old'],
    df_longest_sleep_periods_by_day['Hours']
)

# Create figure for regression analysis
plt.figure(figsize=(12, 6))
plt.scatter(df_longest_sleep_periods_by_day['Days Old'], 
           df_longest_sleep_periods_by_day['Hours'], 
           alpha=0.5)
plt.plot(df_longest_sleep_periods_by_day['Days Old'], 
         intercept + slope * df_longest_sleep_periods_by_day['Days Old'], 
         'r', label=f'Trend line (R² = {r_value**2:.3f})')

plt.title('Longest Sleep Duration Trend Analysis')
plt.xlabel('Days Old')
plt.ylabel('Hours of Sleep')
plt.legend()

# Add statistical information to plot
stats_text = (f'Slope: {slope:.3f} hours/day\n'
             f'P-value: {p_value:.3f}\n'
             f'R²: {r_value**2:.3f}')
plt.text(0.05, 0.95, stats_text, 
         transform=plt.gca().transAxes, 
         bbox=dict(facecolor='white', alpha=0.8))

plt.savefig(f'{visualizations_folder}sleep_duration_trend.png', 
            dpi=300, 
            bbox_inches='tight')
plt.show()

# 2. Monthly statistics
df['Month'] = df['Sleep Start'].dt.to_period('M')
monthly_stats = df.groupby('Month')['Sleep Duration'].agg([
    'count',
    'mean',
    'std',
    'min',
    'max'
]).round(2)

print("\nMonthly Sleep Statistics:")
print(monthly_stats)

# Visualize monthly distributions
plt.figure(figsize=(15, 6))
sns.boxplot(data=df, x=df['Sleep Start'].dt.strftime('%Y-%m'), y='Sleep Duration')
plt.xticks(rotation=45)
plt.title('Monthly Sleep Duration Distributions')
plt.xlabel('Month')
plt.ylabel('Hours of Sleep')

plt.savefig(f'{visualizations_folder}monthly_sleep_distributions.png', 
            dpi=300, 
            bbox_inches='tight')
plt.show()

# 3. Week-over-week changes
weekly_avg = df.groupby('Weeks Old')['Sleep Duration'].mean()
week_over_week_change = weekly_avg.diff()

plt.figure(figsize=(12, 6))
plt.bar(week_over_week_change.index, week_over_week_change.values)
plt.axhline(y=0, color='r', linestyle='-', alpha=0.3)
plt.title('Week-over-Week Changes in Average Sleep Duration')
plt.xlabel('Week Number')
plt.ylabel('Change in Average Sleep Duration (Hours)')

plt.savefig(f'{visualizations_folder}week_over_week_changes.png', 
            dpi=300, 
            bbox_inches='tight')
plt.show()

# Print summary statistics
print("\nSummary Statistics:")
print(f"Average change in sleep duration between weeks: {week_over_week_change.mean():.2f} hours")
print(f"Largest increase: {week_over_week_change.max():.2f} hours (Week {week_over_week_change.idxmax()})")
print(f"Largest decrease: {week_over_week_change.min():.2f} hours (Week {week_over_week_change.idxmin()})")
No description has been provided for this image
Monthly Sleep Statistics:
         count  mean   std   min    max
Month                                  
2024-08     15  1.71  0.52  0.35   2.42
2024-09     23  1.96  1.35  0.25   4.97
2024-10    200  2.07  1.45  0.18   8.13
2024-11    182  2.40  2.17  0.15  10.50
2024-12    198  2.19  2.12  0.08  10.85
2025-01     49  1.88  1.71  0.28   6.23
No description has been provided for this image
No description has been provided for this image
Summary Statistics:
Average change in sleep duration between weeks: 0.01 hours
Largest increase: 1.65 hours (Week 14)
Largest decrease: -0.94 hours (Week 12)
In [17]:
# Sleep Onset Time Analysis
# Scatter plot of evening sleep onset times with trend line
# Weekly average sleep onset times and their variability
# Box plots showing the distribution of onset times by week
# Analysis of sleep onset consistency (variability) over time
# Key features:
# Filters for evening sleep periods (after 6 PM)
# Shows actual clock times instead of decimal hours
# Calculates trend in minutes per day
# Measures consistency of sleep onset timing
# Provides weekly statistics for onset times and durations
# Convert sleep start times to hours since start of day (for easier plotting)
df['Sleep Start Time'] = df['Sleep Start'].dt.hour + df['Sleep Start'].dt.minute / 60

# Filter for evening sleep onset (between 6 PM and midnight)
evening_sleep = df[
    (df['Sleep Start Time'] >= 18) & 
    (df['Sleep Start Time'] <= 24) & 
    (df['Sleep Duration'] >= 3)  # Filter for longer sleep periods
].copy()

# Create scatter plot of evening sleep onset times
plt.figure(figsize=(12, 6))
plt.scatter(evening_sleep['Days Old'], 
           evening_sleep['Sleep Start Time'], 
           alpha=0.5)

# Add trend line
slope, intercept, r_value, p_value, std_err = stats.linregress(
    evening_sleep['Days Old'],
    evening_sleep['Sleep Start Time']
)
x = evening_sleep['Days Old']
plt.plot(x, intercept + slope * x, 'r', 
         label=f'Trend line (R² = {r_value**2:.3f})')

# Customize y-axis to show actual times
plt.yticks(range(18, 25), 
          [f'{i:02d}:00' for i in range(18, 25)])
plt.title('Evening Sleep Onset Times')
plt.xlabel('Days Old')
plt.ylabel('Time of Day')
plt.legend()

# Add statistical information
stats_text = (f'Trend: {slope*60:.1f} minutes per day\n'
             f'P-value: {p_value:.3f}')
plt.text(0.05, 0.95, stats_text, 
         transform=plt.gca().transAxes, 
         bbox=dict(facecolor='white', alpha=0.8))

plt.savefig(f'{visualizations_folder}sleep_onset_trend.png', 
            dpi=300, 
            bbox_inches='tight')
plt.show()

# Calculate weekly average sleep onset times
weekly_onset = evening_sleep.groupby('Weeks Old').agg({
    'Sleep Start Time': ['mean', 'std', 'count'],
    'Sleep Duration': 'mean'
}).round(2)

print("\nWeekly Sleep Onset Statistics:")
print(weekly_onset)

# Visualize distribution of sleep onset times by week
plt.figure(figsize=(15, 6))
sns.boxplot(data=evening_sleep, 
           x='Weeks Old', 
           y='Sleep Start Time')

plt.yticks(range(18, 25), 
          [f'{i:02d}:00' for i in range(18, 25)])
plt.title('Distribution of Evening Sleep Onset Times by Week')
plt.xlabel('Weeks Old')
plt.ylabel('Time of Day')

plt.savefig(f'{visualizations_folder}sleep_onset_distribution.png', 
            dpi=300, 
            bbox_inches='tight')
plt.show()

# Calculate consistency metrics
evening_sleep['Minutes Past 6PM'] = (evening_sleep['Sleep Start Time'] - 18) * 60

# Calculate weekly variability in sleep onset
weekly_variability = evening_sleep.groupby('Weeks Old')['Minutes Past 6PM'].agg([
    'std',
    'mean',
    'count'
]).round(2)

print("\nWeekly Sleep Onset Variability (in minutes):")
print(weekly_variability)

# Visualize onset time variability
plt.figure(figsize=(12, 6))
plt.bar(weekly_variability.index, 
        weekly_variability['std'], 
        alpha=0.6)
plt.title('Weekly Variability in Sleep Onset Time')
plt.xlabel('Weeks Old')
plt.ylabel('Standard Deviation (minutes)')

plt.savefig(f'{visualizations_folder}sleep_onset_variability.png', 
            dpi=300, 
            bbox_inches='tight')
plt.show()
No description has been provided for this image
Weekly Sleep Onset Statistics:
          Sleep Start Time             Sleep Duration
                      mean   std count           mean
Weeks Old                                            
14                   21.83   NaN     1           4.97
15                   21.15  0.88     7           4.35
16                   21.28  0.71     4           5.45
17                   22.57  0.97     5           4.45
18                   22.25  0.65     7           5.34
19                   21.69  0.62     7           4.74
20                   20.16  1.35     8           5.44
21                   20.45  0.50     6           7.36
22                   20.75  0.47     7           5.98
23                   20.70  0.60     7           6.80
24                   20.56  0.36     7           6.07
25                   20.32  1.07     7           6.51
26                   20.61  0.64     6           6.48
27                   20.88  1.08     7           5.58
28                   19.90  0.39     6           5.64
29                   21.14  0.53     3           4.99
No description has been provided for this image
Weekly Sleep Onset Variability (in minutes):
             std    mean  count
Weeks Old                      
14           NaN  230.00      1
15         52.81  189.29      7
16         42.53  196.75      4
17         58.44  274.20      5
18         39.14  254.86      7
19         37.17  221.57      7
20         80.90  129.62      8
21         30.10  146.83      6
22         28.19  165.00      7
23         35.89  161.71      7
24         21.74  153.86      7
25         63.99  139.00      7
26         38.26  156.33      6
27         64.65  173.00      7
28         23.65  113.83      6
29         31.94  188.67      3
No description has been provided for this image
In [19]:
# Sleep Consistency and Variability Analysis

# 1. Daily sleep pattern consistency
def calculate_daily_pattern_metrics(group):
    return pd.Series({
        'number_of_naps': len(group[~group['Is Night Sleep']]),
        'total_nap_hours': group[~group['Is Night Sleep']]['Sleep Duration'].sum(),
        'night_sleep_hours': group[group['Is Night Sleep']]['Sleep Duration'].sum(),
        'sleep_episodes': len(group),
        'avg_episode_length': group['Sleep Duration'].mean()
    })

daily_patterns = df.groupby([df['Sleep Start'].dt.date]).apply(calculate_daily_pattern_metrics)
daily_patterns['Days Old'] = (pd.to_datetime(daily_patterns.index) - pd.Timestamp(dob)).days
daily_patterns['Weeks Old'] = daily_patterns['Days Old'] // 7  # Add this line

# Visualize number of naps over time
plt.figure(figsize=(12, 6))
plt.scatter(daily_patterns['Days Old'], daily_patterns['number_of_naps'], alpha=0.5)
plt.title('Number of Naps per Day')
plt.xlabel('Days Old')
plt.ylabel('Number of Naps')

# Add trend line
slope, intercept, r_value, p_value, std_err = stats.linregress(
    daily_patterns['Days Old'],
    daily_patterns['number_of_naps']
)
x = daily_patterns['Days Old']
plt.plot(x, intercept + slope * x, 'r', 
         label=f'Trend (R² = {r_value**2:.3f})')
plt.legend()

plt.savefig(f'{visualizations_folder}naps_per_day.png', 
            dpi=300, 
            bbox_inches='tight')
plt.show()

# 2. Sleep episode timing consistency
plt.figure(figsize=(12, 6))
plt.scatter(df['Days Old'], df['Sleep Start Time'], alpha=0.3)
plt.title('Sleep Episode Start Times')
plt.xlabel('Days Old')
plt.ylabel('Time of Day')
plt.yticks(range(0, 24), [f'{i:02d}:00' for i in range(0, 24)])

plt.savefig(f'{visualizations_folder}sleep_timing_scatter.png', 
            dpi=300, 
            bbox_inches='tight')
plt.show()

# 3. Weekly sleep pattern stability
weekly_patterns = daily_patterns.groupby('Weeks Old').agg({  # Changed this line
    'number_of_naps': ['mean', 'std'],
    'total_nap_hours': ['mean', 'std'],
    'night_sleep_hours': ['mean', 'std'],
    'sleep_episodes': ['mean', 'std'],
    'avg_episode_length': ['mean', 'std']
}).round(2)

print("\nWeekly Sleep Pattern Statistics:")
print(weekly_patterns)

# 4. Visualize sleep pattern stability
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
fig.suptitle('Weekly Sleep Pattern Stability', fontsize=16)

# Plot 1: Nap consistency
axes[0, 0].errorbar(weekly_patterns.index, 
                    weekly_patterns[('number_of_naps', 'mean')],
                    yerr=weekly_patterns[('number_of_naps', 'std')],
                    fmt='o-')
axes[0, 0].set_title('Number of Naps')
axes[0, 0].set_xlabel('Weeks Old')
axes[0, 0].set_ylabel('Average Naps per Day')

# Plot 2: Nap hours
axes[0, 1].errorbar(weekly_patterns.index, 
                    weekly_patterns[('total_nap_hours', 'mean')],
                    yerr=weekly_patterns[('total_nap_hours', 'std')],
                    fmt='o-')
axes[0, 1].set_title('Daily Nap Hours')
axes[0, 1].set_xlabel('Weeks Old')
axes[0, 1].set_ylabel('Hours')

# Plot 3: Night sleep
axes[1, 0].errorbar(weekly_patterns.index, 
                    weekly_patterns[('night_sleep_hours', 'mean')],
                    yerr=weekly_patterns[('night_sleep_hours', 'std')],
                    fmt='o-')
axes[1, 0].set_title('Night Sleep Hours')
axes[1, 0].set_xlabel('Weeks Old')
axes[1, 0].set_ylabel('Hours')

# Plot 4: Average episode length
axes[1, 1].errorbar(weekly_patterns.index, 
                    weekly_patterns[('avg_episode_length', 'mean')],
                    yerr=weekly_patterns[('avg_episode_length', 'std')],
                    fmt='o-')
axes[1, 1].set_title('Average Sleep Episode Length')
axes[1, 1].set_xlabel('Weeks Old')
axes[1, 1].set_ylabel('Hours')

plt.tight_layout()
plt.savefig(f'{visualizations_folder}sleep_pattern_stability.png', 
            dpi=300, 
            bbox_inches='tight')
plt.show()

# Calculate and print overall consistency metrics
print("\nOverall Sleep Pattern Consistency Metrics:")
print(f"Average number of naps per day: {daily_patterns['number_of_naps'].mean():.2f} ± {daily_patterns['number_of_naps'].std():.2f}")
print(f"Average total nap hours: {daily_patterns['total_nap_hours'].mean():.2f} ± {daily_patterns['total_nap_hours'].std():.2f}")
print(f"Average night sleep hours: {daily_patterns['night_sleep_hours'].mean():.2f} ± {daily_patterns['night_sleep_hours'].std():.2f}")
No description has been provided for this image
No description has been provided for this image
Weekly Sleep Pattern Statistics:
          number_of_naps       total_nap_hours       night_sleep_hours        \
                    mean   std            mean   std              mean   std   
Weeks Old                                                                      
9                   3.33  0.58            5.47  1.92              2.39  2.50   
10                  1.00   NaN            2.00   NaN              0.00   NaN   
11                  0.00   NaN            0.00   NaN              2.92   NaN   
12                  1.00   NaN            0.52   NaN              0.00   NaN   
14                  2.00  1.41            2.93  0.75              2.48  3.51   
15                  4.14  0.90            5.89  1.33              8.16  1.47   
16                  3.43  1.72            4.90  2.59              6.90  2.61   
17                  3.71  1.11            6.03  2.22              7.59  2.83   
18                  4.14  0.38            5.99  1.56              8.32  1.25   
19                  4.29  0.49            6.33  1.89              7.50  2.47   
20                  3.57  0.98            4.87  2.14              9.51  2.50   
21                  3.86  1.21            4.29  1.42             10.62  2.42   
22                  3.57  0.98            5.40  1.26              9.15  2.09   
23                  4.29  0.49            5.65  1.14              8.87  1.77   
24                  4.86  0.90            4.58  1.67              9.77  2.38   
25                  3.71  0.76            4.90  1.47              9.30  2.18   
26                  4.43  0.98            5.04  1.13              9.13  2.03   
27                  4.00  0.58            3.78  1.16              9.65  1.78   
28                  3.57  0.53            3.25  0.37              9.72  1.49   
29                  3.00  2.00            2.54  1.79              7.88  3.59   

          sleep_episodes       avg_episode_length        
                    mean   std               mean   std  
Weeks Old                                                
9                   4.67  1.53               1.61  0.40  
10                  1.00   NaN               2.00   NaN  
11                  2.00   NaN               1.46   NaN  
12                  1.00   NaN               0.52   NaN  
14                  2.50  2.12               2.25  0.21  
15                  6.86  1.35               2.07  0.26  
16                  5.86  2.04               2.03  0.24  
17                  6.86  1.95               2.06  0.45  
18                  6.71  0.49               2.15  0.31  
19                  6.43  0.79               2.17  0.26  
20                  5.57  0.98               2.62  0.43  
21                  6.14  1.77               2.60  0.87  
22                  6.14  1.35               2.43  0.44  
23                  6.14  1.07               2.40  0.48  
24                  7.29  1.38               2.03  0.55  
25                  5.57  1.27               2.66  0.65  
26                  6.43  1.27               2.32  0.82  
27                  6.57  0.53               2.04  0.29  
28                  6.14  0.69               2.14  0.39  
29                  5.75  3.20               2.24  1.08  
No description has been provided for this image
Overall Sleep Pattern Consistency Metrics:
Average number of naps per day: 3.79 ± 1.15
Average total nap hours: 4.83 ± 1.90
Average night sleep hours: 8.33 ± 2.90
In [20]:
# Comparison to Typical Baby Sleep Guidelines

# Define typical sleep guidelines by age (in months)
# Source: American Academy of Pediatrics (AAP) and National Sleep Foundation
sleep_guidelines = pd.DataFrame({
    'Age_Months': [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    'Total_Sleep_Min': [14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 13, 13, 13],  # minimum hours
    'Total_Sleep_Max': [17, 17, 17, 17, 17, 17, 16, 16, 16, 16, 15, 15, 15],  # maximum hours
    'Naps_Typical': [6, 5, 4, 4, 3, 3, 3, 2, 2, 2, 2, 2, 2]  # typical number of naps
})

# Calculate baby's age in months for each day
daily_patterns['Months Old'] = daily_patterns['Days Old'] // 30

# Calculate monthly averages for comparison
monthly_averages = daily_patterns.groupby('Months Old').agg({
    'total_nap_hours': 'mean',
    'night_sleep_hours': 'mean',
    'number_of_naps': 'mean'
}).round(2)

monthly_averages['total_sleep'] = monthly_averages['total_nap_hours'] + monthly_averages['night_sleep_hours']

# Create comparison visualization
plt.figure(figsize=(12, 6))

# Plot guidelines range
plt.fill_between(sleep_guidelines['Age_Months'], 
                 sleep_guidelines['Total_Sleep_Min'],
                 sleep_guidelines['Total_Sleep_Max'],
                 alpha=0.3, 
                 color='gray',
                 label='Recommended Range')

# Plot actual total sleep
plt.plot(monthly_averages.index, 
         monthly_averages['total_sleep'],
         'bo-',
         label='Actual Total Sleep',
         linewidth=2)

plt.title('Total Sleep Hours Compared to Guidelines')
plt.xlabel('Months Old')
plt.ylabel('Hours of Sleep per Day')
plt.legend()
plt.grid(True, alpha=0.3)

plt.savefig(f'{visualizations_folder}sleep_guidelines_comparison.png', 
            dpi=300, 
            bbox_inches='tight')
plt.show()

# Compare number of naps
plt.figure(figsize=(12, 6))
plt.plot(sleep_guidelines['Age_Months'], 
         sleep_guidelines['Naps_Typical'],
         'r--',
         label='Typical Naps',
         linewidth=2)
plt.plot(monthly_averages.index,
         monthly_averages['number_of_naps'],
         'bo-',
         label='Actual Naps',
         linewidth=2)

plt.title('Number of Naps Compared to Typical Patterns')
plt.xlabel('Months Old')
plt.ylabel('Number of Naps per Day')
plt.legend()
plt.grid(True, alpha=0.3)

plt.savefig(f'{visualizations_folder}naps_guidelines_comparison.png', 
            dpi=300, 
            bbox_inches='tight')
plt.show()

# Print detailed comparison
print("\nMonthly Sleep Pattern Comparison to Guidelines:")
print("---------------------------------------------")
for month in monthly_averages.index:
    if month in sleep_guidelines['Age_Months'].values:
        guideline = sleep_guidelines[sleep_guidelines['Age_Months'] == month].iloc[0]
        actual = monthly_averages.loc[month]
        
        status = "WITHIN RANGE"
        if actual['total_sleep'] < guideline['Total_Sleep_Min']:
            status = "BELOW RANGE"
        elif actual['total_sleep'] > guideline['Total_Sleep_Max']:
            status = "ABOVE RANGE"
            
        print(f"\nMonth {month}:")
        print(f"Total Sleep: {actual['total_sleep']:.1f} hours (Guideline: {guideline['Total_Sleep_Min']}-{guideline['Total_Sleep_Max']} hours) - {status}")
        print(f"Number of Naps: {actual['number_of_naps']:.1f} (Typical: {guideline['Naps_Typical']})")
        print(f"Night Sleep: {actual['night_sleep_hours']:.1f} hours")
        print(f"Nap Hours: {actual['total_nap_hours']:.1f} hours")

# Calculate percentage of days within guidelines
def within_guidelines(row):
    month = row['Months Old']
    if month in sleep_guidelines['Age_Months'].values:
        guideline = sleep_guidelines[sleep_guidelines['Age_Months'] == month].iloc[0]
        total_sleep = row['total_nap_hours'] + row['night_sleep_hours']
        return guideline['Total_Sleep_Min'] <= total_sleep <= guideline['Total_Sleep_Max']
    return None

daily_patterns['within_guidelines'] = daily_patterns.apply(within_guidelines, axis=1)
compliance_rate = (daily_patterns['within_guidelines'].sum() / 
                  daily_patterns['within_guidelines'].count() * 100)

print(f"\nOverall Compliance with Sleep Guidelines: {compliance_rate:.1f}%")
No description has been provided for this image
No description has been provided for this image
Monthly Sleep Pattern Comparison to Guidelines:
---------------------------------------------

Month 2:
Total Sleep: 4.8 hours (Guideline: 14-17 hours) - BELOW RANGE
Number of Naps: 2.0 (Typical: 4)
Night Sleep: 1.7 hours
Nap Hours: 3.2 hours

Month 3:
Total Sleep: 12.0 hours (Guideline: 14-17 hours) - BELOW RANGE
Number of Naps: 3.6 (Typical: 4)
Night Sleep: 6.7 hours
Nap Hours: 5.2 hours

Month 4:
Total Sleep: 14.0 hours (Guideline: 14-17 hours) - WITHIN RANGE
Number of Naps: 3.9 (Typical: 3)
Night Sleep: 8.5 hours
Nap Hours: 5.5 hours

Month 5:
Total Sleep: 14.8 hours (Guideline: 14-17 hours) - WITHIN RANGE
Number of Naps: 4.1 (Typical: 3)
Night Sleep: 9.7 hours
Nap Hours: 5.1 hours

Month 6:
Total Sleep: 13.0 hours (Guideline: 14-16 hours) - BELOW RANGE
Number of Naps: 3.9 (Typical: 3)
Night Sleep: 9.1 hours
Nap Hours: 3.9 hours

Overall Compliance with Sleep Guidelines: 35.5%
In [ ]: