# Extract and visualize MFCC features from child-speech audio clips.

import librosa

import librosa.display

import numpy as np

import matplotlib.pyplot as plt

def process_child_speech(file_path, sr=16000, n_mfcc=13):
    """Extract MFCC features from a speech recording.

    Parameters
    ----------
    file_path : str or path-like
        Path to an audio file readable by ``librosa.load``.
    sr : int, optional
        Target sample rate. Defaults to 16000 because most speech
        models expect 16 kHz audio.
    n_mfcc : int, optional
        Number of MFCC coefficients per frame (default 13).

    Returns
    -------
    tuple
        ``(mfccs, mfccs_processed)`` where ``mfccs`` is an array of
        shape ``(n_mfcc, n_frames)`` and ``mfccs_processed`` is the
        per-coefficient mean over time, shape ``(n_mfcc,)``.
    """
    # 1. Load the audio file, resampled to the target rate.
    audio, sample_rate = librosa.load(file_path, sr=sr)

    # 2. Extract MFCCs — these represent the 'phones' (sounds)
    # the child is making.
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

    # 3. Average each coefficient over time (mfccs is (n_mfcc, n_frames),
    # so transposing and taking the axis-0 mean collapses the time axis).
    # This gives a single 'summary' vector for the whole clip.
    mfccs_processed = np.mean(mfccs.T, axis=0)

    return mfccs, mfccs_processed

# --- Visualization (to see what the AI "sees") ---

def plot_speech_features(mfccs):
    """Display an MFCC matrix as a heatmap with a time axis.

    Parameters
    ----------
    mfccs : np.ndarray
        MFCC matrix of shape (n_mfcc, n_frames), e.g. the first value
        returned by ``process_child_speech``.

    Notes
    -----
    Opens a matplotlib window via ``plt.show()``; blocks until closed
    in interactive backends.
    """
    plt.figure(figsize=(10, 4))
    # specshow maps frame indices to seconds on the x-axis.
    librosa.display.specshow(mfccs, x_axis='time')
    plt.colorbar()
    plt.title('MFCC: The Sound Fingerprint')
    plt.tight_layout()
    plt.show()

# Example usage:
# Replace 'child_voice.wav' with a file from your training data.
# mfccs, processed_data = process_child_speech('child_voice.wav')
# plot_speech_features(mfccs)