sonosketch/archive/midi_to_audio.py
John Lightner 262ee6f7d1 chore(01-01): archive experimental scripts
- Move midi_to_audio.py to archive/
- Move musicgen_melody.py to archive/
2026-04-11 02:10:59 -05:00

142 lines
5.2 KiB
Python

"""
MIDI to Audio Synthesizer
=========================
Renders MIDI files to clean WAV audio using sine-wave synthesis with pitch bend support.
Produces a clean melody signal suitable for MusicGen melody conditioning.
Usage:
    python midi_to_audio.py input.mid                       # outputs input_synth.wav
    python midi_to_audio.py input.mid -o output.wav         # custom output path
    python midi_to_audio.py input.mid --sample-rate 32000   # match MusicGen's 32kHz
"""
import argparse
import os
import mido
import numpy as np
import scipy.io.wavfile as wavfile
def midi_note_to_freq(note: float) -> float:
    """Convert a MIDI note number to its frequency in Hz.

    Uses 12-tone equal temperament anchored at note 69 = 440 Hz, so each
    semitone multiplies the frequency by 2 ** (1 / 12).

    Args:
        note: MIDI note number. Fractional values are accepted so a
            pitch-bent note can be passed as ``note + bend_in_semitones``.

    Returns:
        The frequency in Hz.
    """
    return 440.0 * (2.0 ** ((note - 69) / 12.0))
def render_midi(midi_path: str, sample_rate: int = 32000) -> np.ndarray:
    """Render a MIDI file to mono audio using sine synthesis.

    Every note becomes a sine tone shaped by a 10 ms attack / 50 ms release
    envelope and scaled by its velocity. All tracks are summed into one
    buffer, which is then peak-normalized to 0.9 and trimmed to at most one
    second after the last sample whose magnitude exceeds 0.001.

    Pitch bend is applied per channel with a fixed +/-2 semitone range. Only
    the bend value in effect at note onset is used; bends arriving while the
    note sounds do not change its pitch.

    Args:
        midi_path: Path of the MIDI file to read.
        sample_rate: Output sample rate in Hz; must be positive.

    Returns:
        A one-dimensional float32 array of samples.

    Raises:
        ValueError: If ``sample_rate`` is not positive.
    """
    if sample_rate <= 0:
        raise ValueError(f"sample_rate must be positive, got {sample_rate}")

    mid = mido.MidiFile(midi_path)

    # One extra second so rounding at the very end never truncates a note.
    total_samples = int(mid.length * sample_rate) + sample_rate
    audio = np.zeros(total_samples, dtype=np.float64)

    pitch_bend_range = 2  # semitones (standard GM default)

    # Tempo changes apply to the whole file, not only to the track that
    # carries them, so all tracks share one tick -> seconds mapping.
    to_seconds = _build_tick_to_seconds(mid)

    for track in mid.tracks:
        abs_tick = 0
        # (channel, note) -> (start_sample, velocity, pitch bend at onset)
        active_notes = {}
        # channel -> most recent pitch wheel value (-8192 to 8191)
        channel_bend = {}

        for msg in track:
            abs_tick += msg.time

            if msg.type == 'pitchwheel':
                channel_bend[msg.channel] = msg.pitch
            elif msg.type == 'note_on' and msg.velocity > 0:
                start_sample = int(to_seconds(abs_tick) * sample_rate)
                active_notes[(msg.channel, msg.note)] = (
                    start_sample,
                    msg.velocity,
                    channel_bend.get(msg.channel, 0),
                )
            elif msg.type in ('note_off', 'note_on'):
                # A note_on reaching this branch has velocity 0, which is a
                # note_off by convention.
                started = active_notes.pop((msg.channel, msg.note), None)
                if started is None:
                    continue
                start_sample, velocity, start_bend = started

                # Clamp to the buffer so a late note can never cause a
                # shape mismatch when mixing.
                end_sample = min(
                    int(to_seconds(abs_tick) * sample_rate), len(audio)
                )
                if end_sample <= start_sample:
                    continue

                bend_semitones = (start_bend / 8192.0) * pitch_bend_range
                freq = midi_note_to_freq(msg.note + bend_semitones)

                n_samples = end_sample - start_sample
                audio[start_sample:end_sample] += _synthesize_note(
                    freq, n_samples, velocity, sample_rate
                )

    # Normalize to a peak of 0.9 (skipped for an all-silent render).
    peak = np.max(np.abs(audio))
    if peak > 0:
        audio = audio / peak * 0.9

    # Trim trailing silence, keeping up to one second of tail.
    nonzero = np.nonzero(np.abs(audio) > 0.001)[0]
    if len(nonzero) > 0:
        end = min(nonzero[-1] + sample_rate, len(audio))
        audio = audio[:end]

    return audio.astype(np.float32)


def _build_tick_to_seconds(mid):
    """Return a function converting an absolute tick to seconds for *mid*.

    Collects the ``set_tempo`` messages of every track into one tempo map,
    starting from the default of 500000 us per beat (120 BPM).
    """
    changes = []
    for track in mid.tracks:
        tick = 0
        for msg in track:
            tick += msg.time
            if msg.type == 'set_tempo':
                changes.append((tick, msg.tempo))
    # Stable sort: changes on the same tick keep their track order.
    changes.sort(key=lambda change: change[0])

    # Parallel lists describing each constant-tempo segment.
    seg_ticks = [0]
    seg_seconds = [0.0]
    seg_tempos = [500000]
    for tick, tempo in changes:
        elapsed = mido.tick2second(
            tick - seg_ticks[-1], mid.ticks_per_beat, seg_tempos[-1]
        )
        seg_ticks.append(tick)
        seg_seconds.append(seg_seconds[-1] + elapsed)
        seg_tempos.append(tempo)
    tick_index = np.asarray(seg_ticks)

    def to_seconds(tick):
        # Last segment starting at or before `tick`; segment 0 starts at
        # tick 0, so the index is never negative.
        i = int(np.searchsorted(tick_index, tick, side='right')) - 1
        return seg_seconds[i] + mido.tick2second(
            tick - seg_ticks[i], mid.ticks_per_beat, seg_tempos[i]
        )

    return to_seconds


def _synthesize_note(freq, n_samples, velocity, sample_rate):
    """Return *n_samples* of an enveloped sine tone at *freq* Hz."""
    t = np.arange(n_samples) / sample_rate
    tone = np.sin(2 * np.pi * freq * t)

    envelope = np.ones(n_samples)
    attack = min(int(0.01 * sample_rate), n_samples)   # 10ms attack
    release = min(int(0.05 * sample_rate), n_samples)  # 50ms release
    # Multiply rather than assign: on notes shorter than attack + release
    # the ramps overlap, and assigning would let the release overwrite the
    # attack so the note starts at full amplitude (audible click).
    if attack > 0:
        envelope[:attack] *= np.linspace(0, 1, attack)
    if release > 0:
        envelope[-release:] *= np.linspace(1, 0, release)

    # Velocity 127 maps to an amplitude of 0.5 before normalization.
    amplitude = velocity / 127.0 * 0.5
    tone *= envelope * amplitude
    return tone
def main():
    """Command-line entry point: render one MIDI file to a WAV file.

    Parses the arguments, prints a short summary of the input (duration,
    track count, number of note-on events), renders it with ``render_midi``
    and writes the result. Without ``-o/--output`` the WAV is written next
    to the input as ``<input stem>_synth.wav``.
    """
    cli = argparse.ArgumentParser(description="Render MIDI to clean audio")
    cli.add_argument("input", help="Input MIDI file")
    cli.add_argument("-o", "--output", help="Output WAV path")
    cli.add_argument(
        "--sample-rate", "-sr",
        type=int,
        default=32000,
        help="Sample rate (default: 32000, matches MusicGen)",
    )
    args = cli.parse_args()
    rate = args.sample_rate

    print(f"Reading MIDI: {args.input}")
    midi_file = mido.MidiFile(args.input)
    print(f" Duration: {midi_file.length:.1f}s")
    print(f" Tracks: {len(midi_file.tracks)}")

    # Only note_on events with a non-zero velocity start a note.
    note_count = 0
    for track in midi_file.tracks:
        for msg in track:
            if msg.type == 'note_on' and msg.velocity > 0:
                note_count += 1
    print(f" Notes: {note_count}")

    print(f"Rendering at {rate}Hz...")
    audio = render_midi(args.input, rate)
    print(f" Output duration: {len(audio) / rate:.1f}s")

    stem = os.path.splitext(args.input)[0]
    output_path = args.output if args.output else f"{stem}_synth.wav"

    # A bare filename has no directory part; fall back to the current one.
    out_dir = os.path.dirname(output_path)
    os.makedirs(out_dir if out_dir else ".", exist_ok=True)

    wavfile.write(output_path, rate, audio)
    print(f"Saved: {output_path}")
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()