"""
MIDI to Audio Synthesizer
=========================
Renders MIDI files to clean WAV audio using sine-wave synthesis with pitch
bend support. Produces a clean melody signal suitable for MusicGen melody
conditioning.

Usage:
    python midi_to_audio.py input.mid                      # outputs input_synth.wav
    python midi_to_audio.py input.mid -o output.wav        # custom output path
    python midi_to_audio.py input.mid --sample-rate 32000  # match MusicGen's 32kHz
"""

import argparse
import bisect
import os

import mido
import numpy as np
import scipy.io.wavfile as wavfile

_DEFAULT_TEMPO = 500000  # microseconds per beat, i.e. 120 BPM (MIDI default)
_PITCH_BEND_RANGE = 2  # semitones (standard GM default)
_ATTACK_SECONDS = 0.01  # 10ms fade-in
_RELEASE_SECONDS = 0.05  # 50ms fade-out


def midi_note_to_freq(note: float) -> float:
    """Convert a MIDI note number to its frequency in Hz.

    Fractional note numbers are accepted so that a pitch-bend offset (in
    semitones) can be added to the note before conversion. Note 69 maps to
    440 Hz (A4).
    """
    return 440.0 * (2.0 ** ((note - 69) / 12.0))


def _build_tempo_map(mid):
    """Return the file-wide tempo segments as (start_tick, start_seconds, tempo).

    Tempo changes are gathered from every track because in multi-track files
    they normally live in one track but apply to all of them. The list is
    sorted by tick and always starts with a segment at tick 0.
    """
    changes = []
    for track in mid.tracks:
        tick = 0
        for msg in track:
            tick += msg.time
            if msg.type == 'set_tempo':
                changes.append((tick, msg.tempo))
    # Stable sort: changes sharing a tick keep their encounter order, so the
    # last one encountered wins below.
    changes.sort(key=lambda change: change[0])

    segments = [(0, 0.0, _DEFAULT_TEMPO)]
    for tick, tempo in changes:
        prev_tick, prev_seconds, prev_tempo = segments[-1]
        seconds = prev_seconds + mido.tick2second(
            tick - prev_tick, mid.ticks_per_beat, prev_tempo
        )
        if tick == prev_tick:
            segments[-1] = (tick, seconds, tempo)
        else:
            segments.append((tick, seconds, tempo))
    return segments


def _tick_to_seconds(tick, segments, segment_ticks, ticks_per_beat):
    """Convert an absolute tick position to seconds using the tempo map."""
    # segment_ticks[0] == 0 and tick >= 0, so the index is never negative.
    index = bisect.bisect_right(segment_ticks, tick) - 1
    start_tick, start_seconds, tempo = segments[index]
    return start_seconds + mido.tick2second(tick - start_tick, ticks_per_beat, tempo)


def _synthesize_note(freq, n_samples, velocity, sample_rate):
    """Return a velocity-scaled sine tone with a linear attack/release envelope."""
    t = np.arange(n_samples) / sample_rate
    tone = np.sin(2 * np.pi * freq * t)

    envelope = np.ones(n_samples)
    attack = min(int(_ATTACK_SECONDS * sample_rate), n_samples)
    release = min(int(_RELEASE_SECONDS * sample_rate), n_samples)
    # Multiply rather than assign: on notes shorter than attack + release the
    # two ramps overlap, and assigning the release would erase the fade-in
    # and make the note start at full amplitude (an audible click).
    if attack > 0:
        envelope[:attack] *= np.linspace(0, 1, attack)
    if release > 0:
        envelope[-release:] *= np.linspace(1, 0, release)

    # Scale by velocity; 0.5 leaves headroom before the final normalization.
    amplitude = velocity / 127.0 * 0.5
    tone *= envelope * amplitude
    return tone


def render_midi(midi_path: str, sample_rate: int = 32000) -> np.ndarray:
    """Render a MIDI file to audio using sine synthesis with pitch bend.

    Every completed note (a note-on followed by its note-off, or by a note-on
    with velocity 0) becomes a sine tone. The pitch-bend value in effect on
    the note's channel when the note starts is applied for the whole note;
    bend changes during a note are ignored. Notes that never receive a
    note-off are not rendered, and a repeated note-on for a sounding note
    replaces the pending one.

    Args:
        midi_path: Path of the MIDI file to read.
        sample_rate: Output sample rate in Hz.

    Returns:
        A mono float32 array, peak-normalized to 0.9 and trimmed to one
        second after the last audible sample.

    Raises:
        ValueError: If ``sample_rate`` is not positive.
    """
    if sample_rate <= 0:
        raise ValueError(f"sample_rate must be positive, got {sample_rate}")

    mid = mido.MidiFile(midi_path)

    # Size the buffer from the file's total duration.
    total_samples = int(mid.length * sample_rate) + sample_rate  # +1s padding
    audio = np.zeros(total_samples, dtype=np.float64)

    # One tempo map shared by all tracks, so note tracks follow tempo changes
    # stored in another track and stay aligned with mid.length.
    segments = _build_tempo_map(mid)
    segment_ticks = [segment[0] for segment in segments]

    for track in mid.tracks:
        current_tick = 0
        # Note and bend state is kept per channel so that channels sharing a
        # track do not interfere with each other.
        active_notes = {}  # (channel, note) -> (start_sample, velocity, bend)
        pitch_bend = {}  # channel -> pitch bend units (-8192 to 8191)

        for msg in track:
            current_tick += msg.time

            if msg.type == 'pitchwheel':
                pitch_bend[msg.channel] = msg.pitch
                continue
            if msg.type not in ('note_on', 'note_off'):
                continue

            current_time = _tick_to_seconds(
                current_tick, segments, segment_ticks, mid.ticks_per_beat
            )
            sample_pos = int(current_time * sample_rate)
            key = (msg.channel, msg.note)

            if msg.type == 'note_on' and msg.velocity > 0:
                active_notes[key] = (
                    sample_pos,
                    msg.velocity,
                    pitch_bend.get(msg.channel, 0),
                )
                continue

            # note_off, or note_on with velocity 0 (equivalent to note_off).
            pending = active_notes.pop(key, None)
            if pending is None:
                continue
            start_sample, velocity, start_bend = pending
            end_sample = sample_pos
            if end_sample <= start_sample:
                continue
            if start_sample >= len(audio):
                # Nothing of this note fits in the buffer; skipping also
                # avoids a negative slice length when mixing.
                continue

            bend_semitones = (start_bend / 8192.0) * _PITCH_BEND_RANGE
            freq = midi_note_to_freq(msg.note + bend_semitones)
            tone = _synthesize_note(
                freq, end_sample - start_sample, velocity, sample_rate
            )

            # Mix into output, clipping the tail to the buffer length.
            end_idx = min(end_sample, len(audio))
            audio[start_sample:end_idx] += tone[:end_idx - start_sample]

    # Normalize
    peak = np.max(np.abs(audio))
    if peak > 0:
        audio = audio / peak * 0.9

    # Trim trailing silence
    nonzero = np.nonzero(np.abs(audio) > 0.001)[0]
    if len(nonzero) > 0:
        end = min(nonzero[-1] + sample_rate, len(audio))  # +1s tail
        audio = audio[:end]

    return audio.astype(np.float32)


def main():
    """Command-line entry point: render a MIDI file and save it as a WAV file."""
    parser = argparse.ArgumentParser(description="Render MIDI to clean audio")
    parser.add_argument("input", help="Input MIDI file")
    parser.add_argument("-o", "--output", help="Output WAV path")
    parser.add_argument("--sample-rate", "-sr", type=int, default=32000,
                        help="Sample rate (default: 32000, matches MusicGen)")
    args = parser.parse_args()

    print(f"Reading MIDI: {args.input}")
    mid = mido.MidiFile(args.input)
    print(f" Duration: {mid.length:.1f}s")
    print(f" Tracks: {len(mid.tracks)}")

    # Count notes
    note_count = sum(
        1
        for track in mid.tracks
        for msg in track
        if msg.type == 'note_on' and msg.velocity > 0
    )
    print(f" Notes: {note_count}")

    print(f"Rendering at {args.sample_rate}Hz...")
    audio = render_midi(args.input, args.sample_rate)
    print(f" Output duration: {len(audio) / args.sample_rate:.1f}s")

    if args.output:
        output_path = args.output
    else:
        base = os.path.splitext(args.input)[0]
        output_path = f"{base}_synth.wav"

    # dirname is empty for a bare filename; fall back to the current directory.
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    wavfile.write(output_path, args.sample_rate, audio)
    print(f"Saved: {output_path}")


if __name__ == "__main__":
    main()