142 lines
5.2 KiB
Python
142 lines
5.2 KiB
Python
"""
MIDI to Audio Synthesizer
=========================

Renders MIDI files to clean WAV audio using sine-wave synthesis. Each note is
rendered at a fixed pitch that includes the pitch bend in effect at its onset.
Produces a clean melody signal suitable for MusicGen melody conditioning.

Usage:
    python midi_to_audio.py input.mid                       # outputs input_synth.wav
    python midi_to_audio.py input.mid -o output.wav         # custom output path
    python midi_to_audio.py input.mid --sample-rate 32000   # match MusicGen's 32kHz
"""
|
|
|
|
import argparse
|
|
import os
|
|
|
|
import mido
|
|
import numpy as np
|
|
import scipy.io.wavfile as wavfile
|
|
|
|
|
|
def midi_note_to_freq(note: float) -> float:
    """Convert a MIDI note number to its frequency in Hz.

    Uses twelve-tone equal temperament with note 69 (A4) tuned to 440 Hz.
    Fractional note numbers are accepted so that a pitch-bend offset expressed
    in semitones can be added to the note before conversion.

    Args:
        note: MIDI note number; may be fractional.

    Returns:
        The frequency in Hz.
    """
    return 440.0 * (2.0 ** ((note - 69) / 12.0))
|
|
|
|
|
|
_DEFAULT_TEMPO = 500000  # microseconds per beat, i.e. 120 BPM
_PITCH_BEND_RANGE = 2.0  # semitones at full wheel deflection (standard GM default)
_ATTACK_SECONDS = 0.01  # 10 ms fade-in
_RELEASE_SECONDS = 0.05  # 50 ms fade-out


def _timed_events(mid):
    """Return ``(seconds, track_index, msg)`` for every message, in time order.

    All tracks are placed on one shared tick timeline so that a ``set_tempo``
    found in any track affects the timing of every track.
    """
    events = []
    for track_index, track in enumerate(mid.tracks):
        tick = 0
        for position, msg in enumerate(track):
            tick += msg.time
            events.append((tick, track_index, position, msg))
    # Order by absolute tick; ties keep track order, then in-track order.
    events.sort(key=lambda event: event[:3])

    timed = []
    tempo = _DEFAULT_TEMPO
    seconds = 0.0
    previous_tick = 0
    for tick, track_index, _, msg in events:
        if tick > previous_tick:
            seconds += mido.tick2second(
                tick - previous_tick, mid.ticks_per_beat, tempo
            )
            previous_tick = tick
        if msg.type == 'set_tempo':
            # Only affects deltas that come after this event.
            tempo = msg.tempo
        timed.append((seconds, track_index, msg))
    return timed


def _synthesize_note(note, velocity, bend, n_samples, sample_rate):
    """Return ``n_samples`` of an enveloped sine tone for a single note."""
    bend_semitones = (bend / 8192.0) * _PITCH_BEND_RANGE
    freq = midi_note_to_freq(note + bend_semitones)

    t = np.arange(n_samples) / sample_rate
    tone = np.sin(2 * np.pi * freq * t)

    # Linear attack/release ramps. They are multiplied in rather than
    # assigned so that on notes shorter than attack + release the fade-out
    # does not erase the fade-in (which would make the note start with a click).
    envelope = np.ones(n_samples)
    attack = min(int(_ATTACK_SECONDS * sample_rate), n_samples)
    release = min(int(_RELEASE_SECONDS * sample_rate), n_samples)
    if attack > 0:
        envelope[:attack] *= np.linspace(0, 1, attack)
    if release > 0:
        envelope[-release:] *= np.linspace(1, 0, release)

    # Velocity scaling; the 0.5 factor leaves headroom before normalization.
    amplitude = velocity / 127.0 * 0.5
    return tone * envelope * amplitude


def render_midi(midi_path: str, sample_rate: int = 32000) -> np.ndarray:
    """Render a MIDI file to mono audio using sine-wave synthesis.

    Every note becomes a sine tone with a short linear attack and release,
    scaled by its velocity. Tempo changes apply to all tracks. Notes and pitch
    bend are tracked per (track, channel). A note that is re-triggered while
    still sounding, or that is never released, is ended at the re-trigger, at
    its track's ``end_of_track`` event, or at the end of the file.

    Simplification: each note has a constant pitch, taken from the pitch-wheel
    value in effect when the note starts; bends during a note are not followed.

    Args:
        midi_path: Path of the MIDI file to read.
        sample_rate: Output sample rate in Hz; must be positive.

    Returns:
        A one-dimensional ``float32`` array, peak-normalized to 0.9, with
        trailing silence trimmed to at most one second after the last
        audible sample.

    Raises:
        ValueError: If ``sample_rate`` is not positive, or the file is a
            type 2 (asynchronous) MIDI file, whose tracks share no timeline.
    """
    if sample_rate <= 0:
        raise ValueError(f"sample_rate must be positive, got {sample_rate}")

    mid = mido.MidiFile(midi_path)
    if mid.type == 2:
        raise ValueError("can't render type 2 (asynchronous) MIDI file")

    spans = []  # (start_sample, end_sample, note, velocity, bend)
    active_notes = {}  # (track, channel, note) -> (start_sample, velocity, bend)
    pitch_bends = {}  # (track, channel) -> wheel value (-8192 to 8191)

    def release(key, end_sample):
        """Close a sounding note and queue it for synthesis if non-empty."""
        start_sample, velocity, bend = active_notes.pop(key)
        if end_sample > start_sample:
            spans.append((start_sample, end_sample, key[2], velocity, bend))

    total_seconds = 0.0
    for seconds, track_index, msg in _timed_events(mid):
        total_seconds = seconds
        sample_pos = int(seconds * sample_rate)

        if msg.type == 'pitchwheel':
            pitch_bends[(track_index, msg.channel)] = msg.pitch

        elif msg.type == 'note_on' and msg.velocity > 0:
            key = (track_index, msg.channel, msg.note)
            if key in active_notes:
                # Re-trigger: finish the sounding note instead of losing it.
                release(key, sample_pos)
            bend = pitch_bends.get((track_index, msg.channel), 0)
            active_notes[key] = (sample_pos, msg.velocity, bend)

        elif msg.type in ('note_off', 'note_on'):
            # A note_on reaching this branch has velocity 0, i.e. a note-off.
            key = (track_index, msg.channel, msg.note)
            if key in active_notes:
                release(key, sample_pos)

        elif msg.type == 'end_of_track':
            for key in [k for k in active_notes if k[0] == track_index]:
                release(key, sample_pos)

    # Anything still sounding (e.g. a track without end_of_track) stops at
    # the time of the last event in the file.
    final_sample = int(total_seconds * sample_rate)
    for key in list(active_notes):
        release(key, final_sample)

    # Event times never decrease, so every span ends at or before final_sample
    # and always fits in the buffer.
    total_samples = final_sample + sample_rate  # +1s padding
    audio = np.zeros(total_samples, dtype=np.float64)
    for start_sample, end_sample, note, velocity, bend in spans:
        audio[start_sample:end_sample] += _synthesize_note(
            note, velocity, bend, end_sample - start_sample, sample_rate
        )

    # Normalize
    peak = np.max(np.abs(audio))
    if peak > 0:
        audio = audio / peak * 0.9

    # Trim trailing silence
    nonzero = np.nonzero(np.abs(audio) > 0.001)[0]
    if len(nonzero) > 0:
        end = min(nonzero[-1] + sample_rate, len(audio))  # +1s tail
        audio = audio[:end]

    return audio.astype(np.float32)
|
|
|
|
|
|
def main(argv=None):
    """Command-line entry point: render a MIDI file and save it as a WAV file.

    Prints a short summary of the input (duration, track count, note count),
    renders it with ``render_midi`` and writes the result. When no output path
    is given, the WAV is written next to the input as ``<input>_synth.wav``.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read the process command line.
    """
    parser = argparse.ArgumentParser(description="Render MIDI to clean audio")
    parser.add_argument("input", help="Input MIDI file")
    parser.add_argument("-o", "--output", help="Output WAV path")
    parser.add_argument("--sample-rate", "-sr", type=int, default=32000,
                        help="Sample rate (default: 32000, matches MusicGen)")
    args = parser.parse_args(argv)

    # Reject unusable rates here with a usage error rather than letting them
    # fail later inside the synthesis or the WAV writer.
    if args.sample_rate <= 0:
        parser.error("--sample-rate must be a positive integer")

    print(f"Reading MIDI: {args.input}")
    mid = mido.MidiFile(args.input)
    print(f"  Duration: {mid.length:.1f}s")
    print(f"  Tracks: {len(mid.tracks)}")

    # Count notes (a note_on with velocity 0 is a note-off, so it is excluded)
    note_count = sum(1 for track in mid.tracks for msg in track
                     if msg.type == 'note_on' and msg.velocity > 0)
    print(f"  Notes: {note_count}")

    print(f"Rendering at {args.sample_rate}Hz...")
    audio = render_midi(args.input, args.sample_rate)
    print(f"  Output duration: {len(audio) / args.sample_rate:.1f}s")

    if args.output:
        output_path = args.output
    else:
        base = os.path.splitext(args.input)[0]
        output_path = f"{base}_synth.wav"

    # dirname is '' for a bare filename; fall back to the current directory.
    os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
    wavfile.write(output_path, args.sample_rate, audio)
    print(f"Saved: {output_path}")
|
|
|
|
|
|
# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|