本文档介绍如何将未压缩的 PCM 音频文件转为 WAV 文件:只需要在 PCM 数据的起始位置加上 44 个字节的 WAV 头信息即可。
所有 WAV 文件都以一个文件头开始。文件头用于说明文件类型、采样率、样本大小(位深)等规格,以及数据的总长度。
WAV 格式介绍:
WAV 文件的头部长度为 44 字节,格式如下:
PCM转WAV格式的 Kotlin 实现
package cn.itmob.audio.process
object AudioUtil {
// Sample rate: 16 kHz
internal val AUDIO_SAMPLE_RATE = 16000
// Channel configuration: mono
internal val AUDIO_CHANNEL: Int = AudioFormat.CHANNEL_IN_MONO
// Encoding: 16-bit PCM
internal val AUDIO_ENCODING: Int = AudioFormat.ENCODING_PCM_16BIT
// Bit depth (bits per sample)
internal val BIT_PER_SAMPLE: Int = 16
// Number of channels
internal var CHANNEL_COUNT = 1
/**
 * Converts a raw PCM file into a WAV file by writing a 44-byte WAV header
 * followed by the unmodified PCM data.
 *
 * An [IOException] raised while reading or writing is caught and its stack
 * trace printed; the output path is returned either way, so the file at that
 * path may be missing or incomplete after a failure.
 *
 * @param context not used by this function
 * @param fcmPath path of the PCM input file
 * @param wavPath path of the WAV output file to create
 * @return absolute path of the WAV output file
 */
fun pcmToWav(context: Context, fcmPath: String, wavPath: String): String {
    val wavOutputFile = File(wavPath)
    // getMinBufferSize reports failure with a negative value, which cannot be
    // used as a buffer size, so fall back to a fixed copy-buffer size then.
    val minBufferSize = AudioRecord.getMinBufferSize(AUDIO_SAMPLE_RATE, AUDIO_CHANNEL, AUDIO_ENCODING)
    val bufferSize = if (minBufferSize > 0) minBufferSize else 4096
    try {
        // `use` closes both streams even when the copy fails part-way.
        FileInputStream(File(fcmPath)).use { input ->
            FileOutputStream(wavOutputFile).use { output ->
                writeWaveFileHeader(output, input.channel.size())
                // copyTo writes only the bytes actually read on each pass, so a
                // short final read does not append stale buffer contents.
                input.copyTo(output, bufferSize)
            }
        }
    } catch (e: IOException) {
        e.printStackTrace()
    }
    return wavOutputFile.absolutePath
}
/**
 * Writes the 44-byte WAV header describing [totalAudioLen] bytes of audio
 * data to [out].
 */
private fun writeWaveFileHeader(out: FileOutputStream, totalAudioLen: Long) {
    out.write(getWavFileHeader(totalAudioLen), 0, 44)
}
/**
 * Builds the 44-byte WAV (RIFF) header for PCM audio of [totalAudioLen] bytes.
 *
 * Sample rate, channel count and bit depth are taken from [AUDIO_SAMPLE_RATE],
 * [CHANNEL_COUNT] and [BIT_PER_SAMPLE]. All numeric fields are written
 * little-endian. The two size fields are 32 bits wide, so only the low
 * 32 bits of the lengths are stored.
 *
 * @param totalAudioLen size of the PCM data in bytes
 * @return the 44-byte header
 */
private fun getWavFileHeader(totalAudioLen: Long): ByteArray {
    val channels = CHANNEL_COUNT
    // RIFF chunk size excludes the 8 bytes of "RIFF" plus the size field itself:
    // (44 - 8) header bytes + data length.
    val riffChunkSize = totalAudioLen + 36
    val byteRate = BIT_PER_SAMPLE * AUDIO_SAMPLE_RATE * channels / 8
    val blockAlign = channels * BIT_PER_SAMPLE / 8

    // Fully qualified names are used so the block needs no additional file-level imports.
    return java.nio.ByteBuffer.allocate(44)
        .order(java.nio.ByteOrder.LITTLE_ENDIAN)
        .put("RIFF".toByteArray(Charsets.US_ASCII))   // bytes 0-3: RIFF/WAVE header
        .putInt(riffChunkSize.toInt())                // bytes 4-7: low 32 bits of the RIFF chunk size
        .put("WAVE".toByteArray(Charsets.US_ASCII))   // bytes 8-11
        .put("fmt ".toByteArray(Charsets.US_ASCII))   // bytes 12-15: 'fmt ' chunk
        .putInt(16)                                   // bytes 16-19: size of the 'fmt ' chunk
        .putShort(1.toShort())                        // bytes 20-21: format = 1 (PCM)
        .putShort(channels.toShort())                 // bytes 22-23: channel count
        .putInt(AUDIO_SAMPLE_RATE)                    // bytes 24-27: sample rate
        .putInt(byteRate)                             // bytes 28-31: bytes per second
        .putShort(blockAlign.toShort())               // bytes 32-33: bytes per sample frame
        .putShort(BIT_PER_SAMPLE.toShort())           // bytes 34-35: bits per sample
        .put("data".toByteArray(Charsets.US_ASCII))   // bytes 36-39: 'data' chunk
        .putInt(totalAudioLen.toInt())                // bytes 40-43: low 32 bits of the data length
        .array()
}