diff -Nru sonic-0.1.17/ChangeLog sonic-0.1.18/ChangeLog --- sonic-0.1.17/ChangeLog 2011-04-19 05:52:00.000000000 -0600 +++ sonic-0.1.18/ChangeLog 2011-07-16 10:13:17.000000000 -0600 @@ -1,3 +1,29 @@ +2011-07-16 Bill Cox + + * README, Sonic.java, sonic.c: Updated license to allow easier + inclusion in projects + +2011-07-16 Bill Cox + + * Main.java, README, Sonic.java: Native Java port working + +2011-07-15 Bill Cox + + * README, Sonic.java, sonic.c: Working on Java native port + +2011-07-15 Bill Cox + + * sonic.c: Spelling typo + +2011-07-15 Bill Cox + + * sonic.c: Just fixed a goober on a return value that wasn't a bug, + but was wrong + +2011-05-11 Bill Cox + + * doc/index.md: Just refreshed the web documentation + 2011-04-19 Bill Cox * Makefile: Dont remove Changelog and version in make clean diff -Nru sonic-0.1.17/debian/changelog sonic-0.1.18/debian/changelog --- sonic-0.1.17/debian/changelog 2011-10-23 16:47:06.000000000 -0600 +++ sonic-0.1.18/debian/changelog 2012-09-26 21:58:00.000000000 -0600 @@ -1,3 +1,15 @@ +sonic (0.1.18-0ubuntu1) UNRELEASED; urgency=low + + * Non-maintainer upload with maintainer's approval + * New upstream release (LP: #1057272) + - Native java port + - Refreshed documentation + - sonic.c - typo and return value fixes + - License updated to allow Sonic.java and sonic.c to be directly included + in a project rather than linking + + -- Matthew Fischer Wed, 26 Sep 2012 21:32:27 -0600 + sonic (0.1.17-1.1) unstable; urgency=low [ Luke Yelavich ] diff -Nru sonic-0.1.17/debian/control sonic-0.1.18/debian/control --- sonic-0.1.17/debian/control 2011-10-23 16:44:24.000000000 -0600 +++ sonic-0.1.18/debian/control 2012-09-26 22:03:01.000000000 -0600 @@ -1,7 +1,8 @@ Source: sonic Section: sound Priority: extra -Maintainer: Bill Cox +Maintainer: Ubuntu Developers +XSBC-Original-Maintainer: Bill Cox Build-Depends: libsndfile1-dev, debhelper (>= 7.0.50~) Standards-Version: 3.9.2 Homepage: http://dev.vinux-project.org/sonic diff -Nru 
sonic-0.1.17/debian/copyright sonic-0.1.18/debian/copyright --- sonic-0.1.17/debian/copyright 2011-10-23 16:44:24.000000000 -0600 +++ sonic-0.1.18/debian/copyright 2012-09-26 21:34:09.000000000 -0600 @@ -35,6 +35,11 @@ They are main.c, and all sound samples in the samples directory. All other files are LGPL 2.1. + As a special exception, you may add the source code for sonic.c or + Sonic.java to your project, rather than linking against a libsonic or + adding Sonic.jar, but any changes to these two files must be published + according to the LGPL terms. + The Debian packaging is: Copyright (C) 2010 Bill Cox diff -Nru sonic-0.1.17/debian/patches/add-libdir.patch sonic-0.1.18/debian/patches/add-libdir.patch --- sonic-0.1.17/debian/patches/add-libdir.patch 2011-10-23 16:44:24.000000000 -0600 +++ sonic-0.1.18/debian/patches/add-libdir.patch 2012-09-26 21:34:36.000000000 -0600 @@ -3,7 +3,7 @@ --- sonic-0.1.17.orig/Makefile 2011-10-20 14:07:47.620217671 +1100 +++ sonic-0.1.17/Makefile 2011-10-20 14:23:54.464242424 +1100 @@ -7,6 +7,7 @@ - LIB_TAG=0.1.17 + LIB_TAG=0.1.18 CC=gcc PREFIX=/usr +LIBDIR=$(PREFIX)/lib diff -Nru sonic-0.1.17/doc/index.md sonic-0.1.18/doc/index.md --- sonic-0.1.17/doc/index.md 2011-03-05 04:40:11.000000000 -0700 +++ sonic-0.1.18/doc/index.md 2011-06-27 16:25:48.000000000 -0600 @@ -19,6 +19,13 @@ it's optimized for speed ups of over 2X, unlike previous algorithms for changing speech rate. The sonic library is a very simple ANSI C library that is designed to easily be integrated into streaming voice applications, like TTS back ends. +While a very new project, it is already integrated into: + +- espeak +- Debian Sid as package libsonic +- Android Astro Player Nova +- Android Osplayer +- Multiple closed source TTS engines The primary motivation behind sonic is to enable the blind and visually impaired to improve their productivity with open source speech engines, like espeak. 
@@ -35,20 +42,22 @@ ## Comparison to Other Solutions -Sonic is not like SoundTouch. SoundTouch uses WSOLA, an algorithm optimized for -changing the tempo of music. No WSOLA based program performs well for speech -(contrary to the inventor's estimate of WSOLA). Listen to [this soundstretch -sample](soundstretch.wav), which uses SoundTouch, and compare it to [this sonic -sample](sonic.wav). Both are sped up by 2X. WSOLA introduces unacceptable -levels of distortion, making speech impossible to understand at high speed (over -2.5X) by blind speed listeners. +In short, Sonic is better for speech, while WSOLA is better for music. + +A popular alternative is SoundTouch. SoundTouch uses WSOLA, an algorithm +optimized for changing the tempo of music. No WSOLA based program performs well +for speech (contrary to the inventor's estimate of WSOLA). Listen to [this +soundstretch sample](soundstretch.wav), which uses SoundTouch, and compare +it to [this sonic sample](sonic.wav). Both are sped up by 2X. WSOLA +introduces unacceptable levels of distortion, making speech impossible to +understand at high speed (over 2.5X) by blind speed listeners. However, there are decent open-source algorithms for speeding up speech. They are all in the TD-PSOLA family. For speech rates below 2X, sonic uses PICOLA, which I find to be the best algorithm available. A slightly buggy implementation of PICOLA is available in the spandsp library. I find the one in RockBox quite good, though it's limited to 2X speed up. So far as I know, only -sonic is optimized for speed factors needed by the blind, up to 8X. +sonic is optimized for speed factors needed by the blind, up to 6X. Sonic does all of it's CPU intensive work with integer math, and works well on ARM CPUs without FPUs. It supports multiple channels (stereo), and is also able @@ -59,10 +68,10 @@ ## Using libsonic in your program -Sonic is still a new library, and has not yet been incorporated into Debian or -other major distros. 
For now, feel free to simply add sonic.c and -sonic.h to your application, but consider switching to -lsonic once the library -is available on your distro. +Sonic is still a new library, but is in Debian Sid. It will take a while +for it to filter out into all the other distros. For now, feel free to simply +add sonic.c and sonic.h to your application, but consider switching to -lsonic +once the library is available on your distro. The file [main.c](main.c) is the source code for the sonic command-line application. It is meant to be useful as example code. Feel free to copy directly from main.c diff -Nru sonic-0.1.17/Main.java sonic-0.1.18/Main.java --- sonic-0.1.17/Main.java 1969-12-31 17:00:00.000000000 -0700 +++ sonic-0.1.18/Main.java 2011-07-16 09:52:45.000000000 -0600 @@ -0,0 +1,82 @@ +/* This file was written by me, Bill Cox in 2011, and placed into the public domain. */ +package sonic; + +import java.io.File; +import java.io.IOException; + +import javax.sound.sampled.AudioFormat; +import javax.sound.sampled.AudioInputStream; +import javax.sound.sampled.AudioSystem; +import javax.sound.sampled.DataLine; +import javax.sound.sampled.LineUnavailableException; +import javax.sound.sampled.SourceDataLine; +import javax.sound.sampled.UnsupportedAudioFileException; + +public class Main { + + // Run sonic. 
+ private static void runSonic( + AudioInputStream audioStream, + SourceDataLine line, + float speed, + float pitch, + float rate, + float volume, + boolean emulateChordPitch, + int quality, + int sampleRate, + int numChannels) throws IOException + { + Sonic sonic = new Sonic(sampleRate, numChannels); + int bufferSize = line.getBufferSize(); + byte inBuffer[] = new byte[bufferSize]; + byte outBuffer[] = new byte[bufferSize]; + int numRead, numWritten; + + sonic.setSpeed(speed); + sonic.setPitch(pitch); + sonic.setRate(rate); + sonic.setVolume(volume); + sonic.setChordPitch(emulateChordPitch); + sonic.setQuality(quality); + do { + numRead = audioStream.read(inBuffer, 0, bufferSize); + if(numRead <= 0) { + sonic.flushStream(); + } else { + sonic.writeBytesToStream(inBuffer, numRead); + } + do { + numWritten = sonic.readBytesFromStream(outBuffer, bufferSize); + if(numWritten > 0) { + line.write(outBuffer, 0, numWritten); + } + } while(numWritten > 0); + } while(numRead > 0); + } + + public static void main( + String[] argv) throws UnsupportedAudioFileException, IOException, LineUnavailableException + { + float speed = 2.0f; + float pitch = 1.0f; + float rate = 1.0f; + float volume = 1.0f; + boolean emulateChordPitch = false; + int quality = 0; + + AudioInputStream stream = AudioSystem.getAudioInputStream(new File("talking.wav")); + AudioFormat format = stream.getFormat(); + int sampleRate = (int)format.getSampleRate(); + int numChannels = format.getChannels(); + SourceDataLine.Info info = new DataLine.Info(SourceDataLine.class, format, + ((int)stream.getFrameLength()*format.getFrameSize())); + SourceDataLine line = (SourceDataLine)AudioSystem.getLine(info); + line.open(stream.getFormat()); + line.start(); + runSonic(stream, line, speed, pitch, rate, volume, emulateChordPitch, quality, + sampleRate, numChannels); + line.drain(); + line.stop(); + } +} diff -Nru sonic-0.1.17/Makefile sonic-0.1.18/Makefile --- sonic-0.1.17/Makefile 2011-04-19 05:51:59.000000000 -0600 +++ 
sonic-0.1.18/Makefile 2011-07-16 10:13:16.000000000 -0600 @@ -4,7 +4,7 @@ #CFLAGS=-Wall -g -ansi -fPIC -pthread CFLAGS=-Wall -O2 -ansi -fPIC -pthread -LIB_TAG=0.1.17 +LIB_TAG=0.1.18 CC=gcc PREFIX=/usr Binary files /tmp/UVvZHj4B0w/sonic-0.1.17/out.wav and /tmp/lzjiyqpPF4/sonic-0.1.18/out.wav differ diff -Nru sonic-0.1.17/README sonic-0.1.18/README --- sonic-0.1.17/README 2011-04-16 14:49:14.000000000 -0600 +++ sonic-0.1.18/README 2011-07-16 10:06:33.000000000 -0600 @@ -8,10 +8,34 @@ Sonic can also be used by the sighted. For example, Sonic can improve the experience of listening to an audio book on an Android phone. -Sonic is Copyright 2010, Bill Cox, all rights reserved. It is released as open -source under the Lesser Gnu Public License version 2.1. All files except main.c -and all the sound samples are LGPL. main.c and the sound samples in the samples -directory are in the public domain. +A native Java port of Sonic is in Sonic.java. Main.java is a simple example of +how to use Sonic.java. To play with it, you'll need a "talking.wav" file in the +current directory, and you'll want to change the speed, pitch or other +parameters manually in Main.java, in the main method. + +Sonic is Copyright 2010, 2011, Bill Cox, all rights reserved. It is released as +open source under the Lesser Gnu Public License version 2.1. All files except +main.c, Main.java and all the sound samples are LGPL. main.c Main.java and the +sound samples in the samples directory are in the public domain. As a special +exception, you may add the source code for sonic.c or Sonic.java to your +project, rather than linking against a libsonic or adding Sonic.jar, but any +changes to these two files must be published according to the LGPL terms. + +Performance test: + +I sped up a 751958176 byte wav file with sonic (a 9 hour, 28 minute mono audio +file encoded at 16-bit 11.KHz), but with the output writing disabled. 
The +reported time, running Ubuntu 11.04 on my HP Pavilion dm4 laptop was: + +real 0m50.839s +user 0m47.370s +sys 0m0.620s + +The Java version is not much slower. It reported: + +real 0m52.043s +user 0m51.190s +sys 0m0.310s Author: Bill Cox email: waywardgeek@gmail.com Binary files /tmp/UVvZHj4B0w/sonic-0.1.17/samples/talking.wav and /tmp/lzjiyqpPF4/sonic-0.1.18/samples/talking.wav differ diff -Nru sonic-0.1.17/sonic.c sonic-0.1.18/sonic.c --- sonic-0.1.17/sonic.c 2011-03-04 03:32:50.000000000 -0700 +++ sonic-0.1.18/sonic.c 2011-07-16 10:07:49.000000000 -0600 @@ -16,7 +16,13 @@ You should have received a copy of the GNU Lesser General Public License along with the GNU C Library; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA - 02111-1307 USA. */ + 02111-1307 USA. + + As a special exception, you may add the source code for this file, sonic.c, + to your project, rather than linking against a libsonic or adding Sonic.jar, + but any changes to these two files must be published according to the LGPL + terms. +*/ #include #include @@ -41,8 +47,6 @@ float rate; int oldRatePosition; int newRatePosition; - int oldSampleRate; - int newSampleRate; int useChordPitch; int quality; int numChannels; @@ -58,7 +62,6 @@ int remainingInputToCopy; int sampleRate; int prevPeriod; - int prevMaxDiff; int prevMinDiff; }; @@ -443,7 +446,7 @@ memcpy(stream->outputBuffer + stream->numOutputSamples*stream->numChannels, samples, numSamples*sizeof(short)*stream->numChannels); stream->numOutputSamples += numSamples; - return numSamples; + return 1; } /* Just copy from the input buffer to the output buffer. Return 0 if we fail to @@ -596,7 +599,7 @@ return stream->numOutputSamples; } -/* If skip is greater than one, average skip samples togther and write them to +/* If skip is greater than one, average skip samples together and write them to the down-sample buffer. If numChannels is greater than one, mix the channels together as we down sample. 
*/ static void downSampleInput( @@ -621,7 +624,7 @@ } /* Find the best frequency match in the range, and given a sample skip multiple. - For now, just find the pitch of the first channel. */ + For now, just find the pitch of the first channel. */ static int findPitchPeriodInRange( short *samples, int minPeriod, @@ -662,7 +665,7 @@ } /* At abrupt ends of voiced words, we can have pitch periods that are better - aproximated by the previous pitch period estimate. Try to detect this case. */ + approximated by the previous pitch period estimate. Try to detect this case. */ static int prevPeriodBetter( sonicStream stream, int period, @@ -741,7 +744,6 @@ retPeriod = period; } stream->prevMinDiff = minDiff; - stream->prevMaxDiff = maxDiff; stream->prevPeriod = period; return retPeriod; } @@ -810,7 +812,7 @@ } } -/* Just move the new samples in the output buffer to the pitch bufer */ +/* Just move the new samples in the output buffer to the pitch buffer */ static int moveNewSamplesToPitchBuffer( sonicStream stream, int originalNumOutputSamples) @@ -980,7 +982,7 @@ if(speed >= 2.0f) { newSamples = period/(speed - 1.0f); - } else if(speed > 1.0f) { + } else { newSamples = period; stream->remainingInputToCopy = period*(2.0f - speed)/(speed - 1.0f); } diff -Nru sonic-0.1.17/Sonic.java sonic-0.1.18/Sonic.java --- sonic-0.1.17/Sonic.java 1969-12-31 17:00:00.000000000 -0700 +++ sonic-0.1.18/Sonic.java 2011-07-16 10:08:35.000000000 -0600 @@ -0,0 +1,1013 @@ +/* Sonic library + Copyright 2010, 2011 + Bill Cox + This file is part of the Sonic Library. + + The Sonic Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. 
+ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with the GNU C Library; if not, write to the Free + Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA + 02111-1307 USA. + + As a special exception, you may add the source code for this file, Sonic.java, + to your project, rather than linking against a libsonic or adding Sonic.jar, + but any changes to these two files must be published according to the LGPL + terms. +*/ + +package sonic; + +public class Sonic { + + private static final int SONIC_MIN_PITCH = 65; + private static final int SONIC_MAX_PITCH = 400; + /* This is used to down-sample some inputs to improve speed */ + private static final int SONIC_AMDF_FREQ = 4000; + + private short inputBuffer[]; + private short outputBuffer[]; + private short pitchBuffer[]; + private short downSampleBuffer[]; + private float speed; + private float volume; + private float pitch; + private float rate; + private int oldRatePosition; + private int newRatePosition; + private boolean useChordPitch; + private int quality; + private int numChannels; + private int inputBufferSize; + private int pitchBufferSize; + private int outputBufferSize; + private int numInputSamples; + private int numOutputSamples; + private int numPitchSamples; + private int minPeriod; + private int maxPeriod; + private int maxRequired; + private int remainingInputToCopy; + private int sampleRate; + private int prevPeriod; + private int prevMinDiff; + + // Resize the array. + private short[] resize( + short[] oldArray, + int newLength) + { + short[] newArray = new short[newLength]; + int length = oldArray.length <= newLength? 
oldArray.length : newLength; + + length *= numChannels; + for(int x = 0; x < length; x++) { + newArray[x] = oldArray[x]; + } + return newArray; + } + + // Move samples from one array to another. May move samples down within an array, but not up. + private void move( + short dest[], + int destPos, + short source[], + int sourcePos, + int numSamples) + { + for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { + dest[destPos*numChannels + xSample] = source[sourcePos*numChannels + xSample]; + } + } + + // Scale the samples by the factor. + private void scaleSamples( + short samples[], + int position, + int numSamples, + float volume) + { + int fixedPointVolume = (int)(volume*4096.0f); + int start = position*numChannels; + int stop = start + numSamples*numChannels; + + for(int xSample = start; xSample < stop; xSample++) { + int value = (samples[xSample]*fixedPointVolume) >> 12; + if(value > 32767) { + value = 32767; + } else if(value < -32767) { + value = -32767; + } + samples[xSample] = (short)value; + } + } + + // Get the speed of the stream. + public float getSpeed() + { + return speed; + } + + // Set the speed of the stream. + public void setSpeed( + float speed) + { + this.speed = speed; + } + + // Get the pitch of the stream. + public float getPitch() + { + return pitch; + } + + // Set the pitch of the stream. + public void setPitch( + float pitch) + { + this.pitch = pitch; + } + + // Get the rate of the stream. + public float getRate() + { + return rate; + } + + // Set the playback rate of the stream. This scales pitch and speed at the same time. + public void setRate( + float rate) + { + this.rate = rate; + this.oldRatePosition = 0; + this.newRatePosition = 0; + } + + // Get the vocal chord pitch setting. + public boolean getChordPitch() + { + return useChordPitch; + } + + // Set the vocal chord mode for pitch computation. Default is off. 
+ public void setChordPitch( + boolean useChordPitch) + { + this.useChordPitch = useChordPitch; + } + + // Get the quality setting. + public int getQuality() + { + return quality; + } + + // Set the "quality". Default 0 is virtually as good as 1, but very much faster. + public void setQuality( + int quality) + { + this.quality = quality; + } + + // Get the scaling factor of the stream. + public float getVolume() + { + return volume; + } + + // Set the scaling factor of the stream. + public void setVolume( + float volume) + { + this.volume = volume; + } + + // Allocate stream buffers. + private void allocateStreamBuffers( + int sampleRate, + int numChannels) + { + minPeriod = sampleRate/SONIC_MAX_PITCH; + maxPeriod = sampleRate/SONIC_MIN_PITCH; + maxRequired = 2*maxPeriod; + inputBufferSize = maxRequired; + inputBuffer = new short[maxRequired*numChannels]; + outputBufferSize = maxRequired; + outputBuffer = new short[maxRequired*numChannels]; + pitchBufferSize = maxRequired; + pitchBuffer = new short[maxRequired*numChannels]; + downSampleBuffer = new short[maxRequired]; + this.sampleRate = sampleRate; + this.numChannels = numChannels; + oldRatePosition = 0; + newRatePosition = 0; + prevPeriod = 0; + } + + // Create a sonic stream. + public Sonic( + int sampleRate, + int numChannels) + { + allocateStreamBuffers(sampleRate, numChannels); + speed = 1.0f; + pitch = 1.0f; + volume = 1.0f; + rate = 1.0f; + oldRatePosition = 0; + newRatePosition = 0; + useChordPitch = false; + quality = 0; + } + + // Get the sample rate of the stream. + public int getSampleRate() + { + return sampleRate; + } + + // Set the sample rate of the stream. This will cause samples buffered in the stream to be lost. + public void setSampleRate( + int sampleRate) + { + allocateStreamBuffers(sampleRate, numChannels); + } + + // Get the number of channels. + public int getNumChannels() + { + return numChannels; + } + + // Set the num channels of the stream. 
This will cause samples buffered in the stream to be lost. + public void setNumChannels( + int numChannels) + { + allocateStreamBuffers(sampleRate, numChannels); + } + + // Enlarge the output buffer if needed. + private void enlargeOutputBufferIfNeeded( + int numSamples) + { + if(numOutputSamples + numSamples > outputBufferSize) { + outputBufferSize += (outputBufferSize >> 1) + numSamples; + outputBuffer = resize(outputBuffer, outputBufferSize); + } + } + + // Enlarge the input buffer if needed. + private void enlargeInputBufferIfNeeded( + int numSamples) + { + if(numInputSamples + numSamples > inputBufferSize) { + inputBufferSize += (inputBufferSize >> 1) + numSamples; + inputBuffer = resize(inputBuffer, inputBufferSize); + } + } + + // Add the input samples to the input buffer. + private void addFloatSamplesToInputBuffer( + float samples[], + int numSamples) + { + if(numSamples == 0) { + return; + } + enlargeInputBufferIfNeeded(numSamples); + int xBuffer = numInputSamples*numChannels; + for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { + inputBuffer[xBuffer++] = (short)(samples[xSample]*32767.0f); + } + numInputSamples += numSamples; + } + + // Add the input samples to the input buffer. + private void addShortSamplesToInputBuffer( + short samples[], + int numSamples) + { + if(numSamples == 0) { + return; + } + enlargeInputBufferIfNeeded(numSamples); + move(inputBuffer, numInputSamples, samples, 0, numSamples); + numInputSamples += numSamples; + } + + // Add the input samples to the input buffer. 
+ private void addUnsignedByteSamplesToInputBuffer( + byte samples[], + int numSamples) + { + short sample; + + enlargeInputBufferIfNeeded(numSamples); + int xBuffer = numInputSamples*numChannels; + for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { + sample = (short)((samples[xSample] & 0xff) - 128); // Convert from unsigned to signed + inputBuffer[xBuffer++] = (short) (sample << 8); + } + numInputSamples += numSamples; + } + + // Add the input samples to the input buffer. They must be 16-bit little-endian encoded in a byte array. + private void addBytesToInputBuffer( + byte inBuffer[], + int numBytes) + { + int numSamples = numBytes/(2*numChannels); + short sample; + + enlargeInputBufferIfNeeded(numSamples); + int xBuffer = numInputSamples*numChannels; + for(int xByte = 0; xByte + 1 < numBytes; xByte += 2) { + sample = (short)((inBuffer[xByte] & 0xff) | (inBuffer[xByte + 1] << 8)); + inputBuffer[xBuffer++] = sample; + } + numInputSamples += numSamples; + } + + // Remove input samples that we have already processed. + private void removeInputSamples( + int position) + { + int remainingSamples = numInputSamples - position; + + move(inputBuffer, 0, inputBuffer, position, remainingSamples); + numInputSamples = remainingSamples; + } + + // Just copy from the array to the output buffer + private void copyToOutput( + short samples[], + int position, + int numSamples) + { + enlargeOutputBufferIfNeeded(numSamples); + move(outputBuffer, numOutputSamples, samples, position, numSamples); + numOutputSamples += numSamples; + } + + // Just copy from the input buffer to the output buffer. Return num samples copied. + private int copyInputToOutput( + int position) + { + int numSamples = remainingInputToCopy; + + if(numSamples > maxRequired) { + numSamples = maxRequired; + } + copyToOutput(inputBuffer, position, numSamples); + remainingInputToCopy -= numSamples; + return numSamples; + } + + // Read data out of the stream. 
Sometimes no data will be available, and zero + // is returned, which is not an error condition. + public int readFloatFromStream( + float samples[], + int maxSamples) + { + int numSamples = numOutputSamples; + int remainingSamples = 0; + + if(numSamples == 0) { + return 0; + } + if(numSamples > maxSamples) { + remainingSamples = numSamples - maxSamples; + numSamples = maxSamples; + } + for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { + samples[xSample++] = (outputBuffer[xSample])/32767.0f; + } + move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples); + numOutputSamples = remainingSamples; + return numSamples; + } + + // Read short data out of the stream. Sometimes no data will be available, and zero + // is returned, which is not an error condition. + public int readShortFromStream( + short samples[], + int maxSamples) + { + int numSamples = numOutputSamples; + int remainingSamples = 0; + + if(numSamples == 0) { + return 0; + } + if(numSamples > maxSamples) { + remainingSamples = numSamples - maxSamples; + numSamples = maxSamples; + } + move(samples, 0, outputBuffer, 0, numSamples); + move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples); + numOutputSamples = remainingSamples; + return numSamples; + } + + // Read unsigned byte data out of the stream. Sometimes no data will be available, and zero + // is returned, which is not an error condition. 
+ public int readUnsignedByteFromStream( + byte samples[], + int maxSamples) + { + int numSamples = numOutputSamples; + int remainingSamples = 0; + + if(numSamples == 0) { + return 0; + } + if(numSamples > maxSamples) { + remainingSamples = numSamples - maxSamples; + numSamples = maxSamples; + } + for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { + samples[xSample] = (byte)((outputBuffer[xSample] >> 8) + 128); + } + move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples); + numOutputSamples = remainingSamples; + return numSamples; + } + + // Read unsigned byte data out of the stream. Sometimes no data will be available, and zero + // is returned, which is not an error condition. + public int readBytesFromStream( + byte outBuffer[], + int maxBytes) + { + int maxSamples = maxBytes/(2*numChannels); + int numSamples = numOutputSamples; + int remainingSamples = 0; + + if(numSamples == 0 || maxSamples == 0) { + return 0; + } + if(numSamples > maxSamples) { + remainingSamples = numSamples - maxSamples; + numSamples = maxSamples; + } + for(int xSample = 0; xSample < numSamples*numChannels; xSample++) { + short sample = outputBuffer[xSample]; + outBuffer[xSample << 1] = (byte)(sample & 0xff); + outBuffer[(xSample << 1) + 1] = (byte)(sample >> 8); + } + move(outputBuffer, 0, outputBuffer, numSamples, remainingSamples); + numOutputSamples = remainingSamples; + return 2*numSamples*numChannels; + } + + // Force the sonic stream to generate output using whatever data it currently + // has. No extra delay will be added to the output, but flushing in the middle of + // words could introduce distortion. + public void flushStream() + { + int remainingSamples = numInputSamples; + float s = speed/pitch; + float r = rate*pitch; + int expectedOutputSamples = numOutputSamples + (int)((remainingSamples/s + numPitchSamples)/r + 0.5f); + + // Add enough silence to flush both input and pitch buffers. 
+ enlargeInputBufferIfNeeded(remainingSamples + 2*maxRequired); + for(int xSample = 0; xSample < 2*maxRequired*numChannels; xSample++) { + inputBuffer[remainingSamples*numChannels + xSample] = 0; + } + numInputSamples += 2*maxRequired; + writeShortToStream(null, 0); + // Throw away any extra samples we generated due to the silence we added. + if(numOutputSamples > expectedOutputSamples) { + numOutputSamples = expectedOutputSamples; + } + // Empty input and pitch buffers. + numInputSamples = 0; + remainingInputToCopy = 0; + numPitchSamples = 0; + } + + // Return the number of samples in the output buffer + public int samplesAvailable() + { + return numOutputSamples; + } + + // If skip is greater than one, average skip samples together and write them to + // the down-sample buffer. If numChannels is greater than one, mix the channels + // together as we down sample. + private void downSampleInput( + short samples[], + int position, + int skip) + { + int numSamples = maxRequired/skip; + int samplesPerValue = numChannels*skip; + int value; + + position *= numChannels; + for(int i = 0; i < numSamples; i++) { + value = 0; + for(int j = 0; j < samplesPerValue; j++) { + value += samples[position + i*samplesPerValue + j]; + } + value /= samplesPerValue; + downSampleBuffer[i] = (short)value; + } + } + + // Find the best frequency match in the range, and given a sample skip multiple. + // For now, just find the pitch of the first channel. Note that retMinDiff and + // retMaxDiff are Int objects, which the caller will need to create with new. 
+ private int findPitchPeriodInRange( + short samples[], + int position, + int minPeriod, + int maxPeriod, + Integer retMinDiff, + Integer retMaxDiff) + { + int bestPeriod = 0, worstPeriod = 255; + int minDiff = 1, maxDiff = 0; + + position *= numChannels; + for(int period = minPeriod; period <= maxPeriod; period++) { + int diff = 0; + for(int i = 0; i < period; i++) { + short sVal = samples[position + i]; + short pVal = samples[position + period + i]; + diff += sVal >= pVal? sVal - pVal : pVal - sVal; + } + /* Note that the highest number of samples we add into diff will be less + than 256, since we skip samples. Thus, diff is a 24 bit number, and + we can safely multiply by numSamples without overflow */ + if(diff*bestPeriod < minDiff*period) { + minDiff = diff; + bestPeriod = period; + } + if(diff*worstPeriod > maxDiff*period) { + maxDiff = diff; + worstPeriod = period; + } + } + retMinDiff = minDiff/bestPeriod; + retMaxDiff = maxDiff/worstPeriod; + return bestPeriod; + } + + // At abrupt ends of voiced words, we can have pitch periods that are better + // approximated by the previous pitch period estimate. Try to detect this case. + private boolean prevPeriodBetter( + int period, + int minDiff, + int maxDiff, + boolean preferNewPeriod) + { + if(minDiff == 0 || prevPeriod == 0) { + return false; + } + if(preferNewPeriod) { + if(maxDiff > minDiff*3) { + // Got a reasonable match this period + return false; + } + if(minDiff*2 <= prevMinDiff*3) { + // Mismatch is not that much greater this period + return false; + } + } else { + if(minDiff <= prevMinDiff) { + return false; + } + } + return true; + } + + // Find the pitch period. This is a critical step, and we may have to try + // multiple ways to get a good answer. This version uses AMDF. 
To improve + // speed, we down sample by an integer factor get in the 11KHz range, and then + // do it again with a narrower frequency range without down sampling + private int findPitchPeriod( + short samples[], + int position, + boolean preferNewPeriod) + { + Integer minDiff = new Integer(0); + Integer maxDiff = new Integer(0); + int period, retPeriod; + int skip = 1; + + if(sampleRate > SONIC_AMDF_FREQ && quality == 0) { + skip = sampleRate/SONIC_AMDF_FREQ; + } + if(numChannels == 1 && skip == 1) { + period = findPitchPeriodInRange(samples, position, minPeriod, maxPeriod, minDiff, maxDiff); + } else { + downSampleInput(samples, position, skip); + period = findPitchPeriodInRange(downSampleBuffer, 0, minPeriod/skip, + maxPeriod/skip, minDiff, maxDiff); + if(skip != 1) { + period *= skip; + int minP = period - (skip << 2); + int maxP = period + (skip << 2); + if(minP < minPeriod) { + minP = minPeriod; + } + if(maxP > maxPeriod) { + maxP = maxPeriod; + } + if(numChannels == 1) { + period = findPitchPeriodInRange(samples, position, minP, maxP, minDiff, maxDiff); + } else { + downSampleInput(samples, position, 1); + period = findPitchPeriodInRange(downSampleBuffer, 0, minP, maxP, minDiff, maxDiff); + } + } + } + if(prevPeriodBetter(period, minDiff, maxDiff, preferNewPeriod)) { + retPeriod = prevPeriod; + } else { + retPeriod = period; + } + prevMinDiff = minDiff; + prevPeriod = period; + return retPeriod; + } + + // Overlap two sound segments, ramp the volume of one down, while ramping the + // other one from zero up, and add them, storing the result at the output. 
+ private void overlapAdd( + int numSamples, + int numChannels, + short out[], + int outPos, + short rampDown[], + int rampDownPos, + short rampUp[], + int rampUpPos) + { + for(int i = 0; i < numChannels; i++) { + int o = outPos*numChannels + i; + int u = rampUpPos*numChannels + i; + int d = rampDownPos*numChannels + i; + for(int t = 0; t < numSamples; t++) { + out[o] = (short)((rampDown[d]*(numSamples - t) + rampUp[u]*t)/numSamples); + o += numChannels; + d += numChannels; + u += numChannels; + } + } + } + + // Overlap two sound segments, ramp the volume of one down, while ramping the + // other one from zero up, and add them, storing the result at the output. + private void overlapAddWithSeparation( + int numSamples, + int numChannels, + int separation, + short out[], + int outPos, + short rampDown[], + int rampDownPos, + short rampUp[], + int rampUpPos) + { + for(int i = 0; i < numChannels; i++) { + int o = outPos*numChannels + i; + int u = rampUpPos*numChannels + i; + int d = rampDownPos*numChannels + i; + for(int t = 0; t < numSamples + separation; t++) { + if(t < separation) { + out[o] = (short)(rampDown[d]*(numSamples - t)/numSamples); + d += numChannels; + } else if(t < numSamples) { + out[o] = (short)((rampDown[d]*(numSamples - t) + rampUp[u]*(t - separation))/numSamples); + d += numChannels; + u += numChannels; + } else { + out[o] = (short)(rampUp[u]*(t - separation)/numSamples); + u += numChannels; + } + o += numChannels; + } + } + } + + // Just move the new samples in the output buffer to the pitch buffer + private void moveNewSamplesToPitchBuffer( + int originalNumOutputSamples) + { + int numSamples = numOutputSamples - originalNumOutputSamples; + + if(numPitchSamples + numSamples > pitchBufferSize) { + pitchBufferSize += (pitchBufferSize >> 1) + numSamples; + pitchBuffer = resize(pitchBuffer, pitchBufferSize); + } + move(pitchBuffer, numPitchSamples, outputBuffer, originalNumOutputSamples, numSamples); + numOutputSamples = originalNumOutputSamples; + 
numPitchSamples += numSamples; + } + + // Remove processed samples from the pitch buffer. + private void removePitchSamples( + int numSamples) + { + if(numSamples == 0) { + return; + } + move(pitchBuffer, 0, pitchBuffer, numSamples, numPitchSamples - numSamples); + numPitchSamples -= numSamples; + } + + // Change the pitch. The latency this introduces could be reduced by looking at + // past samples to determine pitch, rather than future. + private void adjustPitch( + int originalNumOutputSamples) + { + int period, newPeriod, separation; + int position = 0; + + if(numOutputSamples == originalNumOutputSamples) { + return; + } + moveNewSamplesToPitchBuffer(originalNumOutputSamples); + while(numPitchSamples - position >= maxRequired) { + period = findPitchPeriod(pitchBuffer, position, false); + newPeriod = (int)(period/pitch); + enlargeOutputBufferIfNeeded(newPeriod); + if(pitch >= 1.0f) { + overlapAdd(newPeriod, numChannels, outputBuffer, numOutputSamples, pitchBuffer, + position, pitchBuffer, position + period - newPeriod); + } else { + separation = newPeriod - period; + overlapAddWithSeparation(period, numChannels, separation, outputBuffer, numOutputSamples, + pitchBuffer, position, pitchBuffer, position); + } + numOutputSamples += newPeriod; + position += period; + } + removePitchSamples(position); + } + + // Interpolate the new output sample. + private short interpolate( + short in[], + int inPos, + int oldSampleRate, + int newSampleRate) + { + short left = in[inPos*numChannels]; + short right = in[inPos*numChannels + numChannels]; + int position = newRatePosition*oldSampleRate; + int leftPosition = oldRatePosition*newSampleRate; + int rightPosition = (oldRatePosition + 1)*newSampleRate; + int ratio = rightPosition - position; + int width = rightPosition - leftPosition; + + return (short)((ratio*left + (width - ratio)*right)/width); + } + + // Change the rate. 
+    private void adjustRate(
+        float rate,
+        int originalNumOutputSamples)
+    {
+        int newSampleRate = (int)(sampleRate/rate);
+        int oldSampleRate = sampleRate;
+        int position;
+
+        // Set these values to help with the integer math
+        while(newSampleRate > (1 << 14) || oldSampleRate > (1 << 14)) {
+            newSampleRate >>= 1;
+            oldSampleRate >>= 1;
+        }
+        if(numOutputSamples == originalNumOutputSamples) {
+            return;
+        }
+        moveNewSamplesToPitchBuffer(originalNumOutputSamples);
+        // Leave at least one pitch sample in the buffer
+        for(position = 0; position < numPitchSamples - 1; position++) {
+            while((oldRatePosition + 1)*newSampleRate > newRatePosition*oldSampleRate) {
+                enlargeOutputBufferIfNeeded(1);
+                for(int i = 0; i < numChannels; i++) {
+                    // Channel i of frame "position" lives at position*numChannels + i.
+                    // Passing position + i to a routine that scales by numChannels
+                    // would read channel 0 of a later frame instead.
+                    outputBuffer[numOutputSamples*numChannels + i] = interpolateChannelSample(
+                        pitchBuffer, position*numChannels + i, oldSampleRate, newSampleRate);
+                }
+                newRatePosition++;
+                numOutputSamples++;
+            }
+            oldRatePosition++;
+            if(oldRatePosition == oldSampleRate) {
+                // Both counters should wrap together; restart them to keep the
+                // products above small.
+                oldRatePosition = 0;
+                if(newRatePosition != newSampleRate) {
+                    System.out.printf("Assertion failed: newRatePosition != newSampleRate\n");
+                    assert false;
+                }
+                newRatePosition = 0;
+            }
+        }
+        removePitchSamples(position);
+    }
+
+    // Linearly interpolate one channel's output sample.  "index" is the array
+    // index of the left-hand input sample, with the channel offset already
+    // included; the right-hand sample is the same channel one frame later, at
+    // index + numChannels.
+    private short interpolateChannelSample(
+        short in[],
+        int index,
+        int oldSampleRate,
+        int newSampleRate)
+    {
+        short left = in[index];
+        short right = in[index + numChannels];
+        int position = newRatePosition*oldSampleRate;
+        int leftPosition = oldRatePosition*newSampleRate;
+        int rightPosition = (oldRatePosition + 1)*newSampleRate;
+        int ratio = rightPosition - position;
+        int width = rightPosition - leftPosition;
+
+        return (short)((ratio*left + (width - ratio)*right)/width);
+    }
+
+    // Skip over a pitch period, and copy period/speed samples to the output.
+    // For speed >= 2 only period/(speed - 1) samples are cross-faded; otherwise a
+    // whole period is cross-faded and remainingInputToCopy is set to the amount
+    // of input that should then be copied unchanged.  Returns the number of
+    // samples added to the output.
+    private int skipPitchPeriod(
+        short samples[],
+        int position,
+        float speed,
+        int period)
+    {
+        int newSamples;
+
+        if(speed >= 2.0f) {
+            newSamples = (int)(period/(speed - 1.0f));
+        } else {
+            newSamples = period;
+            remainingInputToCopy = (int)(period*(2.0f - speed)/(speed - 1.0f));
+        }
+        enlargeOutputBufferIfNeeded(newSamples);
+        // Fade out of the current period and into the following one.
+        overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples, samples, position,
+            samples, position + period);
+        numOutputSamples += newSamples;
+        return newSamples;
+    }
+
+    // Insert a pitch period, and determine how much input to copy directly.
+ private int insertPitchPeriod( + short samples[], + int position, + float speed, + int period) + { + int newSamples; + + if(speed < 0.5f) { + newSamples = (int)(period*speed/(1.0f - speed)); + } else { + newSamples = period; + remainingInputToCopy = (int)(period*(2.0f*speed - 1.0f)/(1.0f - speed)); + } + enlargeOutputBufferIfNeeded(period + newSamples); + move(outputBuffer, numOutputSamples, samples, position, period); + overlapAdd(newSamples, numChannels, outputBuffer, numOutputSamples + period, samples, + position + period, samples, position); + numOutputSamples += period + newSamples; + return newSamples; + } + + // Resample as many pitch periods as we have buffered on the input. Return 0 if + // we fail to resize an input or output buffer. Also scale the output by the volume. + private void changeSpeed( + float speed) + { + int numSamples = numInputSamples; + int position = 0, period, newSamples; + + if(numInputSamples < maxRequired) { + return; + } + do { + if(remainingInputToCopy > 0) { + newSamples = copyInputToOutput(position); + position += newSamples; + } else { + period = findPitchPeriod(inputBuffer, position, true); + if(speed > 1.0) { + newSamples = skipPitchPeriod(inputBuffer, position, speed, period); + position += period + newSamples; + } else { + newSamples = insertPitchPeriod(inputBuffer, position, speed, period); + position += newSamples; + } + } + } while(position + maxRequired <= numSamples); + removeInputSamples(position); + } + + // Resample as many pitch periods as we have buffered on the input. Scale the output by the volume. 
+ private void processStreamInput() + { + int originalNumOutputSamples = numOutputSamples; + float s = speed/pitch; + float r = rate; + + if(!useChordPitch) { + r *= pitch; + } + if(s > 1.00001 || s < 0.99999) { + changeSpeed(s); + } else { + copyToOutput(inputBuffer, 0, numInputSamples); + numInputSamples = 0; + } + if(useChordPitch) { + if(pitch != 1.0f) { + adjustPitch(originalNumOutputSamples); + } + } else if(r != 1.0f) { + adjustRate(r, originalNumOutputSamples); + } + if(volume != 1.0f) { + // Adjust output volume. + scaleSamples(outputBuffer, originalNumOutputSamples, numOutputSamples - originalNumOutputSamples, + volume); + } + } + + // Write floating point data to the input buffer and process it. + public void writeFloatToStream( + float samples[], + int numSamples) + { + addFloatSamplesToInputBuffer(samples, numSamples); + processStreamInput(); + } + + // Write the data to the input stream, and process it. + public void writeShortToStream( + short samples[], + int numSamples) + { + addShortSamplesToInputBuffer(samples, numSamples); + processStreamInput(); + } + + // Simple wrapper around sonicWriteFloatToStream that does the unsigned byte to short + // conversion for you. + public void writeUnsignedByteToStream( + byte samples[], + int numSamples) + { + addUnsignedByteSamplesToInputBuffer(samples, numSamples); + processStreamInput(); + } + + // Simple wrapper around sonicWriteBytesToStream that does the byte to 16-bit LE conversion. 
+ public void writeBytesToStream( + byte inBuffer[], + int numBytes) + { + addBytesToInputBuffer(inBuffer, numBytes); + processStreamInput(); + } + + // This is a non-stream oriented interface to just change the speed of a sound sample + public static int changeFloatSpeed( + float samples[], + int numSamples, + float speed, + float pitch, + float rate, + float volume, + boolean useChordPitch, + int sampleRate, + int numChannels) + { + Sonic stream = new Sonic(sampleRate, numChannels); + + stream.setSpeed(speed); + stream.setPitch(pitch); + stream.setRate(rate); + stream.setVolume(volume); + stream.setChordPitch(useChordPitch); + stream.writeFloatToStream(samples, numSamples); + stream.flushStream(); + numSamples = stream.samplesAvailable(); + stream.readFloatFromStream(samples, numSamples); + return numSamples; + } + + /* This is a non-stream oriented interface to just change the speed of a sound sample */ + public int sonicChangeShortSpeed( + short samples[], + int numSamples, + float speed, + float pitch, + float rate, + float volume, + boolean useChordPitch, + int sampleRate, + int numChannels) + { + Sonic stream = new Sonic(sampleRate, numChannels); + + stream.setSpeed(speed); + stream.setPitch(pitch); + stream.setRate(rate); + stream.setVolume(volume); + stream.setChordPitch(useChordPitch); + stream.writeShortToStream(samples, numSamples); + stream.flushStream(); + numSamples = stream.samplesAvailable(); + stream.readShortFromStream(samples, numSamples); + return numSamples; + } +} diff -Nru sonic-0.1.17/version sonic-0.1.18/version --- sonic-0.1.17/version 2011-04-19 05:52:00.000000000 -0600 +++ sonic-0.1.18/version 2011-07-16 10:13:16.000000000 -0600 @@ -1 +1 @@ -sonic_0.1.17 (git: 1be6c13) +sonic_0.1.18 (git: e720065)