
Built a mediaengine for libjingle using PortAudio

I'll post it here.

PortAudio is callback-driven: it calls you when data has been captured from the microphone and when the speaker can accept more audio. So there is a buffer at each of two points (sketched below):
・where audio is pulled in from the microphone
・where audio is pushed out to the speaker
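The pattern on both sides is the same: a small locked FIFO of float samples between the PortAudio callback thread and the rest of the engine. Here is a standalone sketch of just that pattern (plain C++ with std::mutex; the names are made up and are not the ones in the full listing below, which uses talk_base::CriticalSection and two std::queue<float> members):

[cpp]
#include <cstddef>
#include <mutex>
#include <queue>

// One locked FIFO of raw float samples, shared between the audio callback
// thread and the rest of the engine.
class SampleQueue {
 public:
  // Capture side: append n interleaved samples.
  void Push(const float* buf, size_t n) {
    std::lock_guard<std::mutex> lock(mutex_);
    for (size_t i = 0; i < n; ++i) q_.push(buf[i]);
  }
  // Playback side: fill buf with n samples, padding with silence when empty.
  void Pop(float* buf, size_t n) {
    std::lock_guard<std::mutex> lock(mutex_);
    for (size_t i = 0; i < n; ++i) {
      if (q_.empty()) { buf[i] = 0.0f; continue; }
      buf[i] = q_.front();
      q_.pop();
    }
  }
 private:
  std::mutex mutex_;
  std::queue<float> q_;
};
[/cpp]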

I'm using the explicit casts and std::queue from C++/the STL that I've only just learned.
C++ is pretty fun.

To push audio data into the P2P session that libjingle has set up, you call
[code]network_interface_->SendPacket(const void *, size_t);[/code]
but the part where the queue gets converted into a const void* feels a bit gross. Maybe that's just how it is.
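Concretely, the conversion is roughly this (condensed from OnReadFromMic in the full listing below):

[cpp]
// Drain the mic queue into a contiguous float array, then hand libjingle the
// raw bytes as const void* plus a byte count.
float buff[PORTAUDIO_PACKET_LENGTH / 4];
int i = 0;
for ( ; i < PORTAUDIO_PACKET_LENGTH / 4 && !qu_microphone.empty(); ++i) {
  buff[i] = qu_microphone.front();
  qu_microphone.pop();
}
network_interface_->SendPacket(static_cast<const void*>(buff),
                               i * sizeof(float));
[/cpp]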

Since I'm sending raw WAV audio, the total latency should be roughly:
about 50 ms there if DirectAudio is in use (supposedly?),
+ the delay from my buffers,
+ PortAudio's own latency,
+ the network latency.
In practice the perceived delay is a lot bigger than that; I'll look into it later.
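As a rough sanity check on the buffering term alone, using the constants from the header below: one SendPacket call carries PORTAUDIO_PACKET_LENGTH = 40960 bytes = 10240 float samples = 5120 stereo frames, which at SAMPLE_RATE = 44100 Hz is about 116 ms of audio per packet, and on the receiving side the speaker queue is allowed to grow to MAX_ALLOWED_LATENCY = 0.5 s before old samples get dropped. So the buffering alone can easily dominate the 50 ms figure above.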

portaudiomediaengine.cc
[cpp]

#include "talk/third_party/mediastreamer/mediastream.h"
#include "talk/base/logging.h"
#include "talk/base/thread.h"
#include "talk/session/phone/codec.h"
#include "talk/session/phone/portaudiomediaengine.h"
#include "portaudio.h"
#include <cstdio>
#include <iomanip>
#include <iostream>
#include <queue>
#include <time.h>

using namespace cricket;

// PortAudio full-duplex callback: runs on the audio thread for every buffer.
static int playbackCallback( const void *inputBuffer,
                             void *outputBuffer,
                             unsigned long framesPerBuffer,
                             const PaStreamCallbackTimeInfo *timeInfo,
                             PaStreamCallbackFlags statusFlags,
                             void *userData )
{
  PortAudioMediaChannel* channel = static_cast<PortAudioMediaChannel*>(userData);
  if ( inputBuffer != 0 ) {
    // Buffer up the captured microphone samples.
    channel->saveFromMicrophone( static_cast<const float*>(inputBuffer), framesPerBuffer );
  }
  // Fill the output buffer from whatever has arrived from the remote peer.
  channel->pushToSpeaker( static_cast<float*>(outputBuffer), framesPerBuffer );
  // Tell the channel there is fresh mic data ready to send.
  channel->SignalReadFromMicEvent( channel );
  return paContinue;
}

PortAudioMediaChannel::PortAudioMediaChannel(PortAudioMediaEngine* eng) :
    pt_(-1),
    audio_stream_(0),
    stream_(0),
    engine_(eng),
    mute_(false),
    play_(false)
{
  PaStreamParameters inputParameters;
  inputParameters.device = Pa_GetDefaultInputDevice(); /* default input device */
  if (inputParameters.device == paNoDevice) {
    fprintf(stderr, "Error: No default input device.\n");
    //goto done;
  }
  inputParameters.channelCount = 2; /* stereo input */
  inputParameters.sampleFormat = PA_SAMPLE_TYPE;
  inputParameters.suggestedLatency = Pa_GetDeviceInfo( inputParameters.device )->defaultLowInputLatency;
  inputParameters.hostApiSpecificStreamInfo = 0;

  PaStreamParameters outputParameters;
  outputParameters.device = Pa_GetDefaultOutputDevice(); /* default output device */
  if (outputParameters.device == paNoDevice) {
    fprintf(stderr, "Error: No default output device.\n");
    //goto done;
  }
  outputParameters.channelCount = 2; /* stereo output */
  outputParameters.sampleFormat = PA_SAMPLE_TYPE;
  outputParameters.suggestedLatency = Pa_GetDeviceInfo( outputParameters.device )->defaultLowOutputLatency;
  outputParameters.hostApiSpecificStreamInfo = 0;

  SignalReadFromMicEvent.connect(this, &PortAudioMediaChannel::OnReadFromMic);

  // Open a full-duplex PortAudio stream driven by playbackCallback.
  int err = Pa_OpenStream(
      &stream_,
      &inputParameters,
      &outputParameters,
      SAMPLE_RATE, // 44100
      paFramesPerBufferUnspecified, // FRAMES_PER_BUFFER
      paClipOff,
      playbackCallback,
      this
      );
  if (err != paNoError)
    fprintf(stderr, "Error creating a PortAudio stream: %s\n", Pa_GetErrorText(err));
}

PortAudioMediaChannel::~PortAudioMediaChannel() {
  if (stream_) {
    Pa_CloseStream(stream_);
  }
}

void PortAudioMediaChannel::SetCodecs(const std::vector<Codec> &codecs) {
  bool first = true;
  std::vector<Codec>::const_iterator i;

  for (i = codecs.begin(); i != codecs.end(); ++i) {
    if (!engine_->FindCodec(*i))
      continue;
    if (first) {
      LOG(LS_INFO) << "Using " << i->name << "/" << i->clockrate;
      pt_ = i->id;
      first = false;
    }
  }

  if (first) {
    // We're being asked to set an empty list of codecs. This will only happen when
    // working with a buggy client; let's try PCMU.
    LOG(LS_WARNING) << "Received empty list of codecs; using PCMU/8000";
  }
}

// When new data has been captured from the microphone, send it to the remote peer.
void PortAudioMediaChannel::OnReadFromMic( PortAudioMediaChannel* channel )
{
  //char *buf[max_size];
  int size = PORTAUDIO_PACKET_LENGTH;
  float buff[PORTAUDIO_PACKET_LENGTH/4];
  talk_base::CritScope cs(&crit_microphone);

  // Drain the mic queue into a contiguous buffer.
  int i = 0;
  for ( i = 0; i < size/sizeof(float) && !qu_microphone.empty(); i++ ) {
    buff[i] = qu_microphone.front();
    qu_microphone.pop();
  }
  if ( network_interface_ && !mute_ ) {
    // SendPacket takes const void* plus a byte count.
    const void *buf = static_cast<const void*>(buff);
    network_interface_->SendPacket( buf, i * sizeof(float) );
  }
  char dateStr[9];
  _strtime( dateStr );
  std::cout << "[" << dateStr << "][OnReadFromMic]qu_microphone.size(): " << qu_microphone.size() << std::endl;
}

// Push a received packet into the speaker buffer.
void PortAudioMediaChannel::OnPacketReceived( const void *data, int len ) {
  const float *reader = static_cast<const float*>(data);
  talk_base::CritScope cs(&crit_speaker);

  LOG(LS_INFO) << "[PMC]OnPacketReceived data: " << std::setprecision(3) << reader[0]
               << " len: " << len << " qu_speaker.size(): " << qu_speaker.size();

  // len is in bytes; each sample is a 4-byte float.
  for ( int i = 0; i < len/4; i++ ) {
    qu_speaker.push( *reader );
    reader++;
  }
}

void PortAudioMediaChannel::SetPlayout(bool playout) {

  if (!stream_)
    return;

  if (play_ && !playout) {
    if ( Pa_IsStreamActive(stream_) ) {
      int err = Pa_StopStream(stream_);
      if (err != paNoError) {
        fprintf(stderr, "Error stopping PortAudio stream: %s\n", Pa_GetErrorText(err));
        LOG(LS_INFO) << "Error stopping PortAudio stream: " << Pa_GetErrorText(err);
        return;
      }
    }
    play_ = false;
  }
  else if (!play_ && playout) {
    if ( !Pa_IsStreamActive(stream_) ) {
      int err = Pa_StartStream(stream_);
      if (err != paNoError) {
        fprintf(stderr, "Error starting PortAudio stream: %s\n", Pa_GetErrorText(err));
        LOG(LS_INFO) << "Error starting PortAudio stream: " << Pa_GetErrorText(err);
        return;
      }
    }
    play_ = true;
  }

}

void PortAudioMediaChannel::SetSend(bool send) {
  mute_ = !send;
}

int PortAudioMediaChannel::GetOutputLevel() {
  return 1;
}

// Write len frames into buf => this is what actually gets played.
// The samples come from what the remote peer sent us.
void PortAudioMediaChannel::pushToSpeaker( float* buf, int len ) {
  talk_base::CritScope cs(&crit_speaker);
  int i = 0;

  // If the queue has grown past the allowed latency, drop the oldest samples.
  while ( qu_speaker.size() > (MAX_SPEAKER_QUEUE_SIZE) ) {
    qu_speaker.pop();
  }

  for ( i = 0; i < len && qu_speaker.size() > 0; i++ ) {
    *buf++ = qu_speaker.front();
    qu_speaker.pop();
    if ( NUM_CHANNELS == 2 ) {
      *buf++ = qu_speaker.front();
      qu_speaker.pop();
    }
  }
  // Pad the rest of the buffer with silence if we ran out of data.
  bool is_empty = false;
  for ( ; i < len; i++ ) {
    is_empty = true;
    *buf++ = SAMPLE_SILENCE;
    if ( NUM_CHANNELS == 2 ) {
      *buf++ = SAMPLE_SILENCE;
    }
  }
  char dateStr[9];
  _strtime( dateStr );
  if ( is_empty ) {
    std::cout << "[" << dateStr << "][pushToSpeaker]empty!!" << std::endl;
  }
  std::cout << "[" << dateStr << "][pushToSpeaker]qu_speaker.size(): " << qu_speaker.size() << std::endl;
}

// Buffer up what came in from the microphone.
void PortAudioMediaChannel::saveFromMicrophone( const float* buf, int len ) {
  talk_base::CritScope cs(&crit_microphone);

  for ( int i = 0; i < len; i++ ) {
    qu_microphone.push( *buf++ );
    if ( NUM_CHANNELS == 2 ) {
      qu_microphone.push( *buf++ );
    }
  }
  char dateStr[9];
  _strtime( dateStr );
  std::cout << "[" << dateStr << "][saveFromMicrophone]qu_microphone.size(): " << qu_microphone.size() << std::endl;
}

[/cpp]

portaudiomediaengine.h
[cpp]
/*
* Jingle call example
* Copyright 2004–2005, Google Inc.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

// PortAudioMediaEngine is a PortAudio-based implementation of MediaEngine

#ifndef TALK_SESSION_PHONE_PORTAUDIOMEDIAENGINE_H_
#define TALK_SESSION_PHONE_PORTAUDIOMEDIAENGINE_H_

#include "talk/third_party/mediastreamer/mediastream.h"
#include "talk/base/asyncsocket.h"
#include "talk/base/scoped_ptr.h"
#include "talk/session/phone/mediaengine.h"
#include "talk/base/criticalsection.h"
#include "portaudio.h"
#include <queue>
#include <vector>

// Engine settings
#define ENGINE_BUFFER_SIZE 2048

// PortAudio settings
#define PA_SAMPLE_TYPE (paFloat32) // 32 bit floating point output

typedef float SAMPLE;

#define SAMPLE_RATE (44100)
//#define SAMPLE_RATE (11025)
//#define FRAMES_PER_BUFFER (1024)
#define FRAMES_PER_BUFFER (256)
#define SAMPLE_SILENCE (0.0f)
#define NUM_CHANNELS (2)
//#define NUM_CHANNELS (1)

//#define PORTAUDIO_PACKET_LENGTH (2048)
#define PORTAUDIO_PACKET_LENGTH (40960) // size in bytes of one outgoing audio packet; playback works at this size
#define MAX_ALLOWED_LATENCY (0.5) // maximum tolerated audio latency [s]
#define MAX_SPEAKER_QUEUE_SIZE (MAX_ALLOWED_LATENCY * SAMPLE_RATE * NUM_CHANNELS) // maximum queue size; beyond this the oldest samples are dropped

#ifndef M_PI
#define M_PI (3.14159265)
#endif

namespace cricket {

#define TABLE_SIZE (200)

typedef struct
{
float sine[TABLE_SIZE];
int left_phase;
int right_phase;
char message[20];
}
paTestData;

typedef struct {
std::queue<float> qu;
}
paTestData2;

class PortAudioMediaEngine;

class PortAudioMediaChannel : public MediaChannel {
public:
PortAudioMediaChannel(PortAudioMediaEngine *eng);
virtual ~PortAudioMediaChannel();

virtual void SetCodecs(const std::vector<Codec> &codecs);
virtual void OnPacketReceived(const void *data, int len);

virtual void SetPlayout(bool playout);
virtual void SetSend(bool send);

virtual int GetOutputLevel();
bool mute() {return mute_;}

virtual void StartMediaMonitor(VoiceChannel * voice_channel, uint32 cms) {}
virtual void StopMediaMonitor() {}

// portaudio
void pushToSpeaker( float*, int );
void saveFromMicrophone( const float*, int );
sigslot::signal1<PortAudioMediaChannel*> SignalReadFromMicEvent; // ready to read

paTestData data;
std::queue<float> qu_sine;

paTestData2 data2;
int totalFrames;

protected:
// portaudio
void readBuffer(float*, float**, float*, float*, float*, int);
void writeBuffer(float*, float*, float*, float*, float, int);

private:
PortAudioMediaEngine *engine_;
AudioStream *audio_stream_;
talk_base::scoped_ptr<talk_base::AsyncSocket> socket_;
void OnReadFromMic(PortAudioMediaChannel*);

int pt_;
bool mute_;
bool play_;

talk_base::CriticalSection crit_speaker;
talk_base::CriticalSection crit_microphone;

// portaudio
PaStream* stream_;
std::queue<float> qu_microphone; // buffers samples from the local microphone
std::queue<float> qu_speaker; // buffers samples that arrived from the remote peer

};

class PortAudioMediaEngine : public MediaEngine {
public:
PortAudioMediaEngine();
~PortAudioMediaEngine();
virtual bool Init();
virtual void Terminate();

virtual MediaChannel *CreateChannel();

virtual int SetAudioOptions(int options);
virtual int SetSoundDevices(int wave_in_device, int wave_out_device);

virtual float GetCurrentQuality();
virtual int GetInputLevel();

virtual std::vector<Codec> codecs() {return codecs_;}
virtual bool FindCodec(const Codec&);

private:
std::vector<Codec> codecs_;
};

} // namespace cricket

#endif // TALK_SESSION_PHONE_PORTAUDIOMEDIAENGINE_H_

[/cpp]

What I actually want is one-way streaming, not a phone call,
so the next step is probably to stand up my own XMPP server
and start hacking on libjingle's xmppclient.
I'd also like to try adding the Speex codec.
