Sprachsteuerung

Lesedauer ~3 Minuten

Die fertige Lösung Jasper übernimmt die Sprachsteuerung auf dem Raspberry Pi bzw. unter Linux-Betriebssystemen. Der folgende „Artikel“ besteht aus Aufzeichnungen zu einigen Versuchen, die ich selbst ausprobiert habe, und dient nur als Referenz!

UPDATE: Nach einigen Versuchen bin ich der Meinung, dass die Sprachqualität dem Aufwand der Einrichtung in keiner Weise gerecht wird. Bessere Lösungen sind Systeme wie Watson und Co.

Sound testen

Sound testen (http://www.aonsquared.co.uk/raspi_voice_control)

arecord -d 10 -D plughw:1,0 test.wav
aplay test.wav

Möglichkeiten der Sprachsteuerung

Google API

Das Problem besteht darin, dass Google immer alle gesprochenen Informationen bekommt. Das möchte man nicht wirklich.

Julius

CMUSphinx / pocketsphinx

http://www.moop.org.uk/index.php/2013/08/10/voice-controlled-lights/

http://stackoverflow.com/questions/17778532/raspberrypi-pocketsphinx-ps3eye-error-failed-to-open-audio-device

    sudo apt-get install autoconf libtool automake bison
    sudo apt-get install gstreamer0.10-pulseaudio libao4 libasound2-plugins libgconfmm-2.6-1c2 libglademm-2.4-1c2a libpulse-dev libpulse-mainloop-glib0 libpulse-mainloop-glib0-dbg libpulse0 libpulse0-dbg libsox-fmt-pulse paman paprefs pavucontrol pavumeter pulseaudio pulseaudio-dbg pulseaudio-esound-compat pulseaudio-esound-compat-dbg pulseaudio-module-bluetooth pulseaudio-module-gconf pulseaudio-module-jack pulseaudio-module-lirc pulseaudio-module-lirc-dbg pulseaudio-module-x11 pulseaudio-module-zeroconf pulseaudio-module-zeroconf-dbg pulseaudio-utils oss-compat
    sudo cp -pf /etc/asound.conf /etc/asound.conf.ORIG 
    echo 'pcm.pulse {
        type pulse
    }
    
    ctl.pulse {
        type pulse
    }
    
    pcm.!default {
        type pulse
    }
    
    ctl.!default {
        type pulse
    }' | sudo tee /etc/asound.conf
    
    
    #Change default sound driver from alsa to pulseaudio
    #######################################################
    sudo cp -fvp /etc/libao.conf /etc/libao.conf.ORIG
    sudo sed -i "s,default_driver=alsa,default_driver=pulse,g" /etc/libao.conf 
    
    # daemon settings according to Pi-Musicbox ( https://github.com/woutervanwijk/Pi-MusicBox )
    sudo cp -fvp /etc/pulse/daemon.conf /etc/pulse/daemon.conf.ORIG
    
    echo "
    # ScarlettPi added this
    high-priority = yes
    nice-level = 5
    exit-idle-time = -1
    resample-method = src-sinc-medium-quality
    default-sample-format = s16le
    default-sample-rate = 48000
    default-sample-channels = 2" | sudo tee -a /etc/pulse/daemon.conf
    
    # Add pi user to the pulse access group
    #######################################################
    sudo adduser pi pulse-access
    sudo shutdown -r now

    mkdir cmusphinx
    cd cmusphinx
    # sphinxbase-0.8.tar.gz und pocketsphinx-0.8.tar.gz von der CMUSphinx-Download-Seite herunterladen: http://cmusphinx.sourceforge.net/wiki/download
    tar -xvzf pocketsphinx-0.8.tar.gz
    tar -xvzf sphinxbase-0.8.tar.gz
    
    cd sphinxbase-0.8
    ./autogen.sh
    ./configure
    make
    sudo make install
    cd -
    
    cd pocketsphinx-0.8
    export LD_LIBRARY_PATH=/usr/local/lib
    export PKG_CONFIG_PATH=/usr/local/lib/pkgconfig
    ./configure
    make
    sudo make install

Test

pocketsphinx_continuous

http://www.speech.cs.cmu.edu/tools/lmtool.html

    $ cat voice.txt
    ALFRED LIGHT ONE ON
    ALFRED TV ON
    ALFRED TV OFF
    ALFRED SLEEP
    JAMES HELLO
    ALFRED HELLO
    $ wget YOUR FILE
    $ tar -xvzf TAR1354.tar.gz
    1354.corpus
    1354.dic
    1354.lm
    1354.sent
    1354.sent.arpabo
    1354.token
    1354.vocab
    $ pocketsphinx_continuous -lm 1354.lm -dict 1354.dic  -silprob  0.1 -wip 1e-4 -bestpath 0
    READY....
    Listening...
    Stopped listening, please wait...
    000000000: ALFRED LIGHT ONE ON
    READY....

## pocketsphinx_continuous zu alfred

    mkdir alfred
    cd alfred
    # alfred/ is brand new and empty, so the pocketsphinx sources cannot be
    # below it. Assumes alfred/ was created next to pocketsphinx-0.8/ -- adjust
    # the path if your layout differs.
    cp ../pocketsphinx-0.8/src/programs/continuous.c alfred.c
    # Libraries are listed after the source file, dependents first: the linker
    # resolves symbols left to right, so -l options placed before alfred.c can
    # be dropped on toolchains that link with --as-needed.
    gcc -g -O2 -Wall -I/usr/local/include/sphinxbase -I/usr/local/include/pocketsphinx alfred.c -o alfred -lpocketsphinx -lsphinxad -lsphinxbase
    
    ./alfred -lm ../1354.lm -dict ../1354.dic -silprob 0.1 -wip 1e-4 -bestpath 0
    
    /*
     * Print `text` on stdout and hand it to the shell pipeline
     * "echo '<text>' | festival -b --tts".
     *
     * text: NUL-terminated string to print and pass on; NULL is ignored.
     *
     * Nothing happens when no command processor is available. Apostrophes in
     * `text` are escaped so they cannot end the single-quoted shell argument.
     * Failures (out of memory, shell could not be started) are best-effort:
     * the function simply returns, after reporting a failed system() call.
     */
    static void
    sayfestival(const char* text)
    {
    	static const char cmd_prefix[] = "echo '";
    	static const char cmd_suffix[] = "' | festival -b --tts";
    	// Inside '...' a quote is written as: close quote, escaped quote, reopen.
    	static const char quote_escape[] = "'\\''";
    	const size_t prefix_len = sizeof cmd_prefix - 1;
    	const size_t escape_len = sizeof quote_escape - 1;

    	if (!text)
    		return;

    	// No command processor
    	if (system(NULL) == 0)
    		return;

    	printf("%s\n", text);

    	// First pass: length of the text once every apostrophe is escaped.
    	size_t escaped_len = 0;
    	for (const char* p = text; *p != '\0'; p++)
    		escaped_len += (*p == '\'') ? escape_len : 1;

    	// sizeof cmd_suffix already accounts for the terminating NUL.
    	char* command = malloc(prefix_len + escaped_len + sizeof cmd_suffix);
    	if (!command)
    		return;

    	// Second pass: prefix, escaped text, suffix (copied with its NUL).
    	char* out = command;
    	memcpy(out, cmd_prefix, prefix_len);
    	out += prefix_len;
    	for (const char* p = text; *p != '\0'; p++) {
    		if (*p == '\'') {
    			memcpy(out, quote_escape, escape_len);
    			out += escape_len;
    		} else {
    			*out++ = *p;
    		}
    	}
    	memcpy(out, cmd_suffix, sizeof cmd_suffix);

    	if (system(command) == -1)
    		perror("sayfestival: system");

    	free(command);
    }
    	
    /*
     * React to one recognized utterance.
     *
     * hyp: NUL-terminated hypothesis text; NULL is ignored.
     *
     * The first whitespace-delimited word must be "ALFRED"; otherwise
     * "Not for alfred :(" is printed and nothing else happens. The exact
     * phrase "ALFRED TV ON" triggers sayfestival("Turning TV on!").
     */
    static void
    fhemparse(const char* hyp)
    {
    	if (!hyp)
    		return;

    	char word[256];

    	// The field width keeps sscanf inside `word`; the return value check
    	// rejects empty/blank input, for which sscanf leaves `word` unset.
    	if (sscanf(hyp, "%255s", word) != 1 || strcmp(word, "ALFRED") != 0) {
    		printf("Not for alfred :(\n");
    		return;
    	}

    	if (strcmp(hyp, "ALFRED TV ON") == 0) {
    		sayfestival("Turning TV on!");
    	}
    }

    READY....
    Listening...
    Stopped listening, please wait...
    TEXT:000000000:HUGO HELLO
    Not for alfred :(
    READY....
    Listening...
    Stopped listening, please wait...
    TEXT:000000001:ALFRED TV ON
    Turning TV on!

Julius

http://julius.sourceforge.jp/en_index.php?q=index-en.html

http://www.raspiprojects.com/raspberry-pi-voice-recognition-julius.html

    sudo apt-get install alsa-tools alsa-oss flex zlib1g-dev libc-bin libc-dev-bin python-pexpect libasound2 libasound2-dev cvs
    cvs -z3 -d:pserver:anonymous@cvs.sourceforge.jp:/cvsroot/julius co julius4
    export CFLAGS="-O2 -mcpu=arm1176jzf-s -mfpu=vfp -mfloat-abi=hard -pipe -fomit-frame-pointer"
    cd julius4
    ./configure --with-mictype=alsa
    make
    sudo make install

    export ALSADEV="plughw:1,0"

    julius --help

http://bloc.eurion.net/archives/2008/writing-a-command-and-control-application-with-voice-recognition/

Voca- und Grammar-Dateien: http://www.voxforge.org/home/dev/acousticmodels/linux/adapt/htkjulius/live-testing

    mkdfa.pl alfred
    alfred.grammar has 1 rules
    alfred.voca    has 4 categories and 11 words
    ---
    Now parsing grammar file
    Now modifying grammar to minimize states[-1]
    Now parsing vocabulary file
    Now making nondeterministic finite automaton[5/5]
    Now making deterministic finite automaton[5/5]
    Now making triplet list[5/5]
    4 categories, 5 nodes, 4 arcs
    -> minimized: 5 nodes, 4 arcs
    ---
    generated: alfred.dfa alfred.term alfred.dict

    julius -quiet -input mic -C julian.jconf 2>/dev/null | ./command.py