# Silvius microphone client based on Tanel's client.py
__author__ = "dwk"
import argparse
from ws4py.client.threadedclient import WebSocketClient
import threading
import sys
import urllib.request, urllib.parse, urllib.error
import json
# Set to True by setup() when --keep-going is given; MyClient.received_message
# reads it to decide whether to pause for five seconds after a server error.
reconnect_mode = False
# Set to True by MyClient.opened() when the microphone cannot be opened;
# setup() checks it to stop its reconnect loop.
fatal_error = False
class MyClient(WebSocketClient):
    """Stream microphone audio to a speech server and print its replies.

    Once the websocket is open, a background thread reads 16-bit mono audio
    from a PyAudio input stream and sends it to the server as binary frames.
    JSON replies are handled in :meth:`received_message`: final results are
    written to stdout, while partial hypotheses and diagnostics go to stderr.
    """

    def __init__(
        self,
        url,
        mic=1,
        protocols=None,
        extensions=None,
        heartbeat_freq=None,
        byterate=16000,
        show_hypotheses=True,
        save_adaptation_state_filename=None,
        send_adaptation_state_filename=None,
        audio_gate=0,
    ):
        """Create the client.

        Args:
            url: Websocket URL of the speech server.
            mic: PyAudio input device index; -1 selects the default input
                device.
            protocols, extensions, heartbeat_freq: Passed through unchanged
                to ``WebSocketClient.__init__``.
            byterate: Sample rate requested from the microphone first; audio
                captured at any other rate is converted to this rate before
                it is sent.
            show_hypotheses: When true, transcripts are echoed to stderr.
            save_adaptation_state_filename: If set, any adaptation state the
                server returns is written to this file as JSON.
            send_adaptation_state_filename: Stored on the instance; no code
                in this class reads it.
            audio_gate: RMS threshold; chunks quieter than this are replaced
                with silence. 0 disables the gate.
        """
        super().__init__(url, protocols, extensions, heartbeat_freq)
        self.mic = mic
        self.show_hypotheses = show_hypotheses
        self.byterate = byterate
        self.save_adaptation_state_filename = save_adaptation_state_filename
        self.send_adaptation_state_filename = send_adaptation_state_filename
        self.chunk = 0
        self.audio_gate = audio_gate

    def send_data(self, data):
        """Send one chunk of raw audio as a binary websocket frame."""
        self.send(data, binary=True)

    def opened(self):
        """Open the microphone and start the thread that streams audio.

        If the device rejects the requested sample rate, the device's default
        rate is tried once and the audio is resampled before sending. If the
        microphone cannot be opened at all, the module-level ``fatal_error``
        flag is set and ``sys.exit(0)`` is called.
        """
        global fatal_error

        import pyaudio
        # NOTE: audioop was removed from the standard library in Python 3.13.
        import audioop

        pa = pyaudio.PyAudio()
        sample_rate = self.byterate
        stream = None
        while stream is None:
            try:
                # try adjusting this if you want fewer network packets
                self.chunk = 2048 * 2 * sample_rate // self.byterate
                mic = self.mic
                if mic == -1:
                    mic = pa.get_default_input_device_info()["index"]
                    print("Selecting default mic", file=sys.stderr)
                print("Using mic #", mic, file=sys.stderr)
                stream = pa.open(
                    rate=sample_rate,
                    format=pyaudio.paInt16,
                    channels=1,
                    input=True,
                    input_device_index=mic,
                    frames_per_buffer=self.chunk,
                )
            except IOError as e:
                # The invalid-sample-rate condition is matched both by its
                # numeric code and by its text, as the original code did.
                if e.errno == -9997 or e.errno == "Invalid sample rate":
                    new_sample_rate = int(
                        pa.get_device_info_by_index(mic)["defaultSampleRate"]
                    )
                    # Retry only if the device default is actually different,
                    # otherwise this loop would never terminate.
                    if sample_rate != new_sample_rate:
                        sample_rate = new_sample_rate
                        continue
                print("\n", e, file=sys.stderr)
                print(
                    "\nCould not open microphone. Please try a different device.",
                    file=sys.stderr,
                )
                fatal_error = True
                # Release PortAudio before giving up.
                pa.terminate()
                sys.exit(0)

        def mic_to_ws():  # uses stream
            """Read audio chunks until an error occurs, sending each one."""
            try:
                print("\nLISTENING TO MICROPHONE", file=sys.stderr)
                last_state = None
                while True:
                    data = stream.read(self.chunk)
                    if self.audio_gate > 0:
                        # Width 2 = 16-bit samples, matching paInt16 above.
                        rms = audioop.rms(data, 2)
                        if rms < self.audio_gate:
                            # Must stay bytes: audioop.ratecv and the binary
                            # websocket frame both need a bytes-like object.
                            data = b"\x00" * len(data)
                    if sample_rate != self.byterate:
                        (data, last_state) = audioop.ratecv(
                            data, 2, 1, sample_rate, self.byterate, last_state
                        )
                    self.send_data(data)
            except IOError as e:
                # usually a broken pipe; stdout is reserved for results, so
                # report on stderr like every other diagnostic
                print(e, file=sys.stderr)
            except AttributeError:
                # currently raised when the socket gets closed by main thread
                pass
            finally:
                # Release the audio device so that a later reconnect can open
                # it again instead of accumulating open streams.
                try:
                    stream.close()
                except IOError:
                    pass
                pa.terminate()
            # to voluntarily close the connection, we would use
            # self.send_data("")
            # self.send("EOS")
            try:
                self.close()
            except IOError:
                pass

        threading.Thread(target=mic_to_ws).start()

    def received_message(self, m):
        """Handle one JSON message from the server.

        A message with ``status`` 0 may carry a recognition result and/or an
        adaptation state. Final results are printed to stdout as the raw
        message; partial results are shown on stderr when
        ``show_hypotheses`` is set. Any other status is reported as an error
        on stderr, followed by a five second pause when the module-level
        ``reconnect_mode`` flag is set.
        """
        response = json.loads(str(m))
        if response["status"] == 0:
            if "result" in response:
                trans = response["result"]["hypotheses"][0]["transcript"]
                if response["result"]["final"]:
                    if self.show_hypotheses:
                        print("\r%s" % trans.replace("\n", "\\n"), file=sys.stderr)
                    # Final result: emit the whole message on stdout.
                    print(m, flush=True)
                elif self.show_hypotheses:
                    print_trans = trans.replace("\n", "\\n")
                    # Keep the in-place progress line within 80 columns.
                    if len(print_trans) > 80:
                        print_trans = "... %s" % print_trans[-76:]
                    print("\r%s" % print_trans, end=" ", file=sys.stderr)
            if "adaptation_state" in response:
                if self.save_adaptation_state_filename:
                    print(
                        "Saving adaptation state to %s"
                        % self.save_adaptation_state_filename,
                        file=sys.stderr,
                    )
                    with open(self.save_adaptation_state_filename, "w") as f:
                        f.write(json.dumps(response["adaptation_state"]))
        else:
            print(
                "Received error from server (status %d)" % response["status"],
                file=sys.stderr,
            )
            if "message" in response:
                print("Error message:", response["message"], file=sys.stderr)
            if reconnect_mode:
                import time

                print("Sleeping for five seconds before reconnecting", file=sys.stderr)
                time.sleep(5)

    def closed(self, code, reason=None):
        """Called when the websocket closes; intentionally does nothing."""
        pass
def setup():
    """Parse the command line and run the client.

    Without ``--keep-going`` the client is run once. With it, the
    module-level ``reconnect_mode`` flag is set and the client is run
    repeatedly until ``fatal_error`` becomes true.
    """
    global reconnect_mode
    global fatal_error

    content_type = "audio/x-raw, layout=(string)interleaved, rate=(int)16000, format=(string)S16LE, channels=(int)1"
    path = "client/ws/speech"

    # (flags, keyword arguments) for each option, in help-output order.
    option_table = (
        (
            ("-s", "--server"),
            dict(
                default="localhost",
                dest="server",
                help="Speech-recognition server",
            ),
        ),
        (
            ("-p", "--port"),
            dict(default="8019", dest="port", help="Server port"),
        ),
        (
            ("-d", "--device"),
            dict(
                default="-1",
                dest="device",
                type=int,
                help="Select a different microphone (give device ID)",
            ),
        ),
        (
            ("-k", "--keep-going"),
            dict(
                action="store_true",
                help="Keep reconnecting to the server after periods of silence",
            ),
        ),
        (
            ("--save-adaptation-state",),
            dict(help="Save adaptation state to file"),
        ),
        (
            ("--send-adaptation-state",),
            dict(help="Send adaptation state from file"),
        ),
        (
            ("--content-type",),
            dict(
                default=content_type,
                help="Use the specified content type (default is " + content_type + ")",
            ),
        ),
        (
            ("--hypotheses",),
            dict(
                default=True,
                type=int,
                help="Show partial recognition hypotheses (default: 1)",
            ),
        ),
        (
            ("-g", "--audio-gate"),
            dict(
                default=0,
                type=int,
                help="Audio-gate level to reduce detections when not talking",
            ),
        ),
    )

    parser = argparse.ArgumentParser(description="Microphone client for silvius")
    for flags, options in option_table:
        parser.add_argument(*flags, **options)

    args = parser.parse_args()
    content_type = args.content_type
    print("Content-Type:", content_type, file=sys.stderr)

    if not args.keep_going:
        run(args, content_type, path)
        return

    reconnect_mode = True
    while not fatal_error:
        print("Reconnecting...", file=sys.stderr)
        run(args, content_type, path)
def run(args, content_type, path):
    """Connect to the server once and block until the connection ends.

    Args:
        args: Parsed command-line namespace; ``server``, ``port``,
            ``device``, ``hypotheses``, ``save_adaptation_state``,
            ``send_adaptation_state`` and ``audio_gate`` are read.
        content_type: Value sent as the ``content-type`` query parameter.
        path: URL path on the server, without a leading slash.
    """
    query = urllib.parse.urlencode([("content-type", content_type)])
    uri = "ws://%s:%s/%s?%s" % (args.server, args.port, path, query)
    print("Connecting to", uri, file=sys.stderr)

    client = MyClient(
        uri,
        byterate=16000,
        mic=args.device,
        show_hypotheses=args.hypotheses,
        save_adaptation_state_filename=args.save_adaptation_state,
        send_adaptation_state_filename=args.send_adaptation_state,
        audio_gate=args.audio_gate,
    )
    client.connect()
    client.run_forever()
def main():
    """Script entry point: run setup() and exit quietly on Ctrl-C."""
    try:
        setup()
    except KeyboardInterrupt:
        # Report the interrupt on stderr instead of showing a traceback.
        print("\nexiting...", file=sys.stderr)
# Run the client only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()