# Author's note (translated from the original Chinese preface):
# Honestly, now that pair coding with an AI exists, writing this kind of thing
# by hand is almost a joke. Today's entry records one piece of common sense for
# interactive programs: in short, the program must be multi-threaded -- that is
# a basic design principle. The code below was written by the lovely (paid-for)
# Opus 4.7: a small local Flask server that forwards speech requests to the
# googleapis Speech-to-Text cloud service.
"""Bridge HTTP service between OVOS and Google Cloud Speech-to-Text.

Emulates the ovos-stt-plugin-server endpoint and forwards the audio to the
Google Cloud Speech-to-Text REST API using an API key (AIza...).

OVOS side:
    POST /stt   multipart/form-data
        audio = <wav file>
        lang  = en-US (optional)
    -> 200 text/plain  <transcription>

Google side:
    POST https://speech.googleapis.com/v1/speech:recognize?key=API_KEY
"""
import base64
import io
import logging
import os
import time
import wave

import requests
from flask import Flask, request
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

# Configuration is read once from the environment at import time.
API_KEY = os.environ.get("STT_PW", "").strip()
DEFAULT_LANG = os.environ.get("STT_LANG", "en-US")
GOOGLE_URL = "https://speech.googleapis.com/v1/speech:recognize"

app = Flask(__name__)
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s %(message)s",
)
log = app.logger

# Reuse TCP/TLS to Google + auto-retry on transient failures.
# urllib3 Retry on POST also retries on connection errors
# (e.g. RemoteDisconnected).
_session = requests.Session()
_session.mount(
    "https://",
    HTTPAdapter(
        max_retries=Retry(
            total=1,
            connect=1,
            read=0,
            backoff_factor=0.5,
            status_forcelist=(500, 502, 503, 504),
            allowed_methods=frozenset(["POST"]),
            respect_retry_after_header=True,
        ),
    ),
)


def _redact(text):
    """Return *text* with every occurrence of the API key masked.

    The key is sent as a URL query parameter, and connection-level exception
    messages embed the full request URL, so any upstream error text must be
    scrubbed before it is logged or echoed back to the caller.
    """
    if not API_KEY:
        return text
    return text.replace(API_KEY, "***")


def _wav_to_pcm(wav_bytes: bytes):
    """Return ``(raw_pcm_bytes, sample_rate, channels)`` for *wav_bytes*.

    If the input is not a valid 16-bit WAV, the bytes are returned unchanged
    and assumed to be raw 16-bit mono 16 kHz PCM (what OVOS records by
    default).
    """
    try:
        with wave.open(io.BytesIO(wav_bytes), "rb") as w:
            sr = w.getframerate()
            ch = w.getnchannels()
            sw = w.getsampwidth()
            frames = w.readframes(w.getnframes())
        if sw != 2:
            # NOTE(review): a valid WAV with another sample width falls through
            # to the raw-PCM fallback below, header bytes included.
            raise ValueError(f"unsupported sample width {sw}")
        return frames, sr, ch
    except (wave.Error, EOFError, ValueError) as e:
        log.warning("Not a valid WAV (%s) — treating as raw 16k mono PCM", e)
        return wav_bytes, 16000, 1


@app.route("/stt", methods=["POST"])
def stt():
    """Transcribe the posted audio through Google Speech-to-Text.

    The audio is taken from the ``audio`` multipart file if present, otherwise
    from the raw request body. The language comes from the ``lang`` form field,
    then the ``lang`` query argument, then ``DEFAULT_LANG``.

    Returns:
        200 with the transcript as ``text/plain`` (empty when Google returns
        no results), 400 when no audio was supplied, 500 when the API key is
        not configured, or 502 when the Google request fails, returns a
        non-200 status, or returns a body that is not valid JSON.
    """
    if not API_KEY:
        return "STT_PW not set on server", 500

    if "audio" in request.files:
        audio_bytes = request.files["audio"].read()
    else:
        audio_bytes = request.get_data() or b""
    if not audio_bytes:
        return "no audio", 400

    lang = request.form.get("lang") or request.args.get("lang") or DEFAULT_LANG

    pcm, sr, ch = _wav_to_pcm(audio_bytes)
    payload = {
        "config": {
            "encoding": "LINEAR16",
            "sampleRateHertz": sr,
            "audioChannelCount": ch,
            "languageCode": lang,
            "enableAutomaticPunctuation": True,
        },
        "audio": {"content": base64.b64encode(pcm).decode("ascii")},
    }

    t0 = time.monotonic()
    log.info("recv lang=%s sr=%s ch=%s bytes=%d", lang, sr, ch, len(pcm))
    try:
        r = _session.post(
            GOOGLE_URL,
            params={"key": API_KEY},
            json=payload,
            # (connect, read) — fail fast so OVOS gets an error within its
            # 30s window.
            timeout=(5, 15),
        )
    except requests.RequestException as e:
        # The exception text contains the request URL, key included.
        detail = _redact(str(e))
        log.error(
            "Google request failed after %.1fs: %s",
            time.monotonic() - t0,
            detail,
        )
        return f"google request failed: {detail}", 502

    if r.status_code != 200:
        body = _redact(r.text)
        log.error("Google error %s: %s", r.status_code, body[:500])
        return f"google error {r.status_code}: {body}", 502

    try:
        data = r.json()
    except ValueError:
        # A 200 with a non-JSON body would otherwise surface as a bare 500.
        log.error("Google returned a non-JSON body: %s", _redact(r.text[:500]))
        return "google error: response was not valid JSON", 502

    # Each entry in "results" covers a consecutive portion of the audio, so
    # the top alternative of every entry is concatenated in order.
    pieces = []
    for res in data.get("results", []):
        alts = res.get("alternatives") or []
        if alts:
            pieces.append(alts[0].get("transcript", ""))
    transcript = "".join(pieces).strip()

    log.info("done %.2fs -> %r", time.monotonic() - t0, transcript)
    return transcript, 200, {"Content-Type": "text/plain; charset=utf-8"}


@app.route("/", methods=["GET"])
def index():
    """Liveness probe: always answers ``ok``."""
    return "ok", 200


if __name__ == "__main__":
    port = int(os.environ.get("PORT", 9090))
    # threaded=True lets the server accept further requests while one is
    # blocked waiting on Google.
    app.run(host="127.0.0.1", port=port, threaded=True)