?? ?????? ?? ?? ??? ??? ????. ??? ??? ?? ?? ?? ?? ??????? ??. ??? ??? ??? ?? ??? ???? ??? ??? ?????. ??? ????? ?? ?????? ??? ??? ??? ??? ????, ??? ??? ?? ?? ?? ??? ????. ?? ??? ??? ?? ?? ??? ????? ???. ? ?? ??? ???? ?????? ??? ?? ?? ?? ?????? ?? ?? ???? ?? ??? ? ??? ???.
?????? ??? ?? ?????? ???? ??/?? ????? ??? ???? ?? ??? ???????. ??? ?? ????, ?? ??? ??? ??? ?????? ?? ??? ??? ??? ?? ??.
????? ?? ?? ?? ?????? ??? ??? ???? ??? ???? ??? ????? ???.
- ??? ??—??? ??? ??? ?????. ?? ?????? ??? ??? ???, ???? ?? ???? ???? ??? ? ??? ??. ??? ??? ?? ?? ???? ??? ?? ??? ??????. ?? ?????? ??? ??? ???? ???? ??? ???? ??? ??, ??, ??? ???? ???? ???? ??? ? ????.
- ??? ??? ???—???? ?? ??? ??? ?? ?????? ??? ??? ???? ??? ???? ???. ??? ?? 200???? ??? ??? ? ???, ?? ?? ??? ??? ??? ?????. ?? ?? ??? ?? ????? ?? ?????? ???? ?? ?? ??????(trade-off)? ?? ??? ???? ??? ??????. ??? ?? ???? ?? ??? ??? ????? ???? ??? ?????.
?? ???? ?? ??? ??? ????? ?? ????? ???? NVIDIA Riva? Rasa? ?? ??????? ?????. ?? ??? ????? ??? ?? ?? ?? ??????? ??? ???? ??? ?? ?????. ?? ??? Riva? ?? ??? ?? ????? ??? ?????.
?? 1. ?? ?????? ?? ??.
?? ?? ????? ????? ??? ?? ??? ?????.
- ?? ??(DM)
- ?? ?? ??(ASR)
- ??? ??(NLP) ?? ??? ??(NLU)
- ?? ?? ?? ????(TTS)
???? NLU? DM ??? Rasa, TTS? ASR? Riva? ?????.
Rasa ??
Rasa? ???? ?? ?? AI ????? ??? ?? ?? ?? ?? ?? ????????. ?????? ?? ??? ?? ? ?????.
- ??? ??? ??
- ?? ???? ??
Rasa ?????? ? ??? ?? ??? ?????. ???? ?? ??? ??? ?? ???? ???? ???? ??? ?????? ???? ???.
? ?? ?????? Rasa NLU? DM ??? ?????. ? ??? ??? Rasa ??????? ?????.
Riva ??
Riva? ??? AI ?????? ??? ?? NVIDIA AI ?? SDK???. Riva? ASR? TTS ??? ??? ?? ?????? ?? ?????? ??? ? ??. NVIDIA GPU ?? Riva SDK? ?? ??? ?? ??? ???? ?????.
? ?? ?????? ?? ASR ???? ?? ??? ???? ???? ???? ??? ???. TTS? ??????? ??? ?? ??? ????? ???? ??. ? ?? ?? ??? ??? ???? Riva? ?? ?? ?? ????? ??? ?? ??? ?????. Riva? ??? ??? ?? ??? NVIDIA Riva Speech Skills?? ?????.
???? ??
??? ASR? TTS, NLU? DM? ????? ?? ?? ?? ?????? ???? ?????. ?? 1? ? ?? ?????? ????? ?????.
??1. Riva/Rasa ?? ?????? Rasa NLU ????.
??? ??? ????? ??????? ???(weatherbot) ? ?????????. ???? ???? ????? ??????? ?? ?????? ???? ??? ?? ?????.
??? ??? Riva ASR ???? ??? ?????. ??? ??? ???? Rasa ??(wrapper)? ????, Rasa DM? Rasa NLU? API? ??? ??? ?? ??(action)? ????. Rasa DM? ??? ??? ??? ??? ?? ???? ?????. Rasa ??? ??? ???? ??? ??? gRPC ??? ?? Riva TTS ??? ????, ?? ???? ?? ??? ????? ?? ??? ?????.
Riva AI ???? Rasa? GPU? ??? ???? ??? ? ?? ??? ??? ?????.
?? ????? ????
?? ?????? ?? ??? ?? ????? ?? ??? ?????. Rasa NLU? DM? ??? ??? ????? Rasa NLU/DM? Riva ASR/TTS? ??? ?? ?? ?? ?????? ???? ??? ???? ??? ??? ?? ???.
? ?? ??????? ??? ?????? ????? ??? ????(Python Flask) ?????? ??? ??? ????? ??????.
? ???? ??? ?? ?? ??? Riva ??????? ?? ?????(? Rasa) ??? ?????.
?? ??
?? ?????? ???? ?? Riva ??? ????? ?? ???? ???. Riva AI Services? ???? gRPC ?????? ?? ?? ???? ???? Riva ?? ?? ???(Quick Start Guide)? ??? ????.
Rasa ?????? Riva ASR ????
? ????? asr.py ??? ?? Riva ASR? Rasa ?????? ?????.
Riva ASR? ???? ?? ??(batch) ??? ?? ?????. ???? ??? ???? ??? ???? ??/??? ?? ???? ??? ???? ?????. ?? ??? ??? ????? ??? ??? ???? ?????. ? ?? ????? Riva ASR? ???? ??? ?????.
?? ??
# gRPC transport plus the Riva audio/ASR protobuf modules used by the ASR pipe.
import grpc
import riva.modules.client.src.riva_proto.audio_pb2 as ri
import riva.modules.client.src.riva_proto.riva_asr_pb2 as risr
import riva.modules.client.src.riva_proto.riva_asr_pb2_grpc as risr_srv
?? Riva ?????? ?? ???? ???? ???.
class ASRPipe(object):
    """Holds the audio buffer queue and the transcript queue for Riva ASR."""

    def __init__(self):
        """Create the two queues and derive the audio chunk size.

        ``self.sampling_rate`` must already be available when the chunk size
        is computed (it is not assigned by the statements shown here).
        """
        # . . . .  (statements omitted in this excerpt)
        # A chunk is one tenth of the sampling rate.
        self.chunk = int(self.sampling_rate / 10)  # 100ms
        self.closed = False
        # Transcripts go into their own queue, separate from the audio queue.
        self._transcript = queue.Queue()
        # Incoming audio is queued here by fill_buffer().
        self._buff = queue.Queue()
ASRPipe ???? ??? Riva ASR ??? ?????. __init__ ????? ??? ???? ??? ??? ?????? _buff? _transcript ???? ?? ??????.
def start(self):
    """Open an insecure gRPC channel to the Riva server and build the ASR stub.

    The server address is read from ``riva_config["RIVA_SPEECH_API_URL"]``.
    The channel is kept on ``self.channel`` and the stub on ``self.asr_client``.
    """
    # . . . .  (statements omitted in this excerpt)
    server_url = riva_config["RIVA_SPEECH_API_URL"]
    self.channel = grpc.insecure_channel(server_url)
    self.asr_client = risr_srv.RivaSpeechRecognitionStub(self.channel)
start ??? ??? Riva ??? gRPC ??? ?????.
def fill_buffer(self, in_data):
    """Append one piece of data from the audio stream to the buffer queue.

    Args:
        in_data: Data taken from the audio stream; it is enqueued unchanged,
            whatever its length.
    """
    audio_queue = self._buff
    audio_queue.put(in_data)
?? ?????? ASR? ??? ????? ???????. ????? ???? ???? ????? ?? ???? ?? ????. ??? ????? ??? ???? ??? ?? _buff? ????? ????? fill_buffer??? ?????.
def main_asr(self):
    """Run a streaming-recognition call against the Riva ASR service.

    Builds the recognition configuration from the instance settings
    (``sampling_rate``, ``language_code``, ``enable_automatic_punctuation``,
    ``stream_interim_results``), sends the configuration followed by the audio
    produced by ``build_request_generator()`` to ``StreamingRecognize``, and
    passes the response iterator to ``listen_print_loop``.
    """
    # . . . .  (statements omitted in this excerpt)
    config = risr.RecognitionConfig(
        encoding=ri.AudioEncoding.LINEAR_PCM,
        sample_rate_hertz=self.sampling_rate,
        language_code=self.language_code,
        max_alternatives=1,
        enable_automatic_punctuation=self.enable_automatic_punctuation
    )
    streaming_config = risr.StreamingRecognitionConfig(
        config=config,
        interim_results=self.stream_interim_results)

    if self.verbose:
        print("[Riva ASR] Starting Background ASR process")
    self.request_generator = self.build_request_generator()
    # Wrap every audio chunk in its own request message (lazily).
    requests = (risr.StreamingRecognizeRequest(audio_content=content)
                for content in self.request_generator)

    def build_generator(cfg, gen):
        """Yield the configuration request first, then every audio request."""
        yield risr.StreamingRecognizeRequest(streaming_config=cfg)
        yield from gen
        # Nothing else is yielded once the audio is exhausted: every item on
        # this stream has to be a StreamingRecognizeRequest, so the bare
        # configuration object `cfg` must not be emitted here.

    if self.verbose:
        print("[Riva ASR] StreamingRecognize Start")
    responses = self.asr_client.StreamingRecognize(build_generator(
        streaming_config, requests))
    # Now, put the transcription responses to use.
    self.listen_print_loop(responses)
???? ??? ? main_asr??? ???? ?????.
main_asr???? ??, ??? ?, ??? ???, ??? ? Riva ASR ??? ??? ?? ????? ?????. ???? main_asr??? build_generator??? ?????. ? ??? Riva ASR StreamingRecognizeRequest ??? ??? ??, ASR ??? ?? ?? ???? ??????. ????? main_asr ? Riva ASR StreamingRecognize ??? ?????. ? ??? ??/?? ????? ???? ???? ??? ??? ???? ???? ????, ??? ?? ????? ?????.
Rasa ?????? Riva TTS ????
? ????? tts.py ? tts_stream.py??? ?? Riva TTS? Rasa ?????? ?????.
ASR? ????? TTS ?? ???? ?? ?? ??? ?? ?????. tts.py? ?? ??? ???? ???? ??? ??? ??? ?????. tts_stream.py? ???? ??? ???? ???? ??? ??? ????? ?????.
?? ??
# gRPC transport, the Riva audio/TTS protobuf modules, and the pronunciation helper.
import grpc
import riva.modules.client.src.riva_proto.audio_pb2 as ri
import riva.modules.client.src.riva_proto.riva_tts_pb2 as rtts
import riva.modules.client.src.riva_proto.riva_tts_pb2_grpc as rtts_srv
from riva.tts.tts_processing.main_pronunciation import RunPronunciation
?? Riva ?????? ?? ???? ???? ???.
class TTSPipe(object):
    """Holds the text buffer queue and helpers used to call Riva TTS."""

    def __init__(self):
        """Create the text queue, the silence block and the pronunciation helper.

        ``self.sample_rate`` must already be available when the silence block
        is built (it is not assigned by the statements shown here).
        """
        # . . . .  (statements omitted in this excerpt)
        self._buff = queue.Queue()
        # `sample_rate` zero-valued int16 samples, stored as raw bytes.
        silence = np.zeros(shape=(self.sample_rate, 1), dtype=np.int16)
        self._flusher = bytes(silence)  # Silence audio
        self.pronounce = RunPronunciation(pronounce_dict_path)
TTSPipe ???? ??? Riva TTS ??? ?????. __init__???? _buff???? ??? ?? ???? ?????.
def start(self):
    """Open an insecure gRPC channel to the Riva server and build the TTS stub.

    The channel is kept on ``self.channel`` and the stub on ``self.tts_client``.
    """
    # . . . .  (statements omitted in this excerpt)
    # Dictionary keys are case-sensitive: use the same "RIVA_SPEECH_API_URL"
    # key that the ASR pipe reads, not the mixed-case "Riva_..." spelling.
    self.channel = grpc.insecure_channel(
        riva_config["RIVA_SPEECH_API_URL"])
    self.tts_client = rtts_srv.RivaSpeechSynthesisStub(self.channel)
start ??? ??? Riva ??? gRPC ??? ?????.
def fill_buffer(self, in_data):
    """Queue a text response for the TTS buffer; zero-length input is dropped.

    Args:
        in_data: Sized object (normally response text) to enqueue.
    """
    if len(in_data) == 0:
        return
    self._buff.put(in_data)
fill_buffer???? ??? ???? ??? ???? ?? _buff? ?????.
def get_speech(self):
    """Yield synthesized speech for queued text until the pipe is closed.

    Generator. While ``self.closed`` is false, each text taken from the
    buffer is passed through ``self.pronounce.get_text``, sent to the Riva
    TTS ``SynthesizeOnline`` call, and every returned audio chunk is
    converted from float32 samples to int16 bytes and yielded.

    ``self.current_tts_duration`` is reset for every text and then holds the
    accumulated duration of the chunks yielded so far for that text.

    Yields:
        bytes: int16 audio data for one response chunk.
    """
    # . . . .  (statements omitted in this excerpt)
    while not self.closed:
        if not self._buff.empty():  # Enter if queue/buffer is not empty.
            try:
                text = self._buff.get(block=False, timeout=0)
                req = rtts.SynthesizeSpeechRequest()
                req.text = self.pronounce.get_text(text)
                req.language_code = self.language_code
                req.encoding = self.audio_encoding
                req.sample_rate_hz = self.sample_rate
                req.voice_name = self.voice_name

                duration = 0
                self.current_tts_duration = 0
                responses = self.tts_client.SynthesizeOnline(req)
                for resp in responses:
                    # Four bytes per float32 sample.
                    datalen = len(resp.audio) // 4
                    data32 = np.ndarray(buffer=resp.audio, dtype=np.float32,
                                        shape=(datalen, 1))
                    # Scale factor kept exactly as in the article's code.
                    data16 = np.int16(data32 * 23173.26)
                    speech = bytes(data16.data)
                    # bytes in this chunk / (rate * 1 channel * 2 bytes per sample)
                    duration += len(data16) * 2 / (self.sample_rate * 1 * 16 / 8)
                    # `duration` is already the running total, so assign it;
                    # adding it again would count earlier chunks repeatedly.
                    self.current_tts_duration = duration
                    yield speech
            except Exception as e:
                print('[Riva TTS] ERROR:', e)
        # . . . .  (statements omitted in this excerpt)
get_speech???? ??? TTS? ?????.
get_speech????? ??, ??? ???, ???, ??? ? Riva TTS ??? ??? ?? ????? ?????. ???? get_speech???? Riva TTS SynthesizeOnline ???? ???? ?? ???? ????, ??? ??? ???? ?????. ? ?? ??? ??? ??? ?? ?? ?? ??? ???? ??? ????? ???????.
?? ????
?? NLU? DM? ?? Rasa ??? rasa.py??? ?? ?????.
class RASAPipe(object):
    """Per-conversation handle used to talk to the Rasa server."""

    def __init__(self, user_conversation_index):
        """Remember which conversation this pipe belongs to.

        Args:
            user_conversation_index: Identifier of the user's conversation;
                stored unchanged on the instance.
        """
        # . . . .  (statements omitted in this excerpt)
        self.user_conversation_index = user_conversation_index
NLU? DM? ?? Rasa ??? ?? ??? ??? RASAPipe ???? ?????.
def request_rasa_for_question(self, message):
    """Post a user message to Rasa and return the processed reply.

    Args:
        message: The user's message; sent under the ``"message"`` key with
            ``self.user_conversation_index`` as ``"sender"``.

    Returns:
        The value produced by ``self.process_rasa_response`` from the decoded
        JSON body of the HTTP response.
    """
    payload = {"message": message,
               "sender": self.user_conversation_index}
    http_reply = requests.post(self.messages_url, json=payload)
    decoded = http_reply.json()
    return self.process_rasa_response(decoded)
? ???? ?? ??? request_rasa_for_question??????. ? ???? ??? ??? ???? ??? ??, ? ???? ??? Rasa API? ??? ID? Rasa? ?????. ??? Rasa? ??? ?? ????? ????.
???? chatbot.py??? ??? ?? ?? ?????? ?????.
class ChatBot(object):
    """Bundles one ASR pipe, one Rasa pipe and one TTS pipe for a conversation."""

    def __init__(self, user_conversation_index, verbose=False):
        """Create the three pipes and reset the control flags.

        Args:
            user_conversation_index: Conversation identifier; stored as
                ``self.id`` and passed on to ``RASAPipe``.
            verbose: Accepted but not used by the statements shown here.
        """
        self.id = user_conversation_index
        self.asr = ASRPipe()
        self.rasa = RASAPipe(user_conversation_index)
        self.tts = TTSPipe()
        self.thread_asr = None       # set by start_asr()
        self.pause_asr_flag = False  # when True, incoming audio is not forwarded to ASR
        self.enableTTS = False       # when False, replies are not queued for TTS
chatbot.py?? ChatBot ???? ????. ??? ??? ChatBot ????? ????, ?? ?? ??? ASR, TTS, Rasa ?? ??? ??? ?????. ChatBot ???? ????? ???? ??? ?? ASR, Rasa, TTS ???? ????? ??????.
def server_asr(self):
    """Delegate to the ASR pipe's ``main_asr()``; target of the background task."""
    self.asr.main_asr()


def start_asr(self, sio):
    """Start ``server_asr`` via ``sio.start_background_task`` and keep the result.

    Args:
        sio: Object exposing ``start_background_task(callable)``.
    """
    background_task = sio.start_background_task(self.server_asr)
    self.thread_asr = background_task
?? ?? ???? start_asr ???? ????? ????? ASR? ???? ??? ?? ????? ??? ? ??? ????.
def asr_fill_buffer(self, audio_in):
    """Forward audio to the ASR pipe unless ``pause_asr_flag`` is set.

    Args:
        audio_in: Audio data handed to ``self.asr.fill_buffer`` unchanged.
    """
    if not self.pause_asr_flag:
        self.asr.fill_buffer(audio_in)


def get_asr_transcript(self):
    """Return whatever the ASR pipe's ``get_transcript()`` returns."""
    return self.asr.get_transcript()
???? asr_fill_buffer??? ASRPipe ????? fill_buffer??? ??? ???? ?? ??? ???? ASR ??? ?????. Riva ASR? ??? ???? ?? ?????? ???? get_asr_transcript??? ???? ??? ???? ????? ?????.
def rasa_tts_pipeline(self, text):
    """Ask Rasa for a reply to ``text`` and optionally queue it for TTS.

    Args:
        text: The user's text, passed to ``self.rasa.request_rasa_for_question``.

    Returns:
        The reply obtained from Rasa. It is also handed to
        ``tts_fill_buffer`` when it is non-empty and ``enableTTS == True``.
    """
    reply = self.rasa.request_rasa_for_question(text)
    if not len(reply):
        return reply
    if self.enableTTS == True:
        self.tts_fill_buffer(reply)
    return reply
??? ???? ??? rasa_tts_pipeline???? ?????. ? ???? Rasa? Riva TTS ??? ??????? ??????. ?? RASAPipe ????? request_rasa_for_question????? ?????. ?? ?? ???? ?? ???? Rasa? ???? Rasa NLP? Rasa DM? ??? ??? ????, ??? ??? ??? ???? ?????. ? ??? ???? ?? tts_fill_buffer? ???? ????? ?????.
def tts_fill_buffer(self, response_text):
    """Queue response text on the TTS pipe when TTS is enabled.

    Args:
        response_text: Text handed to ``self.tts.fill_buffer`` unchanged.
    """
    if self.enableTTS:
        self.tts.fill_buffer(response_text)


def get_tts_speech(self):
    """Return whatever the TTS pipe's ``get_speech()`` returns."""
    return self.tts.get_speech()
?? ???? tts_fill_buffer? TTSPipe ????? fill_buffer ???? ??? TTS ??? ?? ???? ????. TTS? ??? ????? ???? get_tts_speech???? ????, ?? ???? ????? ?? ???????.
??? Rasa? TTS ??? ??????? ??? ??? ???? rasa_tts_pipeline? ???????.
?? ????? ????
?? ????? ??? ????? ?? ???? ??(Network Configuration) ??? ??? ?? API ?????? ??? ???? ???.
?? ?? ?? ?????? ????? ?? ??(Running the Demo) ??? ??? ????.
?? ??? ???? ?? Rasa ?? ??? Rasa ??? ?? ????? ??????. ? ??? ?? ?????. ?? ??? ???? ?? ?? ??? Rasa ??? ??? ??(Docker Compose Installation)? ?????.
??? ????
???? ??? ?? ?? ?? ?????? ???? ???? ??? ? ?????.
Rasa ?? ?? ??? Rasa X
?? ?? ??(CDD)? ???? ?? ????, ?? ????? ??? AI ?????? ???? ???????. ??? AI ?????? ??? ?? ?? ???? ??? ???? ?? ??? ?????. ??? ?? ???? ???? ??? ?? ??? ??? ? ??? ??? ????. ??? AI ??? ? ???? CDD? ???? ?? ?????? ?? ???? ??? ??? ?? ???? ??? ?? ???.
Rasa X? CDD ??? ????. Rasa X? ???? CDD ??? ??? ????.
- ??(Share): ????? ?????? ??? ??? ??? ?? ????? ???. ?? ??? ?? ???? ?? ??? Rasa X?? ??? ??? ???? ?????? ??? ? ????. ?? ???? ?????? ?? ??? ????? ?? ??? ????? ??? ?? ??? ? ????.
- ??(Review): ???? ???? ?????? ?? ?? ??? ?? ?? ?????. ????? ???? ??? ??? ? ???? ??? ???. Rasa X? ???? ??? ?? ??? ?? ??? ???? ?????.
- ???(Annotation): ?? NLU ??? ?? ??? ???? ??? ?????. ??? ???? ??? ??? ????? ??(badge)? ??? ??? ? ???? ??? ?????. ???? ‘??’?? ????, ???? ??? ????, ?? ???? ??????? ???? ??? ??? ???? ??? ? ????.
- ??(Test): ???? ???? ?????? ?? ?? ??? ??? ?????. Rasa X? ?? ??? ????? ??? ??? ??? ?? ??? ?? ??? ? ????. Rasa X? ??? ??? ??? ?? ??(CI) ?????? ??? ??? ??????. Rasa X? ??? ?? ???(Integrated version control)? ?? ????? ????? CI ?????? ?? ??? ??? ??? ?? ?????.
- ??(Track): ???? ??? ??? ?? ??? ??? ?????? ??? ?????. ? ????? ?????? ?? ??? ??? ?? ??? Rasa X API? ?? ??? ??????. ???? ????? ????? ??? ???? ??? ?? ?? ???? ?? ?????.
- ??(Fix): ??? ??? ??? ??? ?????? ??? ?? ?? ??? ???? ??? ????, ?? ??? ?? ??? ????.
CDD? ???? ????? ???, ? ??? ???? ??? ?????. ??? ??, ?? ??? ?? ????? ?? ???? ??? ????? ??. ?????? ??? ??? ? ?? ???? ??? ?? ??? ??, ?????? ???? ??? ????? ????. ?? ??? ??? Rasa X ?? ???(Rasa X Installation Guide)? ?????.
NVIDIA TAO Toolkit
???? ??, ???? ?? ??? ?? ??? ??? ??, ???? ???? ???. ??? ????????? ???? ??? ?? ???? ??? ?? ??? Riva ???? ???? ??? ???? NVIDIA TAO Toolkit? ??? ?? ??(transfer learning)? ???? ????.
NVIDIA TAO Toolkit? ???? ??? ?????? AI? ?? ???? ??? ??? ?? ??? ??? ???? ??? ???? ?? ??? ? ?? ??? AI ?? ??? ????????. ?? ??? ??? ??, ?? ?? ??? ??? ??? AI ?????? ??? ?? ??? ????.
TAO Toolkit? ?? ??? ??? ??? ??? ??? ????? ??? AI/DL ?????? ???? ????? ????. ???? ?? ??? ?? ?? ?????? ?? ??? ?????. ? ??? ????? ?? ??? ?? ??? ??? ??? ?? ??? ??? ????? ?????. ? ???? TAO Toolkit? ??? ??? ??, ??? ?? ??, ??? ?? ????? ????? ???? ??? ? ??? ??? ?????.
? ??? ??? TAO Toolkit ??????? ?????.
??
???? ?? ?? ?? ?????? ???? Riva ASR/TTS, Rasa NLP/DM? ???? ??? ??????. ?? NVIDIA Riva SDK? Riva ASR/TTS? ?? ??? ??????.
Riva ??????? ? ?? ?? ???? ??????? ???? ???, ?? ???? ?? ?????. Riva ??? ??? Riva ????? ?????. Rasa? ? ??? ????, Rasa ??? ???? ?? ????? ?? ?? ??? ?????!