Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ssip-client-async/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ log = { version = "0.4", features = ["max_level_debug", "release_max_level_info"
mio = { version = "0.8", optional = true }
tokio = { version = "1.0", features = ["io-util", "rt", "macros", "net"] }
async-std = { version = "1.0", default-features = true }
lingua = "1.7.1"

[features]
async-mio = ["mio/net", "mio/os-poll"]
Expand Down
27 changes: 27 additions & 0 deletions ssip-client-async/examples/automatic_language_detection.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
use lingua::DetectionResult;
use lingua::Language::{English, Spanish, Chinese};
use lingua::LanguageDetectorBuilder;

/// Demonstrates splitting a mixed-language sentence into single-language sections with `lingua`
/// and checking the language detected for each section.
fn main() {
    // Only the languages listed here are candidates during detection.
    let languages = vec![English, Chinese];
    let detector = LanguageDetectorBuilder::from_languages(&languages).build();
    let sentence = "Hello my name is Joe. 你好世界";

    let results: Vec<DetectionResult> = detector.detect_multiple_languages_of(sentence);

    println!("{:?}", results);

    if let [first, second] = &results[..] {
        assert_eq!(first.language(), English);
        // NOTE(review): a detected section may carry the whitespace that separates it from the
        // next one, so compare with trailing whitespace removed.
        assert_eq!(
            sentence[first.start_index()..first.end_index()].trim_end(),
            "Hello my name is Joe."
        );

        // The detector was built with English and Chinese only, so the second section must be
        // reported as Chinese (Spanish is not a candidate).
        assert_eq!(second.language(), Chinese);
        assert_eq!(
            sentence[second.start_index()..second.end_index()].trim_end(),
            "你好世界"
        );
    }
}
14 changes: 12 additions & 2 deletions ssip-client-async/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ use crate::types::*;
#[cfg(all(not(feature = "async-mio"), unix))]
pub use std::os::unix::io::AsRawFd as Source;

use lingua::LanguageDetector;
#[cfg(feature = "async-mio")]
pub use mio::event::Source;

Expand Down Expand Up @@ -69,13 +70,22 @@ macro_rules! send_range {
pub struct Client<S: Read + Write + Source> {
/// Buffered reader over the stream, as handed to `Client::new`.
input: io::BufReader<S>,
/// Buffered writer over the stream, as handed to `Client::new`.
output: io::BufWriter<S>,
/// Detector used for automatic language detection.
///
/// `None` when the client was built without detection languages.
pub language_detector: Option<LanguageDetector>,
}

impl<S: Read + Write + Source> Client<S> {
/// Create a SSIP client on the reader and writer.
///
/// `language_detector` is stored unchanged on the client; pass `None` to create a client
/// without automatic language detection.
pub(crate) fn new(input: io::BufReader<S>, output: io::BufWriter<S>) -> Self {
pub(crate) fn new(
input: io::BufReader<S>,
output: io::BufWriter<S>,
language_detector: Option<LanguageDetector>,
) -> Self {
// https://stackoverflow.com/questions/58467659/how-to-store-tcpstream-with-bufreader-and-bufwriter-in-a-data-structure
Self { input, output }
Self {
input,
output,
language_detector,
}
}

#[cfg(all(not(feature = "async-mio"), unix))]
Expand Down
29 changes: 28 additions & 1 deletion ssip-client-async/src/fifo.rs
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,15 @@ mod synchronous {
pub struct Builder {
/// FIFO path; `build` connects a `UnixStream` to the value returned by `path.get()`.
path: FifoPath,
/// Stream mode that `build` applies to the connection.
mode: StreamMode,
/// ISO 639-1 codes of the languages to distinguish between during automatic detection.
///
/// `None` (the default set by `new`) means the client is built without a language detector.
pub languages_to_detect: Option<Vec<lingua::IsoCode639_1>>,
}

impl Builder {
/// Create a builder with a freshly created `FifoPath`, blocking stream mode, and no
/// languages configured for automatic detection.
pub fn new() -> Self {
    let path = FifoPath::new();
    let mode = StreamMode::Blocking;
    Self {
        path,
        mode,
        languages_to_detect: None,
    }
}

Expand Down Expand Up @@ -103,6 +105,17 @@ mod synchronous {
Ok(self)
}

/// Build a `lingua` language detector restricted to `languages`.
///
/// The language models are preloaded eagerly so that the first detection performed by the
/// client does not pay the loading cost.
///
/// This function always returns `Some`; the `Option` return type is kept so existing callers
/// continue to compile.
///
/// # Panics
///
/// NOTE(review): the `lingua` documentation states that
/// `LanguageDetectorBuilder::from_iso_codes_639_1` panics when fewer than two codes are
/// given — confirm and validate the list before calling if that matters to callers.
fn init_language_detector(
    languages: &[lingua::IsoCode639_1],
) -> Option<lingua::LanguageDetector> {
    let detector = lingua::LanguageDetectorBuilder::from_iso_codes_639_1(languages)
        // preload all language models into memory for faster client detection
        .with_preloaded_language_models()
        .build();
    Some(detector)
}

pub fn build(&self) -> io::Result<Client<UnixStream>> {
let input = UnixStream::connect(self.path.get()?)?;
match self.mode {
Expand All @@ -112,7 +125,21 @@ mod synchronous {
}

let output = input.try_clone()?;
Ok(Client::new(BufReader::new(input), BufWriter::new(output)))
match &self.languages_to_detect {
Some(languages) => {
let language_detector = Self::init_language_detector(&languages).unwrap();
Ok(Client::new(
BufReader::new(input),
BufWriter::new(output),
Some(language_detector),
))
}
None => Ok(Client::new(
BufReader::new(input),
BufWriter::new(output),
None,
)),
}
}
}
}
Expand Down
48 changes: 48 additions & 0 deletions ssip-client-async/src/language_detection.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
use std::io::{Read, Write};
// Trick to have a common implementation for std and mio streams.
#[cfg(all(not(feature = "async-mio"), unix))]
pub use std::os::unix::io::AsRawFd as Source;

use crate::{fifo::Builder, Client, OK_LANGUAGE_SET};
use lingua::IsoCode639_1;
use ssip::{ClientError, ClientResult};

impl Builder {
    /// Select the languages to distinguish between during automatic language detection.
    ///
    /// Languages are identified by their ISO 639-1 codes ([`IsoCode639_1`]). The codes are
    /// copied into the builder, so the caller keeps ownership of `languages`.
    ///
    /// Returns the builder to allow chaining further configuration calls.
    pub fn with_automatic_detection_languages(
        &mut self,
        languages: &[IsoCode639_1],
    ) -> &mut Self {
        self.languages_to_detect = Some(languages.to_vec());
        self
    }
}

impl<S: Read + Write + Source> Client<S> {
    /// Speak `lines`, switching the language for every single-language section of the text.
    ///
    /// This is a wrapper over `send_lines`: the text is split into sections by the language
    /// detector stored on the client, and for each section the current language is set before
    /// the section is sent in its own `speak` request.
    ///
    /// # Errors
    ///
    /// Returns [`ClientError::LanguageDetectionError`] when the client has no language
    /// detector, or when a detected section does not map onto a valid range of `lines`.
    /// Any error from the underlying requests is propagated unchanged.
    pub fn send_lines_multilingual(&mut self, lines: &str) -> ClientResult<&mut Self> {
        // Build the error lazily so nothing is allocated when a detector is present.
        let detector = self.language_detector.as_ref().ok_or_else(|| {
            ClientError::LanguageDetectionError("Language detection not initialized".to_string())
        })?;

        let detection_results = detector.detect_multiple_languages_of(lines);

        for result in detection_results {
            let language_code = result.language().iso_code_639_1().to_string();
            // FIXME: the status check stalls for some reason and never returns
            self.set_language(ssip::ClientScope::Current, &language_code)?
                .check_status(OK_LANGUAGE_SET)?;
            // Checked slicing: report an unexpected range as an error instead of panicking.
            let subsection = lines
                .get(result.start_index()..result.end_index())
                .ok_or_else(|| {
                    ClientError::LanguageDetectionError(
                        "Detected section is not a valid range of the text".to_string(),
                    )
                })?
                .to_string();
            self.speak()?
                .check_receiving_data()?
                .send_lines(&[subsection])?
                .receive()?;
        }

        Ok(self)
    }
}
1 change: 1 addition & 0 deletions ssip-client-async/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ pub mod client;
pub mod constants;
#[cfg(unix)]
pub mod fifo;
pub mod language_detection;
pub mod net;
pub mod tcp;

Expand Down
6 changes: 5 additions & 1 deletion ssip-client-async/src/tcp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ mod synchronous {
StreamMode::TimeOut(timeout) => input.set_read_timeout(Some(timeout))?,
}
let output = input.try_clone()?;
Ok(Client::new(BufReader::new(input), BufWriter::new(output)))
Ok(Client::new(
BufReader::new(input),
BufWriter::new(output),
None,
))
}
}
}
Expand Down
2 changes: 2 additions & 0 deletions ssip/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -417,6 +417,8 @@ pub enum ClientError {
TooManyLines,
#[error("Unexpected status: {0}")]
UnexpectedStatus(ReturnCode),
#[error("Failure automatically detecting language: {0}")]
LanguageDetectionError(String),
}

impl ClientError {
Expand Down