From 680b8a11fb2ce95134b6f7dc60940a1f4d4cae8f Mon Sep 17 00:00:00 2001 From: Alex Kristiansen Date: Sat, 12 Aug 2023 20:07:56 -0700 Subject: [PATCH 1/4] first pass --- kirum/src/entries.rs | 7 +- kirum/src/new.rs | 5 +- libkirum/Cargo.toml | 4 +- libkirum/src/kirum.rs | 114 +++++++++++++++-- libkirum/src/lemma.rs | 48 +++++++- libkirum/src/lexcreate.rs | 252 ++++++++++++++++++++++++++++++++++++++ libkirum/src/lib.rs | 1 + libkirum/src/matching.rs | 3 +- 8 files changed, 419 insertions(+), 15 deletions(-) create mode 100644 libkirum/src/lexcreate.rs diff --git a/kirum/src/entries.rs b/kirum/src/entries.rs index dda786d..9b8b9e4 100644 --- a/kirum/src/entries.rs +++ b/kirum/src/entries.rs @@ -39,6 +39,8 @@ pub struct RawLexicalEntry { pub archaic: bool, pub tags: Option>, + /// A tag that tells Kirum to generate the word based on the phonetic ruleset specified by the tag + pub generate: Option, /// Words that will be added as a derivative of the enclosing Lexis; any value not specified will be taken from the enclosing entry. 
pub derivatives: Option> } @@ -68,7 +70,9 @@ impl From for Lexis{ lexis_type: source.word_type.unwrap_or("".to_string()), definition: source.definition, archaic: source.archaic, - tags: source.tags.unwrap_or(Vec::new())} + tags: source.tags.unwrap_or(Vec::new()), + word_create: source.generate + } } } @@ -83,6 +87,7 @@ impl From for RawLexicalEntry{ archaic: value.archaic, tags: if !value.tags.is_empty() {Some(value.tags)} else {None}, derivatives: None, + generate: value.word_create } } } diff --git a/kirum/src/new.rs b/kirum/src/new.rs index f4288f4..45d400f 100644 --- a/kirum/src/new.rs +++ b/kirum/src/new.rs @@ -39,6 +39,7 @@ pub fn create_new_project(name: &str) -> Result<()> { archaic: true, tags: None, derivatives: None, + generate: None, }); word_map.insert("latin_example".into(), RawLexicalEntry { word: None, @@ -49,6 +50,7 @@ pub fn create_new_project(name: &str) -> Result<()> { etymology: Some(Etymology { etymons: vec![Edge{etymon: "latin_verb".into(), transforms: Some(vec!["latin-from-verb".into()]), agglutination_order: None}] }), archaic: true, tags: Some(vec!["example".into(), "default".into()]), + generate: None, derivatives: Some(vec![Derivative{lexis: RawLexicalEntry { word: None, word_type: None, @@ -58,7 +60,8 @@ pub fn create_new_project(name: &str) -> Result<()> { etymology: None, archaic: true, tags: None, - derivatives: None + derivatives: None, + generate: None, }, transforms: Some(vec!["of-from-latin".to_owned()]), }]) diff --git a/libkirum/Cargo.toml b/libkirum/Cargo.toml index f604fe2..1ad2e53 100644 --- a/libkirum/Cargo.toml +++ b/libkirum/Cargo.toml @@ -13,4 +13,6 @@ regex = "1.7.0" thiserror = "1.0.38" serde_with = {version = "3.0.0", features= ["json"]} log = "0.4.17" -unicode-segmentation = "1.10.1" \ No newline at end of file +unicode-segmentation = "1.10.1" +rand = "0.8.5" +env_logger = "0.9.0" \ No newline at end of file diff --git a/libkirum/src/kirum.rs b/libkirum/src/kirum.rs index 2466a66..09e5ad0 100644 --- 
a/libkirum/src/kirum.rs +++ b/libkirum/src/kirum.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use crate::lemma::Lemma; +use crate::lexcreate; use crate::transforms::Transform; use crate::word::{PartOfSpeech, Etymology, Edge}; use petgraph::Direction::{Incoming, Outgoing}; @@ -11,7 +12,7 @@ use petgraph::Graph; use log::{trace, debug}; #[derive(Clone, Default, PartialEq, serde::Deserialize, serde::Serialize)] -/// A Lexis represents a single entry in the language tree, be it a word, word stem, morpheme, etc. +/// A Lexis represents a headword in Kirum's lexicon, be it a word, word stem, morpheme, etc. pub struct Lexis { /// Optional ID for the lex, used by connect_etymology_id pub id: String, @@ -30,7 +31,11 @@ pub struct Lexis { pub archaic: bool, /// Optional user-supplied tags //#[serde(skip)] - pub tags: Vec + pub tags: Vec, + /// Optional field that can be used to randomly generate a word value if none exists, separate from any etymology. + /// If the given word has not etymology, this value takes prescience. + /// The string value is used to generate a word based on the underlying phonology rules supplied to the TreeEtymology structure. + pub word_create: Option } @@ -55,7 +60,7 @@ impl std::fmt::Debug for Lexis { } -/// TreeEtymology represents the grpah edge of the language tree, and +/// TreeEtymology represents the graph edge of the language tree, and /// determines the relationship of one word to another. #[derive(Default, Debug, Clone)] pub struct TreeEtymology { @@ -65,7 +70,8 @@ pub struct TreeEtymology { /// Determines what order this morpheme is agglutinated in to create derived lexii. 
/// For example, if a lexis has two upstream etymons, Word A with agglutination_order=1 /// and Word B with agglutination_order=2, the lexis will by generated by agglutinating A+B - pub agglutination_order: Option + pub agglutination_order: Option, + } impl TreeEtymology{ @@ -91,6 +97,10 @@ impl TreeEtymology{ pub struct LanguageTree { //the Node type represents a lexical entry, the edge is a tuple of the transform, and a "holding" string that's used to "trickle down" words as they're generated graph: Graph, + + /// A set of phonology rules that can be used generate new words without etymology. + /// Using these rules, Kirum will randomly stitch together phonemes to create a new lexis. + pub word_creator_phonology: lexcreate::LexPhonology } impl Default for LanguageTree{ @@ -115,7 +125,8 @@ impl IntoIterator for LanguageTree { impl LanguageTree { pub fn new() -> Self { - LanguageTree {graph: Graph::::new()} + LanguageTree {graph: Graph::::new(), + word_creator_phonology: lexcreate::LexPhonology { phonemes: HashMap::new(), lexis_types: HashMap::new() }} } @@ -212,6 +223,20 @@ impl LanguageTree { let mut upstreams: Vec<(i32, Lemma)> = Vec::new(); if !updated.contains_key(&node){ + + // try word creation + if self.graph[node].word_create.is_some() && self.graph[node].word.is_none() { + trace!("word_create has value, no word found, creating one..."); + let word_type = self.graph[node].word_create.clone().unwrap(); + let new_gen = self.word_creator_phonology.create_word(&word_type); + if let Some(found_new) = new_gen { + let debug_iter: Vec = found_new.clone().into_iter().collect(); + trace!("created new word ({:?}) from phonology rules for ID {}", debug_iter, self.graph[node].id); + self.graph[node].word = Some(found_new); + //continue; + } + } + let mut etymons_in_lex = 0; for edge in self.graph.edges_directed(node, petgraph::Direction::Incoming){ etymons_in_lex += 1; @@ -236,7 +261,7 @@ impl LanguageTree { } // we have a lexis with no upstream edges, but contains a 
word. mark as updated. if self.graph[node].word.is_some() && etymons_in_lex == 0 { - trace!("updated node {} with no upstreams: {:?}", self.graph[node].id, self.graph[node].word); + trace!("updated node '{}' with no upstreams: {:?}", self.graph[node].id, self.graph[node].word); changes+=1; updated.insert(node, true); } @@ -328,9 +353,19 @@ impl LanguageTree { } dict.sort_by_key(|k| k.word.clone().unwrap()); dict + } + /// Get a Lemma entry by the ID value + pub fn get_by_id(&self, id: &str) -> Option { + for node in self.graph.node_indices(){ + if self.graph[node].id == id { + return Some(self.graph[node].clone()) + } + } + return None } + /// Reduce the language graph to a vector of words that match the provided function. Returns a vector of tuples for each matching word and any associated etymological data. pub fn to_vec_etymons(self, filter: F) -> Vec<(Lexis, Etymology)> where @@ -370,7 +405,10 @@ fn join_string_vectors(words: &mut [(i32, Lemma)]) -> Lemma{ #[cfg(test)] mod tests { - use crate::{kirum::{LanguageTree, Lexis}, transforms::{Transform, LetterArrayValues, TransformFunc, self, LetterValues}, matching::{LexisMatch, Value}}; + use std::collections::HashMap; + use log::LevelFilter; + use crate::{kirum::{LanguageTree, Lexis}, transforms::{Transform, LetterArrayValues, TransformFunc, self, LetterValues}, matching::{LexisMatch, Value}, lexcreate::{LexPhonology, CreateValue, PhoneticReference}}; + use env_logger::Builder; fn create_basic_words() -> LanguageTree { let parent = Lexis{id: "parent".to_string(), word: Some("wrh".into()), language: "gauntlet".to_string(), lexis_type: "root".to_string(), ..Default::default()}; @@ -385,7 +423,7 @@ mod tests { let transform_two = Transform{name: "second_transform".to_string(), lex_match: None, transforms: vec![TransformFunc::Prefix { value: "au".into() }], - }; + }; // a basic three-word graph, two words auto-generated let mut tree = LanguageTree::new(); @@ -395,6 +433,66 @@ mod tests { tree } + #[test] + fn 
test_word_create() { + let log_level = LevelFilter::Trace; + Builder::new().filter_level(log_level).init(); + let test_phon_rules = LexPhonology{ + phonemes: HashMap::from([ + ("C".to_string(), + vec![ + "h".into(), + "r".into(), + "x".into(), + "k".into() + ]), + ("V".to_string(), + vec![ + "u".into(), + "i".into() + ]), + ]), + lexis_types: HashMap::from([ + ("root".to_string(), + vec![ + "CCC".into() + ]) + ]), + }; + let parent = Lexis{id: "parent".to_string(), word:None, + language: "gauntlet".to_string(), lexis_type: "root".to_string(), word_create: Some("root".to_string()), ..Default::default()}; + let derivative_one = Lexis{id: "derivative_one".to_string(), word: None, lexis_type: "word".to_string(), word_create: None, ..parent.clone()}; + let derivative_two = Lexis{id: "derivative_two".to_string(), word: None, lexis_type: "word".to_string(), word_create: None, ..parent.clone()}; + + let transform_one = Transform{name: "first_transform".to_string(), + lex_match: None, + transforms: vec![TransformFunc::LetterArray { + letters: vec![LetterArrayValues::Place(0), + LetterArrayValues::Char("a".into()), + LetterArrayValues::Place(1), + LetterArrayValues::Place(2)] }] + }; + + let transform_two = Transform{name: "second_transform".to_string(), + lex_match: None, + transforms: vec![TransformFunc::Prefix { value: "au".into() }], + }; + let mut tree = LanguageTree::new(); + tree.connect_etymology(derivative_one.clone(), parent, vec![transform_one], None); + tree.connect_etymology(derivative_two, derivative_one, vec![transform_two], None); + tree.word_creator_phonology = test_phon_rules; + tree.compute_lexicon(); + + let gen_parent = tree.get_by_id("parent").unwrap().word.unwrap().chars(); + let der_two = tree.get_by_id("derivative_two"); + println!("All words: {:?}", tree.to_vec()); + println!("updated string is: {}", der_two.clone().unwrap().word.unwrap().string_without_sep()); + + + let reconstructed = format!("au{}a{}{}", gen_parent[0], gen_parent[1], 
gen_parent[2]); + assert_eq!(der_two.unwrap().word.unwrap().string_without_sep(), reconstructed); + } + #[test] fn test_basic_tree(){ let mut tree = create_basic_words(); diff --git a/libkirum/src/lemma.rs b/libkirum/src/lemma.rs index b305a7d..20b3bbf 100644 --- a/libkirum/src/lemma.rs +++ b/libkirum/src/lemma.rs @@ -6,11 +6,17 @@ use log::error; const WORD_SEP: char = '\u{200B}'; -#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)] +#[derive(Clone, Default, PartialEq, Eq, PartialOrd, Ord)] pub struct Lemma { value: String, } +impl std::fmt::Debug for Lemma { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_fmt(format_args!("{}", self.string_without_sep())) + } +} + impl Serialize for Lemma { fn serialize(&self, serializer: S) -> Result @@ -62,7 +68,7 @@ impl IntoIterator for Lemma { type IntoIter = std::vec::IntoIter; fn into_iter(self) -> Self::IntoIter { - let separated: Vec = self.value.split(WORD_SEP).map(|c|c.to_owned()).collect(); + let separated: Vec = self.into(); separated.into_iter() } } @@ -78,7 +84,7 @@ impl From> for Lemma { fn from(value: Vec) -> Self { let mut build = String::new(); for part in value.into_iter() { - if part == WORD_SEP.to_string() { + if part == WORD_SEP.to_string() || part == "" { continue } build = format!("{}{}", build, part); @@ -114,9 +120,45 @@ impl std::string::ToString for Lemma { } } +impl From for Vec { + fn from(value: Lemma) -> Self { + value.value.split(WORD_SEP).map(|c|c.to_owned()).filter(|c| c != "").collect() + } +} impl Lemma { + + pub fn len(&self) -> usize { + self.clone().into_iter().count() + } + + pub fn push(&mut self, pushed: Lemma) { + if self.len() >0 { + let mut vectored: Vec = self.clone().into(); + let mut update_vec: Vec = pushed.into(); + vectored.append(&mut update_vec); + let updated: Lemma = vectored.into(); + self.value = updated.value + } else { + self.value = pushed.value + } + + } + + pub fn push_char(&mut self, pushed: &str) { + // a bit horrible, 
but the easiest way to insure we're inserting the separators properly + if self.len() > 0 { + let mut vectored: Vec = self.clone().into(); + vectored.push(pushed.to_string()); + let updated: Lemma = vectored.into(); + self.value = updated.value + } else { + self.value = pushed.to_string(); + } + + } + /// Return a string without the Lemma-specific character delimiters pub fn string_without_sep(&self) -> String { let rep = WORD_SEP.to_string(); diff --git a/libkirum/src/lexcreate.rs b/libkirum/src/lexcreate.rs new file mode 100644 index 0000000..7dde771 --- /dev/null +++ b/libkirum/src/lexcreate.rs @@ -0,0 +1,252 @@ +use std::collections::HashMap; +use rand::seq::SliceRandom; +use crate::lemma::Lemma; +use serde::{Deserialize, Serialize, de::Visitor}; + + +#[derive(Clone, PartialEq, Serialize, Deserialize, Default, Debug)] +pub struct LexPhonology { + pub phonemes: HashMap>, + pub lexis_types: HashMap> +} + +#[derive(Clone, PartialEq, Serialize, Default, Debug)] +pub struct PhoneticReference(Vec); + +impl<'de> Deserialize<'de> for PhoneticReference { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de> { + deserializer.deserialize_any(PhoneticReferenceVisitor) + } +} + +struct PhoneticReferenceVisitor; + +impl<'de> Visitor<'de> for PhoneticReferenceVisitor { + type Value = PhoneticReference; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(formatter, "a string value") + } + + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, { + Ok(v.into()) + } +} + +// the PhoneticReference can be formatted one of two ways: +// CCCC +// C C C C +// the latter helps for cases where we've inserted a weird character that's more than one unicode character +impl From<&str> for PhoneticReference{ + fn from(value: &str) -> Self { + let mut phon_vec: Vec = Vec::new(); + if value.matches(" ").count() > 1{ + for char in value.split_whitespace(){ + phon_vec.push(char.into()) + } + } else { + for char 
in value.chars(){ + phon_vec.push(char.into()) + } + } + + PhoneticReference(phon_vec) + } + +} + + +#[derive(Clone, PartialEq, PartialOrd, Eq, Ord, Serialize, Debug)] +pub enum CreateValue { + Phoneme(String), + Reference(String) +} + +impl From<&str> for CreateValue{ + fn from(value: &str) -> Self { + let found_lowercase = value.chars().find(|c| c.is_lowercase()); + if found_lowercase.is_some() { + CreateValue::Phoneme(value.to_string()) + } else { + CreateValue::Reference(value.to_string()) + } + } +} + +impl From for CreateValue{ + fn from(value: char) -> Self { + if value.is_lowercase(){ + CreateValue::Phoneme(value.to_string()) + } else { + CreateValue::Reference(value.to_string()) + } + } +} + +impl<'de> Deserialize<'de> for CreateValue { + fn deserialize(deserializer: D) -> Result + where + D: serde::Deserializer<'de> { + deserializer.deserialize_any(CreateValueVisitor) + } +} + +struct CreateValueVisitor; + +impl<'de> Visitor<'de> for CreateValueVisitor { + type Value = CreateValue; + + fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { + write!(formatter, "an upper or lower case character value") + } + + // logic: if an identifier is all uppercase, treat it as a reference, + // otherwise, it's a string phoneme + fn visit_str(self, v: &str) -> Result + where + E: serde::de::Error, { + Ok(v.into()) + } +} + +impl LexPhonology { + + pub fn create_word(&self, lexis_type: &str) -> Option { + if let Some(found_type_list) = self.lexis_types.get(lexis_type) { + if let Some(selected_phon) = found_type_list.choose(&mut rand::thread_rng()) { + return self.resolve_phonetic_reference(selected_phon) + } + } + + None + } + + fn resolve_phonetic_reference(&self, pref: &PhoneticReference) -> Option { + let mut phonetic_acc = Lemma::default(); + for phon in &pref.0 { + match phon { + CreateValue::Phoneme(p) => {phonetic_acc.push_char(&p)}, + CreateValue::Reference(single_ref) => { + if let Some(found_ref) = self.random_phoneme(&single_ref) { + 
phonetic_acc.push(found_ref) + } else { + return None + } + } + } + } + + if phonetic_acc.len() == 0 { + None + } else { + Some(phonetic_acc) + } + + } + + fn random_phoneme(&self, phoneme_key: &str) -> Option { + if let Some(type_val) = self.phonemes.get(phoneme_key) { + let picked_from = type_val.choose(&mut rand::thread_rng()); + if let Some(picked) = picked_from { + return self.resolve_phonetic_reference(picked) + } + } + + None + } + +} + + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use crate::lexcreate::PhoneticReference; + + use super::{LexPhonology, CreateValue}; + + #[test] + fn test_new_no_space(){ + let test_phon: PhoneticReference = "CCCC".into(); + let expected = PhoneticReference(vec!["C".into(), "C".into(), "C".into(), "C".into()]); + assert_eq!(test_phon, expected) + } + + #[test] + fn test_new_spaces() { + let test_phon: PhoneticReference = "C V i C r rw".into(); + + let expected = PhoneticReference(vec![ + CreateValue::Reference("C".to_string()), + CreateValue::Reference("V".to_string()), + CreateValue::Phoneme("i".to_string()), + CreateValue::Reference("C".to_string()), + CreateValue::Phoneme("r".to_string()), + CreateValue::Phoneme("rw".to_string()) + ]); + assert_eq!(test_phon, expected) + } + + #[test] + fn test_new_no_space_mix(){ + let test_phon: PhoneticReference = "CCrC".into(); + let expected = PhoneticReference(vec![ + CreateValue::Reference("C".to_string()), + CreateValue::Reference("C".to_string()), + CreateValue::Phoneme("r".to_string()), + CreateValue::Reference("C".to_string()) + ]); + assert_eq!(test_phon, expected) + } + + #[test] + fn test_basic_gen() { + let test_phon = LexPhonology{ + phonemes: HashMap::from([ + ("C".to_string(), + vec![ + PhoneticReference(vec![CreateValue::Phoneme("t".to_string())]), + PhoneticReference(vec![CreateValue::Phoneme("r".to_string())]) + ]), + ("V".to_string(), + vec![ + PhoneticReference(vec![CreateValue::Phoneme("u".to_string())]), + 
PhoneticReference(vec![CreateValue::Phoneme("i".to_string())]) + ]), + ("S".to_string(), + vec![ + PhoneticReference(vec![ + CreateValue::Reference("C".to_string()), + CreateValue::Reference("V".to_string()) + ]), + PhoneticReference(vec![ + CreateValue::Reference("V".to_string()), + CreateValue::Reference("C".to_string()) + ]) + ]) + ]), + lexis_types: HashMap::from([ + ("words".to_string(), + vec![ + PhoneticReference(vec![CreateValue::Reference("S".to_string())]), + PhoneticReference(vec![ + CreateValue::Reference("S".to_string()), + CreateValue::Reference("S".to_string()) + ]) + ]) + ]), + }; + + let res = test_phon.create_word("words"); + assert_eq!(true, res.is_some()); + assert!(res.clone().unwrap().len() > 0); + println!("got: {}", res.unwrap().to_string()); + } + + +} \ No newline at end of file diff --git a/libkirum/src/lib.rs b/libkirum/src/lib.rs index 2717288..736789a 100644 --- a/libkirum/src/lib.rs +++ b/libkirum/src/lib.rs @@ -5,6 +5,7 @@ pub mod errors; pub mod kirum; pub mod matching; pub mod lemma; +pub mod lexcreate; // #[cfg(test)] // mod tests { diff --git a/libkirum/src/matching.rs b/libkirum/src/matching.rs index 79e9883..63bf8e8 100644 --- a/libkirum/src/matching.rs +++ b/libkirum/src/matching.rs @@ -198,7 +198,8 @@ mod tests { pos: None, definition: "".to_string(), archaic: false, - tags: vec!["tag1".to_string(), "tag2".to_string()] + tags: vec!["tag1".to_string(), "tag2".to_string()], + word_create: None }; let test_match = LexisMatch{ From 88a699584281105d080968a4d6db12891d492ca3 Mon Sep 17 00:00:00 2001 From: Alex Kristiansen Date: Sun, 13 Aug 2023 10:13:23 -0700 Subject: [PATCH 2/4] cleaning up --- kirum/src/cli.rs | 19 ---- kirum/src/files.rs | 113 +++++++++++++++++------ kirum/src/generate.rs | 6 +- kirum/src/main.rs | 21 ++--- kirum/src/test_files/test_phonetics.json | 10 ++ libkirum/src/kirum.rs | 10 +- libkirum/src/lemma.rs | 12 ++- libkirum/src/lexcreate.rs | 15 +-- 8 files changed, 125 insertions(+), 81 deletions(-) create mode 
100644 kirum/src/test_files/test_phonetics.json diff --git a/kirum/src/cli.rs b/kirum/src/cli.rs index fe2306e..ecf6cce 100644 --- a/kirum/src/cli.rs +++ b/kirum/src/cli.rs @@ -30,13 +30,6 @@ pub enum Commands{ }, /// Print a graphviz representation of the language Graphviz{ - /// JSON file of defined etymon transforms - #[clap(short, long, value_parser)] - transforms: Option, - /// json file of a language graph - #[clap(short, long, value_parser)] - graph: Option, - /// path to a directory to read in all transform and graph files #[clap(short, long, value_parser)] directory: Option, @@ -44,12 +37,6 @@ pub enum Commands{ /// Render a lexicon from an existing set of graph files and transformations Render{ - /// JSON file of defined etymon transforms - #[clap(short, long, value_parser)] - transforms: Option, - /// JSON file of a language graph - #[clap(short, long, value_parser)] - graph: Option, /// path to a directory to read in all transform and graph files. /// Can be specified instead of -g -d #[clap(short, long, value_parser)] @@ -74,12 +61,6 @@ pub enum Commands{ pub enum Generate{ /// Generate a daughter language from an existing language in a graph. Daughter{ - /// The file path to the existing language graph. - #[clap(short, long, value_parser)] - graph: Option, - /// Path to transforms referenced in existing graph. - #[clap(short, long, value_parser)] - transforms: Option, // path to a directory to read in all transform and graph files. 
Can be used instead of -t or -g #[clap(short, long, value_parser)] directory: Option, diff --git a/kirum/src/files.rs b/kirum/src/files.rs index b43b603..fe2f39b 100644 --- a/kirum/src/files.rs +++ b/kirum/src/files.rs @@ -1,10 +1,16 @@ use std::{path::{PathBuf, Path}, collections::HashMap}; use anyhow::{Result, Context, anyhow}; -use libkirum::{kirum::{LanguageTree, Lexis}, transforms::{Transform, TransformFunc}, word::{Etymology, Edge}}; +use libkirum::{kirum::{LanguageTree, Lexis}, transforms::{Transform, TransformFunc}, word::{Etymology, Edge}, lexcreate::LexPhonology}; use walkdir::{WalkDir, DirEntry}; use crate::entries::{RawTransform, RawLexicalEntry, TransformGraph, WordGraph}; use handlebars::Handlebars; +/// contains path data for everything needed for a project +pub struct Project { + graphs: Vec, + transforms: Vec, + phonetic_rules: Option> +} pub fn apply_def_vars(var_file: Option, dict: &mut Vec) -> Result<()> { if let Some(vars) = var_file { @@ -24,12 +30,12 @@ pub fn apply_def_vars(var_file: Option, dict: &mut Vec) -> Result } /// read a list of tree and transform files, return the raw Language Tree Object -pub fn read_from_files(transforms:Vec, graphs:Vec) -> Result{ +pub fn read_from_files(proj: Project) -> Result{ //first merge all the files into one giant hashmap for the transforms and graph // because we later need to get random words from the map to construct the etymology from the rawLex "etymology" fields, // the giant hashmaps of everything need to be made first let mut transform_map: HashMap = HashMap::new(); - for trans_file in &transforms { + for trans_file in &proj.transforms { let trans_raw = std::fs::read_to_string(trans_file.clone()).context(format!("error reading etymology file {}", trans_file.display()))?; let transforms: TransformGraph = serde_json::from_str(&trans_raw).context(format!("error parsing etymology file {}", trans_file.display()))?; debug!("read in transform file: {}", trans_file.display()); @@ -37,7 +43,7 @@ pub fn 
read_from_files(transforms:Vec, graphs:Vec) -> Result = HashMap::new(); - for lang_file in &graphs{ + for lang_file in &proj.graphs{ let graph_raw = std::fs::read_to_string(lang_file.clone()).context(format!("error reading tree file {}", lang_file.display()))?; let raw_graph: WordGraph = serde_json::from_str(&graph_raw).context(format!("error reading tree file {}", lang_file.display()))?; debug!("read in language file: {}", lang_file.display()); @@ -68,10 +74,15 @@ pub fn read_from_files(transforms:Vec, graphs:Vec) -> Result) -> Result{ + let mut phonetic_set = LexPhonology::default(); + for path in paths{ + let raw = std::fs::read_to_string(path)?; + let parsed: LexPhonology = serde_json::from_str(&raw)?; + phonetic_set.groups.extend(parsed.groups); + phonetic_set.lexis_types.extend(parsed.lexis_types); + } + + Ok(phonetic_set) +} + /// Searches the Hashmap for the transform objects specified in trans_tree, or return defaults pub fn find_transforms(raw: &Vec, trans_tree: &HashMap) -> Result> { let mut word_transforms: Vec = Vec::new(); @@ -121,33 +144,42 @@ pub fn find_transforms(raw: &Vec, trans_tree: &HashMap Result<(Vec, Vec)> { +pub fn handle_directory(path: String) -> Result { let lang_dir = Path::new(&path); let lang_graph_dir = lang_dir.join("tree"); let lang_transform_dir = lang_dir.join("etymology"); - - let mut graphs: Vec = Vec::new(); - let mut transforms: Vec = Vec::new(); + let phonetics_path = lang_dir.join("phonetics"); debug!("using tree path: {}", lang_graph_dir.display()); - for entry in WalkDir::new(lang_graph_dir).into_iter().filter_entry(check_path){ - let path = entry?.path().to_path_buf(); - if !path.is_dir(){ - graphs.push(path); - } - - } + let graphs: Vec = read_subdir_create_list(lang_graph_dir)?; debug!("using etymology path: {}", lang_transform_dir.display()); - for entry in WalkDir::new(lang_transform_dir).into_iter().filter_entry(check_path){ - let path = entry?.path().to_path_buf(); - if !path.is_dir(){ - transforms.push(path); + 
let transforms: Vec = read_subdir_create_list(lang_transform_dir)?; + + debug!("using phonetics path: {}", phonetics_path.display()); + + let phonetic_rules: Option> = if phonetics_path.exists(){ + Some(read_subdir_create_list(phonetics_path)?) + } else { + None + }; + + + Ok(Project { graphs, + transforms, + phonetic_rules}) +} + +fn read_subdir_create_list(path: PathBuf) -> Result>{ + let mut paths: Vec = Vec::new(); + for entry in WalkDir::new(path).into_iter().filter_entry(check_path){ + let found_path = entry?.path().to_path_buf(); + if !found_path.is_dir(){ + paths.push(found_path); } - } - Ok((transforms, graphs)) + Ok(paths) } /// check if the path is a valid file we want to read @@ -163,16 +195,14 @@ fn check_path(dir: &DirEntry) -> bool { /// read in the existing files and generate a graph /// deals with the logic of listed files versus a specified directory -pub fn read_and_compute(transforms: Option, graph: Option, directory: Option) -> Result{ - let (transform_files, graph_files): (Vec, Vec) = if transforms.is_some() && graph.is_some(){ - (vec![transforms.unwrap().into()], vec![graph.unwrap().into()]) - } else if directory.is_some(){ +pub fn read_and_compute(directory: Option) -> Result{ + let new_project: Project = if directory.is_some(){ handle_directory(directory.unwrap())? 
} else { return Err(anyhow!("must specify either a graph and transform file, or a directory")); }; info!("Reading in existing language files..."); - let mut lang_tree = read_from_files(transform_files, graph_files)?; + let mut lang_tree = read_from_files(new_project)?; info!("rendering tree..."); lang_tree.compute_lexicon(); Ok(lang_tree) @@ -183,17 +213,40 @@ pub fn read_and_compute(transforms: Option, graph: Option, direc #[cfg(test)] mod tests { + use std::collections::HashMap; + use anyhow::Result; - use libkirum::kirum::Lexis; + use libkirum::{kirum::Lexis, lexcreate::LexPhonology}; use crate::files::read_and_compute; use super::apply_def_vars; + #[test] + fn test_phonetic_ingest() -> Result<()>{ + let raw = std::fs::read_to_string("src/test_files/test_phonetics.json")?; + let parsed: LexPhonology = serde_json::from_str(&raw)?; + + let example = LexPhonology{ + groups: HashMap::from([ + ("C".into(), vec!["r".into(), "k".into(), "c".into(), "ch".into(), "b".into()]), + ("V".into(), vec!["i".into(), "u".into(), "o".into()]), + ("S".into(), vec!["CV".into(), "CVC".into(), "VC".into()]) + ]), + lexis_types: HashMap::from([ + ("word".into(), vec!["S".into(), "SuS".into(), "iSSS".into(), "SSSS".into()]) + ]) + }; + + assert_eq!(example, parsed); + + Ok(()) + } + #[test] fn test_ingest_with_derivatives() -> Result<()> { let directory = Some(String::from("src/test_files/test_der")); - let computed = read_and_compute(None, None, directory)?; + let computed = read_and_compute(directory)?; let rendered_dict = computed.to_vec(); assert_eq!(4, rendered_dict.len()); @@ -216,7 +269,7 @@ mod tests { #[test] fn test_repeated_keys() { let directory = Some(String::from("src/test_files/repeated_keys")); - let res = read_and_compute(None, None, directory); + let res = read_and_compute(directory); assert_eq!(true, res.is_err()); } diff --git a/kirum/src/generate.rs b/kirum/src/generate.rs index 197d89c..0cd9096 100644 --- a/kirum/src/generate.rs +++ b/kirum/src/generate.rs @@ -3,16 
+3,14 @@ use anyhow::{Result, Context, anyhow}; use libkirum::{transforms::Transform, kirum::Lexis, word::Etymology}; use crate::{files::read_and_compute, entries, cli::SeparateValues}; -pub fn daughter(graph: Option, - transforms: Option, - daughter_ety: String, +pub fn daughter(daughter_ety: String, ancestor: String, lang_name: String, directory: Option, output: String, by_field: Option) -> Result { // setup, read files, etc - let mut computed = read_and_compute(transforms, graph, directory) + let mut computed = read_and_compute(directory) .context("error reading existing graph and transforms")?; let trans_raw = std::fs::read_to_string(daughter_ety.clone()) diff --git a/kirum/src/main.rs b/kirum/src/main.rs index cfe282c..62c5bcb 100644 --- a/kirum/src/main.rs +++ b/kirum/src/main.rs @@ -12,7 +12,7 @@ use anyhow::Result; use stat::gen_stats; use std::{fs::File, io::Write}; //use csv::WriterBuilder; -use env_logger::{Builder}; +use env_logger::Builder; use log::LevelFilter; #[macro_use] @@ -35,16 +35,16 @@ fn main() -> Result<()> { create_new_project(&name)?; format!("created new project {}", name) }, - cli::Commands::Graphviz{transforms, graph, directory} =>{ - let computed = read_and_compute(transforms, graph, directory)?; + cli::Commands::Graphviz{directory} =>{ + let computed = read_and_compute(directory)?; computed.graphviz() }, cli::Commands::Stat { directory } => { - let computed = read_and_compute(None, None, directory)?; + let computed = read_and_compute(directory)?; gen_stats(computed) } - cli::Commands::Render{command, transforms, graph, directory, variables} =>{ - let computed = read_and_compute(transforms, graph, directory)?; + cli::Commands::Render{command, directory, variables} =>{ + let computed = read_and_compute(directory)?; let mut rendered_dict = computed.to_vec(); apply_def_vars(variables, &mut rendered_dict)?; @@ -73,9 +73,9 @@ fn main() -> Result<()> { }, cli::Commands::Generate{command} =>{ match command{ - cli::Generate::Daughter { graph, 
transforms, daughter_etymology, ancestor, + cli::Generate::Daughter { daughter_etymology, ancestor, name:lang_name, directory, output, group_by: separate_by_field } =>{ - generate::daughter(graph, transforms, daughter_etymology, + generate::daughter(daughter_etymology, ancestor, lang_name, directory, output, separate_by_field)? } @@ -86,11 +86,8 @@ fn main() -> Result<()> { if let Some(out_path) = cli.output{ let mut out_file = File::create(out_path)?; write!(out_file, "{}", out_data)?; - }else { - if out_data.len() > 0 { + }else if !out_data.is_empty() { info!("{}", out_data); - } - } diff --git a/kirum/src/test_files/test_phonetics.json b/kirum/src/test_files/test_phonetics.json new file mode 100644 index 0000000..9635d00 --- /dev/null +++ b/kirum/src/test_files/test_phonetics.json @@ -0,0 +1,10 @@ +{ + "groups": { + "C": ["r", "k", "c", "ch", "b"], + "V": ["i", "u", "o"], + "S": ["CV", "CVC", "VC"] + }, + "lexis_types":{ + "word": ["S", "SuS", "iSSS", "SSSS"] + } +} \ No newline at end of file diff --git a/libkirum/src/kirum.rs b/libkirum/src/kirum.rs index 09e5ad0..6f76555 100644 --- a/libkirum/src/kirum.rs +++ b/libkirum/src/kirum.rs @@ -126,7 +126,7 @@ impl IntoIterator for LanguageTree { impl LanguageTree { pub fn new() -> Self { LanguageTree {graph: Graph::::new(), - word_creator_phonology: lexcreate::LexPhonology { phonemes: HashMap::new(), lexis_types: HashMap::new() }} + word_creator_phonology: lexcreate::LexPhonology { groups: HashMap::new(), lexis_types: HashMap::new() }} } @@ -362,7 +362,7 @@ impl LanguageTree { return Some(self.graph[node].clone()) } } - return None + None } @@ -407,8 +407,8 @@ mod tests { use std::collections::HashMap; use log::LevelFilter; - use crate::{kirum::{LanguageTree, Lexis}, transforms::{Transform, LetterArrayValues, TransformFunc, self, LetterValues}, matching::{LexisMatch, Value}, lexcreate::{LexPhonology, CreateValue, PhoneticReference}}; - use env_logger::Builder; + use crate::{kirum::{LanguageTree, Lexis}, 
transforms::{Transform, LetterArrayValues, TransformFunc, self, LetterValues}, matching::{LexisMatch, Value}, lexcreate::LexPhonology}; + use env_logger::Builder; fn create_basic_words() -> LanguageTree { let parent = Lexis{id: "parent".to_string(), word: Some("wrh".into()), language: "gauntlet".to_string(), lexis_type: "root".to_string(), ..Default::default()}; @@ -438,7 +438,7 @@ mod tests { let log_level = LevelFilter::Trace; Builder::new().filter_level(log_level).init(); let test_phon_rules = LexPhonology{ - phonemes: HashMap::from([ + groups: HashMap::from([ ("C".to_string(), vec![ "h".into(), diff --git a/libkirum/src/lemma.rs b/libkirum/src/lemma.rs index 20b3bbf..0a0280c 100644 --- a/libkirum/src/lemma.rs +++ b/libkirum/src/lemma.rs @@ -84,7 +84,7 @@ impl From> for Lemma { fn from(value: Vec) -> Self { let mut build = String::new(); for part in value.into_iter() { - if part == WORD_SEP.to_string() || part == "" { + if part == WORD_SEP.to_string() || part.is_empty() { continue } build = format!("{}{}", build, part); @@ -122,7 +122,7 @@ impl std::string::ToString for Lemma { impl From for Vec { fn from(value: Lemma) -> Self { - value.value.split(WORD_SEP).map(|c|c.to_owned()).filter(|c| c != "").collect() + value.value.split(WORD_SEP).map(|c|c.to_owned()).filter(|c| !c.is_empty()).collect() } } @@ -133,8 +133,12 @@ impl Lemma { self.clone().into_iter().count() } + pub fn is_empty(&self) -> bool{ + self.value.is_empty() + } + pub fn push(&mut self, pushed: Lemma) { - if self.len() >0 { + if !self.is_empty(){ let mut vectored: Vec = self.clone().into(); let mut update_vec: Vec = pushed.into(); vectored.append(&mut update_vec); @@ -148,7 +152,7 @@ impl Lemma { pub fn push_char(&mut self, pushed: &str) { // a bit horrible, but the easiest way to insure we're inserting the separators properly - if self.len() > 0 { + if !self.is_empty() { let mut vectored: Vec = self.clone().into(); vectored.push(pushed.to_string()); let updated: Lemma = vectored.into(); diff --git 
a/libkirum/src/lexcreate.rs b/libkirum/src/lexcreate.rs index 7dde771..6f1a476 100644 --- a/libkirum/src/lexcreate.rs +++ b/libkirum/src/lexcreate.rs @@ -6,7 +6,7 @@ use serde::{Deserialize, Serialize, de::Visitor}; #[derive(Clone, PartialEq, Serialize, Deserialize, Default, Debug)] pub struct LexPhonology { - pub phonemes: HashMap>, + pub groups: HashMap>, pub lexis_types: HashMap> } @@ -44,7 +44,7 @@ impl<'de> Visitor<'de> for PhoneticReferenceVisitor { impl From<&str> for PhoneticReference{ fn from(value: &str) -> Self { let mut phon_vec: Vec = Vec::new(); - if value.matches(" ").count() > 1{ + if value.matches(' ').count() > 1{ for char in value.split_whitespace(){ phon_vec.push(char.into()) } @@ -115,6 +115,7 @@ impl<'de> Visitor<'de> for CreateValueVisitor { impl LexPhonology { + /// Creates a new random word based on the applied phonetic rules pub fn create_word(&self, lexis_type: &str) -> Option { if let Some(found_type_list) = self.lexis_types.get(lexis_type) { if let Some(selected_phon) = found_type_list.choose(&mut rand::thread_rng()) { @@ -129,9 +130,9 @@ impl LexPhonology { let mut phonetic_acc = Lemma::default(); for phon in &pref.0 { match phon { - CreateValue::Phoneme(p) => {phonetic_acc.push_char(&p)}, + CreateValue::Phoneme(p) => {phonetic_acc.push_char(p)}, CreateValue::Reference(single_ref) => { - if let Some(found_ref) = self.random_phoneme(&single_ref) { + if let Some(found_ref) = self.random_phoneme(single_ref) { phonetic_acc.push(found_ref) } else { return None @@ -140,7 +141,7 @@ impl LexPhonology { } } - if phonetic_acc.len() == 0 { + if phonetic_acc.is_empty(){ None } else { Some(phonetic_acc) @@ -149,7 +150,7 @@ impl LexPhonology { } fn random_phoneme(&self, phoneme_key: &str) -> Option { - if let Some(type_val) = self.phonemes.get(phoneme_key) { + if let Some(type_val) = self.groups.get(phoneme_key) { let picked_from = type_val.choose(&mut rand::thread_rng()); if let Some(picked) = picked_from { return 
self.resolve_phonetic_reference(picked) @@ -207,7 +208,7 @@ mod tests { #[test] fn test_basic_gen() { let test_phon = LexPhonology{ - phonemes: HashMap::from([ + groups: HashMap::from([ ("C".to_string(), vec![ PhoneticReference(vec![CreateValue::Phoneme("t".to_string())]), From 69142e812f6d93ee0b978e714c9fb203f5124162 Mon Sep 17 00:00:00 2001 From: Alex Kristiansen Date: Mon, 21 Aug 2023 22:48:29 -0700 Subject: [PATCH 3/4] first pass working --- kirum/src/files.rs | 11 ++++ kirum/src/new.rs | 41 ++++++++++---- .../with_phonetic_rules/etymology/ety.json | 35 ++++++++++++ .../with_phonetic_rules/phonetics/rules.json | 23 ++++++++ .../tree/with_phonetic_rules.json | 46 ++++++++++++++++ libkirum/src/lemma.rs | 1 - libkirum/src/lexcreate.rs | 54 ++++++++++++++++++- 7 files changed, 198 insertions(+), 13 deletions(-) create mode 100644 kirum/src/test_files/with_phonetic_rules/etymology/ety.json create mode 100644 kirum/src/test_files/with_phonetic_rules/phonetics/rules.json create mode 100644 kirum/src/test_files/with_phonetic_rules/tree/with_phonetic_rules.json diff --git a/kirum/src/files.rs b/kirum/src/files.rs index fe2f39b..5a95ee1 100644 --- a/kirum/src/files.rs +++ b/kirum/src/files.rs @@ -243,6 +243,17 @@ mod tests { Ok(()) } + #[test] + fn test_phonetic_create() -> Result<()> { + let dir = Some(String::from("src/test_files/with_phonetic_rules")); + let computed = read_and_compute(dir)?; + let rendered: Vec = computed.to_vec().into_iter().filter(|w| w.word.is_some()).collect(); + println!("Got: {:?}", rendered); + assert_eq!(3, rendered.len()); + + Ok(()) + } + #[test] fn test_ingest_with_derivatives() -> Result<()> { let directory = Some(String::from("src/test_files/test_der")); diff --git a/kirum/src/new.rs b/kirum/src/new.rs index 45d400f..5cbb74c 100644 --- a/kirum/src/new.rs +++ b/kirum/src/new.rs @@ -1,15 +1,17 @@ use std::{path::PathBuf, io::Write, collections::HashMap, fs::{self, File}}; -use libkirum::{transforms::TransformFunc, word::{Etymology, Edge}}; 
+use libkirum::{transforms::TransformFunc, word::{Etymology, Edge}, lexcreate::LexPhonology}; use crate::entries::{RawTransform, TransformGraph, RawLexicalEntry, Derivative, WordGraph}; -use anyhow::Result; +use anyhow::{Result, Context}; /// Helper function. Create a new project, and write it out. pub fn create_new_project(name: &str) -> Result<()> { let base = PathBuf::from(name); let mut ety_path = base.join("etymology"); let mut tree_path = base.join("tree"); + let mut phonetic_path = base.join("phonetics"); fs::create_dir_all(&ety_path)?; fs::create_dir_all(&tree_path)?; + fs::create_dir_all(&phonetic_path)?; let mut transform_map: HashMap = HashMap::new(); transform_map.insert("of-from-latin".into(), RawTransform { @@ -71,18 +73,37 @@ pub fn create_new_project(name: &str) -> Result<()> { words: word_map }; + let example_phonetics = LexPhonology{ + groups: HashMap::from([ + ("C".into(), vec!["x".into(), "m".into(), "p".into(), "l".into(),]), + ("V".into(), vec!["e".into(), "a".into()]), + ("S".into(), vec!["VC".into(), "CCV".into()]) + ]), + lexis_types: HashMap::from([ + ("word".into(), vec!["SSS".into()]) + ]) + }; + + let phonetic_data = serde_json::to_string_pretty(&example_phonetics)?; let graph_data = serde_json::to_string_pretty(&example_tree)?; let trans_data = serde_json::to_string_pretty(&example_transforms)?; - tree_path.push(name); - tree_path.set_extension("json"); - let mut tree_file = File::create(tree_path)?; - write!(tree_file, "{}", graph_data)?; + write_json(name, &mut tree_path, graph_data)?; + write_json("ety", &mut ety_path, trans_data)?; + write_json("rules", &mut phonetic_path, phonetic_data)?; + + + Ok(()) +} + +fn write_json(subpath: &str, base_path: &mut PathBuf, data: String) -> Result<()>{ + base_path.push(subpath); + base_path.set_extension("json"); + let mut phonetics_file = File::create(base_path.clone()) + .context(format!("could not create json file {} {}", subpath, base_path.display()))?; - ety_path.push("ety"); - 
ety_path.set_extension("json"); - let mut ety_file = File::create(ety_path)?; - write!(ety_file, "{}", trans_data)?; + write!(phonetics_file, "{}", data) + .context(format!("error writing phonetics file"))?; Ok(()) } diff --git a/kirum/src/test_files/with_phonetic_rules/etymology/ety.json b/kirum/src/test_files/with_phonetic_rules/etymology/ety.json new file mode 100644 index 0000000..aa9a739 --- /dev/null +++ b/kirum/src/test_files/with_phonetic_rules/etymology/ety.json @@ -0,0 +1,35 @@ +{ + "transforms": { + "latin-from-verb": { + "transforms": [ + { + "match_replace": { + "old": "ere", + "new": "plum" + } + }, + { + "prefix": { + "value": "ex" + } + } + ] + }, + "of-from-latin": { + "transforms": [ + { + "match_replace": { + "old": "exe", + "new": "esse" + } + }, + { + "match_replace": { + "old": "um", + "new": "e" + } + } + ] + } + } +} \ No newline at end of file diff --git a/kirum/src/test_files/with_phonetic_rules/phonetics/rules.json b/kirum/src/test_files/with_phonetic_rules/phonetics/rules.json new file mode 100644 index 0000000..8ed3882 --- /dev/null +++ b/kirum/src/test_files/with_phonetic_rules/phonetics/rules.json @@ -0,0 +1,23 @@ +{ + "groups": { + "V": [ + "e", + "a" + ], + "S": [ + "VC", + "CCV" + ], + "C": [ + "x", + "m", + "p", + "l" + ] + }, + "lexis_types": { + "word": [ + "SSS" + ] + } +} \ No newline at end of file diff --git a/kirum/src/test_files/with_phonetic_rules/tree/with_phonetic_rules.json b/kirum/src/test_files/with_phonetic_rules/tree/with_phonetic_rules.json new file mode 100644 index 0000000..d3d0cb3 --- /dev/null +++ b/kirum/src/test_files/with_phonetic_rules/tree/with_phonetic_rules.json @@ -0,0 +1,46 @@ +{ + "words": { + "latin_verb": { + "type": "word", + "generate": "word", + "language": "Latin", + "definition": "To buy, remove", + "part_of_speech": "verb", + "archaic": true + }, + "latin_example": { + "type": "word", + "language": "Latin", + "definition": "an instance, model, example", + "part_of_speech": "noun", + 
"etymology": { + "etymons": [ + { + "etymon": "latin_verb", + "transforms": [ + "latin-from-verb" + ] + } + ] + }, + "archaic": true, + "tags": [ + "example", + "default" + ], + "derivatives": [ + { + "lexis": { + "language": "Old French", + "definition": "model, example", + "part_of_speech": "noun", + "archaic": true + }, + "transforms": [ + "of-from-latin" + ] + } + ] + } + } +} \ No newline at end of file diff --git a/libkirum/src/lemma.rs b/libkirum/src/lemma.rs index 0a0280c..741f7e3 100644 --- a/libkirum/src/lemma.rs +++ b/libkirum/src/lemma.rs @@ -147,7 +147,6 @@ impl Lemma { } else { self.value = pushed.value } - } pub fn push_char(&mut self, pushed: &str) { diff --git a/libkirum/src/lexcreate.rs b/libkirum/src/lexcreate.rs index 6f1a476..c0bdbf5 100644 --- a/libkirum/src/lexcreate.rs +++ b/libkirum/src/lexcreate.rs @@ -4,15 +4,38 @@ use crate::lemma::Lemma; use serde::{Deserialize, Serialize, de::Visitor}; +/// Carries the phonological rules for a word generator. #[derive(Clone, PartialEq, Serialize, Deserialize, Default, Debug)] pub struct LexPhonology { + /// May contain any kind of phonetic value, syllables, phonemes, etc + /// the keys of the hashmap are referenced in the following lexis_types below. + /// When a word is generated, a PhoneticReference from the vector is chosen at random. + /// Keys must be all uppercase to distinguish them from letter values. + /// For example: + /// C = v b r t h # The available consonants + /// V = i u o y e # The available vowels + /// S = CVC CVV VVC # The possible syllable structures pub groups: HashMap>, + /// A map of `groups` keys or PhoneticReferences. A key value in the map can be referenced + /// in the `create` field of a Lexis to generate a word. + /// Expanding on the above example: + /// word = S SS SuiS + /// prefix = S uS Su pub lexis_types: HashMap> } -#[derive(Clone, PartialEq, Serialize, Default, Debug)] +/// A single "reference" to a phonetic value used to generate words. 
+#[derive(Clone, PartialEq, Default, Debug)] pub struct PhoneticReference(Vec); +impl Serialize for PhoneticReference{ + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer { + serializer.serialize_str(&self.to_string()) + } +} + impl<'de> Deserialize<'de> for PhoneticReference { fn deserialize(deserializer: D) -> Result where @@ -59,13 +82,32 @@ impl From<&str> for PhoneticReference{ } +impl ToString for PhoneticReference{ + fn to_string(&self) -> String { + let mut acc = String::new(); + for part in &self.0{ + acc.push_str(&part.to_string()) + } + acc + } +} + -#[derive(Clone, PartialEq, PartialOrd, Eq, Ord, Serialize, Debug)] +#[derive(Clone, PartialEq, PartialOrd, Eq, Ord, Debug)] pub enum CreateValue { Phoneme(String), Reference(String) } +impl ToString for CreateValue{ + fn to_string(&self) -> String { + match self { + Self::Phoneme(p) => p.to_string(), + Self::Reference(r) => r.to_string() + } + } +} + impl From<&str> for CreateValue{ fn from(value: &str) -> Self { let found_lowercase = value.chars().find(|c| c.is_lowercase()); @@ -87,6 +129,14 @@ impl From for CreateValue{ } } +impl Serialize for CreateValue{ + fn serialize(&self, serializer: S) -> Result + where + S: serde::Serializer { + serializer.serialize_str(&self.to_string()) + } +} + impl<'de> Deserialize<'de> for CreateValue { fn deserialize(deserializer: D) -> Result where From 5e67ae96a0df5d6a8a3c5dc1f8bdfb3c6fd53db9 Mon Sep 17 00:00:00 2001 From: Alex Kristiansen Date: Wed, 23 Aug 2023 11:52:57 -0700 Subject: [PATCH 4/4] finished --- examples/phonetic_rules/etymology/ety.json | 35 +++++ examples/phonetic_rules/phonetics/rules.json | 25 ++++ examples/phonetic_rules/readme.md | 56 ++++++++ .../tree/with_phonetic_rules.json | 46 +++++++ kirum/Cargo.toml | 3 +- kirum/src/cli.rs | 9 +- kirum/src/entries.rs | 11 +- kirum/src/files.rs | 11 +- kirum/src/generate.rs | 2 +- kirum/src/main.rs | 17 ++- kirum/src/new.rs | 10 +- libkirum/src/errors.rs | 7 + libkirum/src/kirum.rs | 29 
+++-- libkirum/src/lexcreate.rs | 123 ++++++++++++------ readme.md | 8 +- 15 files changed, 310 insertions(+), 82 deletions(-) create mode 100644 examples/phonetic_rules/etymology/ety.json create mode 100644 examples/phonetic_rules/phonetics/rules.json create mode 100644 examples/phonetic_rules/readme.md create mode 100644 examples/phonetic_rules/tree/with_phonetic_rules.json diff --git a/examples/phonetic_rules/etymology/ety.json b/examples/phonetic_rules/etymology/ety.json new file mode 100644 index 0000000..aa9a739 --- /dev/null +++ b/examples/phonetic_rules/etymology/ety.json @@ -0,0 +1,35 @@ +{ + "transforms": { + "latin-from-verb": { + "transforms": [ + { + "match_replace": { + "old": "ere", + "new": "plum" + } + }, + { + "prefix": { + "value": "ex" + } + } + ] + }, + "of-from-latin": { + "transforms": [ + { + "match_replace": { + "old": "exe", + "new": "esse" + } + }, + { + "match_replace": { + "old": "um", + "new": "e" + } + } + ] + } + } +} \ No newline at end of file diff --git a/examples/phonetic_rules/phonetics/rules.json b/examples/phonetic_rules/phonetics/rules.json new file mode 100644 index 0000000..f780915 --- /dev/null +++ b/examples/phonetic_rules/phonetics/rules.json @@ -0,0 +1,25 @@ +{ + "groups": { + "V": [ + "e", + "a" + ], + "S": [ + "VC", + "CCV", + "VyV" + ], + "C": [ + "x", + "m", + "p", + "l" + ] + }, + "lexis_types": { + "word_rule": [ + "SSS", + "SCSS" + ] + } +} \ No newline at end of file diff --git a/examples/phonetic_rules/readme.md b/examples/phonetic_rules/readme.md new file mode 100644 index 0000000..3969032 --- /dev/null +++ b/examples/phonetic_rules/readme.md @@ -0,0 +1,56 @@ +# Creating new words with phonetic rules + +In addition to creating words based on etymological rules and word relationships, Kirum can also generate +words from base phonetic rulesets, without any pre-existing etymology. + +To generate words from phonetic rules, a Kirum project must have one or more JSON rule files under `phonetics/` in the root project directory.
These files are formatted as such: + +```json +{ + "groups": { + "V": [ + "e", + "a" + ], + "S": [ + "VC", + "CCV", + "VyV" + ], + "C": [ + "x", + "m", + "p", + "l" + ] + }, + "lexis_types": { + "word_rule": [ + "SSS", + "SCSS" + ] + } +} + +``` + +This phonetic file is divided into two maps: +- `groups`: breaks down possible groups of letters and consonants. The key of a group can be any uppercase unicode character, the values of an individual group can be any unicode value, or any uppercase group key. +In the above example, `V` are the language's possible vowels, `S` are the possible syllables, and `C` are +the possible consonants. +- `lexis_types`: are the possible words that are derived from the specified group rules. + +To generate a word from a set of specified phonetic rules, simply add the given `lexis_types` key to +the lexis's `generate` field: +```json + "latin_verb": { + "type": "word", + "generate": "word_rule", + "language": "Latin", + "definition": "To buy, remove", + "part_of_speech": "verb", + "archaic": true + } +``` + +Note that the generator will not apply a new word if the lexis has both a `generate` and `word` field. 
diff --git a/examples/phonetic_rules/tree/with_phonetic_rules.json b/examples/phonetic_rules/tree/with_phonetic_rules.json new file mode 100644 index 0000000..9bcddb3 --- /dev/null +++ b/examples/phonetic_rules/tree/with_phonetic_rules.json @@ -0,0 +1,46 @@ +{ + "words": { + "latin_verb": { + "type": "word", + "generate": "word_rule", + "language": "Latin", + "definition": "To buy, remove", + "part_of_speech": "verb", + "archaic": true + }, + "latin_example": { + "type": "word", + "language": "Latin", + "definition": "an instance, model, example", + "part_of_speech": "noun", + "etymology": { + "etymons": [ + { + "etymon": "latin_verb", + "transforms": [ + "latin-from-verb" + ] + } + ] + }, + "archaic": true, + "tags": [ + "example", + "default" + ], + "derivatives": [ + { + "lexis": { + "language": "Old French", + "definition": "model, example", + "part_of_speech": "noun", + "archaic": true + }, + "transforms": [ + "of-from-latin" + ] + } + ] + } + } +} \ No newline at end of file diff --git a/kirum/Cargo.toml b/kirum/Cargo.toml index 5aa0bed..95c247d 100644 --- a/kirum/Cargo.toml +++ b/kirum/Cargo.toml @@ -18,4 +18,5 @@ walkdir = "2.3.3" log = "0.4.0" env_logger = "0.9.0" tabled = "0.12.1" -toml = "0.7.5" \ No newline at end of file +toml = "0.7.5" +validator = {version = "0.16.1", features = ["derive"]} \ No newline at end of file diff --git a/kirum/src/cli.rs b/kirum/src/cli.rs index ecf6cce..4fd221c 100644 --- a/kirum/src/cli.rs +++ b/kirum/src/cli.rs @@ -11,6 +11,9 @@ pub struct Args { /// Output file; defaults to stdout if unspecified #[clap(short, long, value_parser)] pub output: Option, + #[clap(short, long, default_value_t=false)] + /// Do not print any log output + pub quiet: bool, #[clap(subcommand)] pub command: Commands @@ -89,7 +92,7 @@ pub enum SeparateValues { Archaic, } -#[derive(clap::Subcommand, Clone)] +#[derive(clap::Subcommand, Clone, PartialEq, PartialOrd)] pub enum Format{ /// Print one word per line Line, @@ -103,5 +106,7 @@ pub enum 
Format{ /// Optional rhai scripts for processing template data. See https://docs.rs/handlebars/latest/handlebars/#script-helper #[clap(short, long, value_parser)] rhai_files: Option> - } + }, + /// Prints a JSON object of the language + Json } \ No newline at end of file diff --git a/kirum/src/entries.rs b/kirum/src/entries.rs index 9b8b9e4..b7b3b4e 100644 --- a/kirum/src/entries.rs +++ b/kirum/src/entries.rs @@ -92,14 +92,17 @@ impl From for RawLexicalEntry{ } } -// take the output of a call to to_vec_etymons() and structure it like a graph json file structure -pub fn create_json_graph(lex: Vec<(Lexis, Etymology)>) -> WordGraph{ +/// take the output of a call to to_vec_etymons() and structure it like a graph json file structure +pub fn create_json_graph(lex: Vec<(Lexis, Etymology)>,mut key_gen: F) -> WordGraph + where F: FnMut(Lexis) -> String + { let mut graph: HashMap = HashMap::new(); for (word, ety) in lex{ let base: RawLexicalEntry = word.clone().into(); - let complete = RawLexicalEntry{etymology: Some(ety), ..base}; - let key = format!("daughter-gen-{}", word.clone().word.unwrap().string_without_sep()); + let found_ety = if !ety.etymons.is_empty() {Some(ety)} else {None}; + let complete = RawLexicalEntry{etymology: found_ety, ..base}; + let key = key_gen(word); graph.insert(key, complete); } WordGraph { words: graph } diff --git a/kirum/src/files.rs b/kirum/src/files.rs index 5a95ee1..87df535 100644 --- a/kirum/src/files.rs +++ b/kirum/src/files.rs @@ -12,6 +12,7 @@ pub struct Project { phonetic_rules: Option> } +/// renders any templating code that was written into word definitions pub fn apply_def_vars(var_file: Option, dict: &mut Vec) -> Result<()> { if let Some(vars) = var_file { debug!("Applying variables from {}", vars); @@ -122,7 +123,7 @@ fn add_single_word(tree: &mut LanguageTree, trans_map: &HashMap) -> Result{ let mut phonetic_set = LexPhonology::default(); for path in paths{ - let raw = std::fs::read_to_string(path)?; + let raw = 
std::fs::read_to_string(&path)?; let parsed: LexPhonology = serde_json::from_str(&raw)?; phonetic_set.groups.extend(parsed.groups); phonetic_set.lexis_types.extend(parsed.lexis_types); @@ -229,12 +230,12 @@ mod tests { let example = LexPhonology{ groups: HashMap::from([ - ("C".into(), vec!["r".into(), "k".into(), "c".into(), "ch".into(), "b".into()]), - ("V".into(), vec!["i".into(), "u".into(), "o".into()]), - ("S".into(), vec!["CV".into(), "CVC".into(), "VC".into()]) + ('C', vec!["r".try_into()?, "k".try_into()?, "c".try_into()?, "ch".try_into()?, "b".try_into()?]), + ('V', vec!["i".try_into()?, "u".try_into()?, "o".try_into()?]), + ('S', vec!["CV".try_into()?, "CVC".try_into()?, "VC".try_into()?]) ]), lexis_types: HashMap::from([ - ("word".into(), vec!["S".into(), "SuS".into(), "iSSS".into(), "SSSS".into()]) + ("word".into(), vec!["S".try_into()?, "SuS".try_into()?, "iSSS".try_into()?, "SSSS".try_into()?]) ]) }; diff --git a/kirum/src/generate.rs b/kirum/src/generate.rs index 0cd9096..3aca6c6 100644 --- a/kirum/src/generate.rs +++ b/kirum/src/generate.rs @@ -46,7 +46,7 @@ pub fn daughter(daughter_ety: String, } for (fname, data) in file_map { - let graph = entries::create_json_graph(data); + let graph = entries::create_json_graph(data, |l| format!("daughter-gen-{}", l.word.unwrap().string_without_sep())); let graph_data = serde_json::to_string_pretty(&graph) .context("error creating JSON from graph")?; diff --git a/kirum/src/main.rs b/kirum/src/main.rs index 62c5bcb..5768a1d 100644 --- a/kirum/src/main.rs +++ b/kirum/src/main.rs @@ -6,6 +6,7 @@ mod stat; mod new; mod generate; use clap::Parser; +use entries::create_json_graph; use files::{read_and_compute, apply_def_vars}; use new::create_new_project; use anyhow::Result; @@ -15,6 +16,7 @@ use std::{fs::File, io::Write}; use env_logger::Builder; use log::LevelFilter; + #[macro_use] extern crate log; @@ -28,9 +30,12 @@ fn main() -> Result<()> { } else { LevelFilter::Trace }; - 
Builder::new().filter_level(log_level).init(); + if !cli.quiet { + Builder::new().filter_level(log_level).init(); + } + - let out_data: String = match cli.command{ + let out_data: String = match cli.command.clone(){ cli::Commands::New { name } => { create_new_project(&name)?; format!("created new project {}", name) @@ -67,6 +72,11 @@ fn main() -> Result<()> { // }, cli::Format::Template { template_file, rhai_files } =>{ tmpl::generate_from_tmpl(rendered_dict, template_file, rhai_files)? + }, + cli::Format::Json => { + let words = computed.to_vec_etymons(|_|true); + let word_data = create_json_graph(words, |l| l.id); + serde_json::to_string_pretty(&word_data)? } } @@ -87,9 +97,8 @@ fn main() -> Result<()> { let mut out_file = File::create(out_path)?; write!(out_file, "{}", out_data)?; }else if !out_data.is_empty() { - info!("{}", out_data); + println!("{}", out_data); } - Ok(()) } diff --git a/kirum/src/new.rs b/kirum/src/new.rs index 5cbb74c..5b007a7 100644 --- a/kirum/src/new.rs +++ b/kirum/src/new.rs @@ -75,12 +75,12 @@ pub fn create_new_project(name: &str) -> Result<()> { let example_phonetics = LexPhonology{ groups: HashMap::from([ - ("C".into(), vec!["x".into(), "m".into(), "p".into(), "l".into(),]), - ("V".into(), vec!["e".into(), "a".into()]), - ("S".into(), vec!["VC".into(), "CCV".into()]) + ('C', vec!["x".try_into()?, "m".try_into()?, "p".try_into()?, "l".try_into()?,]), + ('V', vec!["e".try_into()?, "a".try_into()?]), + ('S', vec!["VC".try_into()?, "CCV".try_into()?]) ]), lexis_types: HashMap::from([ - ("word".into(), vec!["SSS".into()]) + ("word".into(), vec!["SSS".try_into()?]) ]) }; @@ -103,7 +103,7 @@ fn write_json(subpath: &str, base_path: &mut PathBuf, data: String) -> Result<() .context(format!("could not create json file {} {}", subpath, base_path.display()))?; write!(phonetics_file, "{}", data) - .context(format!("error writing phonetics file"))?; + .context("error writing phonetics file".to_string())?; Ok(()) } diff --git 
a/libkirum/src/errors.rs b/libkirum/src/errors.rs index 463f052..11957c9 100644 --- a/libkirum/src/errors.rs +++ b/libkirum/src/errors.rs @@ -8,4 +8,11 @@ pub enum LangError { #[error("error parsing JSON input")] JSONSerdeError(#[source] serde_json::Error) +} + +#[derive(thiserror::Error, Debug)] +#[error("error parsing phonetic value: {msg}; found {found}")] +pub struct PhoneticParsingError { + pub msg: &'static str, + pub found: String } \ No newline at end of file diff --git a/libkirum/src/kirum.rs b/libkirum/src/kirum.rs index 6f76555..4780956 100644 --- a/libkirum/src/kirum.rs +++ b/libkirum/src/kirum.rs @@ -366,7 +366,8 @@ impl LanguageTree { } - /// Reduce the language graph to a vector of words that match the provided function. Returns a vector of tuples for each matching word and any associated etymological data. + /// Reduce the language graph to a vector of words that match the provided function. + /// Returns a vector of tuples for each matching word and any associated etymological data. 
pub fn to_vec_etymons(self, filter: F) -> Vec<(Lexis, Etymology)> where F: Fn(&Lexis) -> bool, @@ -406,9 +407,9 @@ fn join_string_vectors(words: &mut [(i32, Lemma)]) -> Lemma{ mod tests { use std::collections::HashMap; - use log::LevelFilter; + //use log::LevelFilter; use crate::{kirum::{LanguageTree, Lexis}, transforms::{Transform, LetterArrayValues, TransformFunc, self, LetterValues}, matching::{LexisMatch, Value}, lexcreate::LexPhonology}; - use env_logger::Builder; + //use env_logger::Builder; fn create_basic_words() -> LanguageTree { let parent = Lexis{id: "parent".to_string(), word: Some("wrh".into()), language: "gauntlet".to_string(), lexis_type: "root".to_string(), ..Default::default()}; @@ -435,27 +436,27 @@ mod tests { #[test] fn test_word_create() { - let log_level = LevelFilter::Trace; - Builder::new().filter_level(log_level).init(); + //let log_level = LevelFilter::Trace; + //Builder::new().filter_level(log_level).init(); let test_phon_rules = LexPhonology{ groups: HashMap::from([ - ("C".to_string(), + ('C', vec![ - "h".into(), - "r".into(), - "x".into(), - "k".into() + "h".try_into().unwrap(), + "r".try_into().unwrap(), + "x".try_into().unwrap(), + "k".try_into().unwrap() ]), - ("V".to_string(), + ('V', vec![ - "u".into(), - "i".into() + "u".try_into().unwrap(), + "i".try_into().unwrap() ]), ]), lexis_types: HashMap::from([ ("root".to_string(), vec![ - "CCC".into() + "CCC".try_into().unwrap() ]) ]), }; diff --git a/libkirum/src/lexcreate.rs b/libkirum/src/lexcreate.rs index c0bdbf5..e7edf76 100644 --- a/libkirum/src/lexcreate.rs +++ b/libkirum/src/lexcreate.rs @@ -1,8 +1,7 @@ use std::collections::HashMap; use rand::seq::SliceRandom; -use crate::lemma::Lemma; -use serde::{Deserialize, Serialize, de::Visitor}; - +use crate::{lemma::Lemma, errors::{self, PhoneticParsingError}}; +use serde::{Deserialize, Serialize, de::{Visitor, self, Unexpected}}; /// Carries the phonological rules for a word generator. 
#[derive(Clone, PartialEq, Serialize, Deserialize, Default, Debug)] @@ -15,7 +14,7 @@ pub struct LexPhonology { /// C = v b r t h # The available consonants /// V = i u o y e # The available vowels /// S = CVC CVV VVC # The possible syllable structures - pub groups: HashMap>, + pub groups: HashMap>, /// A map of `groups` keys or PhoneticReferences. A key value in the map can be referenced /// in the `create` field of a Lexis to generate a word. /// Expanding on the above example: @@ -56,7 +55,12 @@ impl<'de> Visitor<'de> for PhoneticReferenceVisitor { fn visit_str(self, v: &str) -> Result where E: serde::de::Error, { - Ok(v.into()) + match v.try_into() { + Err(_e) => { + Err(de::Error::invalid_value(Unexpected::Str(v), &self)) + }, + Ok(v) => {Ok(v)} + } } } @@ -64,20 +68,21 @@ impl<'de> Visitor<'de> for PhoneticReferenceVisitor { // CCCC // C C C C // the latter helps for cases where we've inserted a weird character that's more than one unicode character -impl From<&str> for PhoneticReference{ - fn from(value: &str) -> Self { +impl TryFrom<&str> for PhoneticReference{ + type Error = PhoneticParsingError; + fn try_from(value: &str) -> Result { let mut phon_vec: Vec = Vec::new(); if value.matches(' ').count() > 1{ for char in value.split_whitespace(){ - phon_vec.push(char.into()) + phon_vec.push(char.try_into()?) 
} } else { - for char in value.chars(){ + for char in value.chars(){ phon_vec.push(char.into()) } } - PhoneticReference(phon_vec) + Ok(PhoneticReference(phon_vec)) } } @@ -96,7 +101,7 @@ impl ToString for PhoneticReference{ #[derive(Clone, PartialEq, PartialOrd, Eq, Ord, Debug)] pub enum CreateValue { Phoneme(String), - Reference(String) + Reference(char) } impl ToString for CreateValue{ @@ -108,14 +113,25 @@ impl ToString for CreateValue{ } } -impl From<&str> for CreateValue{ - fn from(value: &str) -> Self { - let found_lowercase = value.chars().find(|c| c.is_lowercase()); - if found_lowercase.is_some() { - CreateValue::Phoneme(value.to_string()) +impl TryFrom<&str> for CreateValue{ + type Error = errors::PhoneticParsingError; + fn try_from(value: &str) -> Result { + let found_uppercase = value.chars().filter(|c| c.is_uppercase()).count(); + if found_uppercase == value.len() && value.len() == 1 { + let raw: char = value.chars().next() + .ok_or_else(|| PhoneticParsingError {msg:"could not find character for reference", + found: value.to_string()})?; + Ok(CreateValue::Reference(raw)) + + } else if found_uppercase == 0 { + Ok(CreateValue::Phoneme(value.to_string())) + } else { - CreateValue::Reference(value.to_string()) + Err(PhoneticParsingError{msg: "a reference can only be one upper-case character, or an all lowercase phonetic rule", + found: value.to_string()}) } + + } } @@ -124,7 +140,7 @@ impl From for CreateValue{ if value.is_lowercase(){ CreateValue::Phoneme(value.to_string()) } else { - CreateValue::Reference(value.to_string()) + CreateValue::Reference(value) } } } @@ -154,12 +170,23 @@ impl<'de> Visitor<'de> for CreateValueVisitor { write!(formatter, "an upper or lower case character value") } + fn visit_char(self, v: char) -> Result + where + E: serde::de::Error, { + Ok(v.into()) + } + // logic: if an identifier is all uppercase, treat it as a reference, // otherwise, it's a string phoneme fn visit_str(self, v: &str) -> Result where E: serde::de::Error, { 
- Ok(v.into()) + match v.try_into() { + Err(_e) => { + Err(de::Error::invalid_value(Unexpected::Str(v), &self)) + }, + Ok(v) => {Ok(v)} + } } } @@ -199,7 +226,7 @@ impl LexPhonology { } - fn random_phoneme(&self, phoneme_key: &str) -> Option { + fn random_phoneme(&self, phoneme_key: &char) -> Option { if let Some(type_val) = self.groups.get(phoneme_key) { let picked_from = type_val.choose(&mut rand::thread_rng()); if let Some(picked) = picked_from { @@ -216,27 +243,37 @@ impl LexPhonology { #[cfg(test)] mod tests { use std::collections::HashMap; + use crate::{lexcreate::PhoneticReference, errors::PhoneticParsingError}; + use super::{LexPhonology, CreateValue}; - use crate::lexcreate::PhoneticReference; + #[test] + fn test_bad_phonetic_input(){ + let bad: Result = "Ci".try_into(); + assert!(bad.is_err()) + } - use super::{LexPhonology, CreateValue}; + #[test] + fn test_spaces_bad_input(){ + let test_phon: Result = "C wV i C r rw".try_into(); + assert!(test_phon.is_err()) + } #[test] - fn test_new_no_space(){ - let test_phon: PhoneticReference = "CCCC".into(); - let expected = PhoneticReference(vec!["C".into(), "C".into(), "C".into(), "C".into()]); + fn test_new_no_space() { + let test_phon: PhoneticReference = "CCCC".try_into().unwrap(); + let expected = PhoneticReference(vec!['C'.into(), 'C'.into(), 'C'.into(), 'C'.into()]); assert_eq!(test_phon, expected) } #[test] fn test_new_spaces() { - let test_phon: PhoneticReference = "C V i C r rw".into(); + let test_phon: PhoneticReference = "C V i C r rw".try_into().unwrap(); let expected = PhoneticReference(vec![ - CreateValue::Reference("C".to_string()), - CreateValue::Reference("V".to_string()), + CreateValue::Reference('C'), + CreateValue::Reference('V'), CreateValue::Phoneme("i".to_string()), - CreateValue::Reference("C".to_string()), + CreateValue::Reference('C'), CreateValue::Phoneme("r".to_string()), CreateValue::Phoneme("rw".to_string()) ]); @@ -245,12 +282,12 @@ mod tests { #[test] fn test_new_no_space_mix(){ - 
let test_phon: PhoneticReference = "CCrC".into(); + let test_phon: PhoneticReference = "CCrC".try_into().unwrap(); let expected = PhoneticReference(vec![ - CreateValue::Reference("C".to_string()), - CreateValue::Reference("C".to_string()), + CreateValue::Reference('C'), + CreateValue::Reference('C'), CreateValue::Phoneme("r".to_string()), - CreateValue::Reference("C".to_string()) + CreateValue::Reference('C') ]); assert_eq!(test_phon, expected) } @@ -259,35 +296,35 @@ mod tests { fn test_basic_gen() { let test_phon = LexPhonology{ groups: HashMap::from([ - ("C".to_string(), + ('C', vec![ PhoneticReference(vec![CreateValue::Phoneme("t".to_string())]), PhoneticReference(vec![CreateValue::Phoneme("r".to_string())]) ]), - ("V".to_string(), + ('V', vec![ PhoneticReference(vec![CreateValue::Phoneme("u".to_string())]), PhoneticReference(vec![CreateValue::Phoneme("i".to_string())]) ]), - ("S".to_string(), + ('S', vec![ PhoneticReference(vec![ - CreateValue::Reference("C".to_string()), - CreateValue::Reference("V".to_string()) + CreateValue::Reference('C'), + CreateValue::Reference('V') ]), PhoneticReference(vec![ - CreateValue::Reference("V".to_string()), - CreateValue::Reference("C".to_string()) + CreateValue::Reference('V'), + CreateValue::Reference('C') ]) ]) ]), lexis_types: HashMap::from([ ("words".to_string(), vec![ - PhoneticReference(vec![CreateValue::Reference("S".to_string())]), + PhoneticReference(vec![CreateValue::Reference('S')]), PhoneticReference(vec![ - CreateValue::Reference("S".to_string()), - CreateValue::Reference("S".to_string()) + CreateValue::Reference('S'), + CreateValue::Reference('S') ]) ]) ]), diff --git a/readme.md b/readme.md index 7ed60b4..e40eda9 100644 --- a/readme.md +++ b/readme.md @@ -3,9 +3,9 @@ ![ci](https://github.com/fearful-symmetry/kirum/actions/workflows/rust.yml/badge.svg) Kirum (from Standard Babylonian _Kirûm_ meaning _garden_ or _orchard_) is a conlang CLI utility and library. 
-Unlike many conlang tools, which allow you to generate lexicons based on phonetic rules, Kirum can generate entire languages, and manage whole language families, based on specified etymology. Kirum also takes a "pets not cattle" approach to conlang tooling, allowing users to store and graph the entire history of a language family, down to individual morphemes. +Unlike many conlang tools, which allow you to generate lexicons based on phonetic rules, Kirum generates entire languages and language families based on specified etymology. Kirum also takes a "pets not cattle" approach to conlang tooling, allowing users to store and graph the entire history of a language family, down to individual morphemes. -Kirum is a work in progress, and should be considered alpha software. Major features are currently planned, including the ability to generate languages/words from phonetic rulesets, and IPA support. +Kirum is a work in progress, and should be considered alpha software. Major features are currently planned, including IPA support. ## Getting Started @@ -33,11 +33,12 @@ The [`examples`](examples) directory has a number of projects: - [generate_daugher](examples/generate_daughter/) - An example of how to use the `generate` subcommand to create a daughter language from a parent language. - [templates](examples/templates/) - Using a handlebars template to output an asciidoc dictionary. - [conditionals](examples/conditionals/) - Using conditional statements in transforms. +- [phonetic_rules](examples/phonetic_rules/) - Using Kirum's phonetic rulesets to generate words. ## The structure of a Kirum project -`kirum` generates languages from two files, contained in separate `tree` and `etymology` directories: Tree files contain a lexicon of words, stems, roots, etc, and etymology files contain data on the transforms between words. The transform files can also contain conditional statements that determine if a transform should be applied to a word. 
+`kirum` generates languages from a number of files, contained in separate `tree` and `etymology` directories: Tree files contain a lexicon of words, stems, roots, etc, and etymology files contain data on the transforms between words. The transform files can also contain conditional statements that determine if a transform should be applied to a word. An optional `phonetics` directory also allows for generating words from phonetic, as opposed to etymological, rules. ### Lexis objects @@ -48,6 +49,7 @@ A Tree file is a JSON object of `Lexis` objects, a maximal example of which is p "type": "word", // A user-supplied tag. Can be any value. "word": "exemplum", // The actual lexical word. If not supplied, kirum will attempt to derive it based on etymology "language": "Latin", // Can be any user-supplied value + "generate": "word_rules", // An optional tag that will generate the word from phonetic rules, see examples/phonetic_rules "definition": "an instance, model, example", "part_of_speech": "noun", // Optional. Must be one of Noun, verb, or adjective. "etymology": {