Merge pull request #28 from fearful-symmetry/phonetic_generator

Add Phonetic generator
fearful-symmetry · Aug 23, 2023 · 94850ca · 94850ca
2 parents b31c9e6 + 5e67ae9
commit 94850ca
Show file tree

Hide file tree

Showing 23 changed files with 967 additions and 106 deletions.
diff --git a/examples/phonetic_rules/etymology/ety.json b/examples/phonetic_rules/etymology/ety.json
@@ -0,0 +1,35 @@
+{
+  "transforms": {
+    "latin-from-verb": {
+      "transforms": [
+        {
+          "match_replace": {
+            "old": "ere",
+            "new": "plum"
+          }
+        },
+        {
+          "prefix": {
+            "value": "ex"
+          }
+        }
+      ]
+    },
+    "of-from-latin": {
+      "transforms": [
+        {
+          "match_replace": {
+            "old": "exe",
+            "new": "esse"
+          }
+        },
+        {
+          "match_replace": {
+            "old": "um",
+            "new": "e"
+          }
+        }
+      ]
+    }
+  }
+}
diff --git a/examples/phonetic_rules/phonetics/rules.json b/examples/phonetic_rules/phonetics/rules.json
@@ -0,0 +1,25 @@
+{
+  "groups": {
+    "V": [
+      "e",
+      "a"
+    ],
+    "S": [
+      "VC",
+      "CCV",
+      "VyV"
+    ],
+    "C": [
+      "x",
+      "m",
+      "p",
+      "l"
+    ]
+  },
+  "lexis_types": {
+    "word_rule": [
+      "SSS",
+      "SCSS"
+    ]
+  }
+}
diff --git a/examples/phonetic_rules/readme.md b/examples/phonetic_rules/readme.md
@@ -0,0 +1,56 @@
+# Creating new words with phonetic rules
+
+In addition to creating words based on etymological rules and word relationships, Kirum can also generate
+words from base phonetic rulesets, without any pre-existing etymology.
+
+To generate words from phonetic rules, a Kirum project must have one or more JSON rule files under `phonetics/` in the root project directory. These files are formatted as follows:
+
+```json
+{
+  "groups": {
+    "V": [
+      "e",
+      "a"
+    ],
+    "S": [
+      "VC",
+      "CCV",
+      "VyV"
+    ],
+    "C": [
+      "x",
+      "m",
+      "p",
+      "l"
+    ]
+  },
+  "lexis_types": {
+    "word_rule": [
+      "SSS",
+      "SCSS"
+    ]
+  }
+}
+
+```
+
+This phonetic file is divided into two maps:
+- `groups`: breaks down the possible groups of letters, such as vowels and consonants. The key of a group can be any uppercase Unicode character; the values of an individual group can be any Unicode value, or any uppercase group key.
+In the above example, `V` are the language's possible vowels, `S` are the possible syllables, and `C` are
+the possible consonants.
+- `lexis_types`: the possible words that are derived from the specified group rules.
+
+To generate a word from a set of specified phonetic rules, set the lexis's `generate` field to the name of
+the desired `lexis_types` entry (`word_rule` in the example above):
+```json
+    "latin_verb": {
+      "type": "word",
+      "generate": "word_rule",
+      "language": "Latin",
+      "definition": "To buy, remove",
+      "part_of_speech": "verb",
+      "archaic": true
+    }
+```
+
+Note that the generator will not generate a new word if the lexis has both a `generate` field and a `word` field.
diff --git a/examples/phonetic_rules/tree/with_phonetic_rules.json b/examples/phonetic_rules/tree/with_phonetic_rules.json
@@ -0,0 +1,46 @@
+{
+  "words": {
+    "latin_verb": {
+      "type": "word",
+      "generate": "word_rule",
+      "language": "Latin",
+      "definition": "To buy, remove",
+      "part_of_speech": "verb",
+      "archaic": true
+    },
+    "latin_example": {
+      "type": "word",
+      "language": "Latin",
+      "definition": "an instance, model, example",
+      "part_of_speech": "noun",
+      "etymology": {
+        "etymons": [
+          {
+            "etymon": "latin_verb",
+            "transforms": [
+              "latin-from-verb"
+            ]
+          }
+        ]
+      },
+      "archaic": true,
+      "tags": [
+        "example",
+        "default"
+      ],
+      "derivatives": [
+        {
+          "lexis": {
+            "language": "Old French",
+            "definition": "model, example",
+            "part_of_speech": "noun",
+            "archaic": true
+          },
+          "transforms": [
+            "of-from-latin"
+          ]
+        }
+      ]
+    }
+  }
+}
diff --git a/kirum/Cargo.toml b/kirum/Cargo.toml
@@ -18,4 +18,5 @@ walkdir = "2.3.3"
 log = "0.4.0"
 env_logger = "0.9.0"
 tabled = "0.12.1"
-toml = "0.7.5"
+toml = "0.7.5"
+validator = {version = "0.16.1", features = ["derive"]}
diff --git a/kirum/src/cli.rs b/kirum/src/cli.rs
@@ -11,6 +11,9 @@ pub struct Args {
     /// Output file; defaults to stdout if unspecified
     #[clap(short, long, value_parser)]
     pub output: Option<String>,
+    #[clap(short, long, default_value_t=false)]
+    /// Do not print any log output
+    pub quiet: bool,
 
     #[clap(subcommand)]
     pub command: Commands
@@ -30,26 +33,13 @@ pub enum Commands{
     },
     /// Print a graphviz representation of the language
     Graphviz{
-        /// JSON file of defined etymon transforms
-        #[clap(short, long, value_parser)]
-        transforms: Option<String>,
-        /// json file of a language graph
-        #[clap(short, long, value_parser)]
-        graph: Option<String>,
-
         /// path to a directory to read in all transform and graph files
         #[clap(short, long, value_parser)]
         directory: Option<String>,
     },
 
     /// Render a lexicon from an existing set of graph files and transformations
     Render{
-        /// JSON file of defined etymon transforms
-        #[clap(short, long, value_parser)]
-        transforms: Option<String>,
-        /// JSON file of a language graph
-        #[clap(short, long, value_parser)]
-        graph: Option<String>,
         /// path to a directory to read in all transform and graph files.
         /// Can be specified instead of -g -d
         #[clap(short, long, value_parser)]
@@ -74,12 +64,6 @@ pub enum Commands{
 pub enum Generate{
     /// Generate a daughter language from an existing language in a graph.
     Daughter{
-        /// The file path to the existing language graph.
-        #[clap(short, long, value_parser)]
-        graph: Option<String>,
-        /// Path to transforms referenced in existing graph.
-        #[clap(short, long, value_parser)]
-        transforms: Option<String>,
         // path to a directory to read in all transform and graph files. Can be used instead of -t or -g
         #[clap(short, long, value_parser)]
         directory: Option<String>,
@@ -108,7 +92,7 @@ pub enum SeparateValues {
     Archaic,
 }
 
-#[derive(clap::Subcommand, Clone)]
+#[derive(clap::Subcommand, Clone, PartialEq, PartialOrd)]
 pub enum Format{
      /// Print one word per line
     Line,
@@ -122,5 +106,7 @@ pub enum Format{
         /// Optional rhai scripts for processing template data. See https://docs.rs/handlebars/latest/handlebars/#script-helper
         #[clap(short, long, value_parser)]
         rhai_files: Option<Vec<String>>
-    }
+    },
+    /// Prints a JSON object of the language
+    Json
 }
diff --git a/kirum/src/entries.rs b/kirum/src/entries.rs
@@ -39,6 +39,8 @@ pub struct RawLexicalEntry {
     pub archaic: bool,
     pub tags: Option<Vec<String>>,
 
+    /// A tag that tells Kirum to generate the word based on the phonetic ruleset specified by the tag
+    pub generate: Option<String>,
     /// Words that will be added as a derivative of the enclosing Lexis; any value not specified will be taken from the enclosing entry.
     pub derivatives: Option<Vec<Derivative>>
 }
@@ -68,7 +70,9 @@ impl From<RawLexicalEntry> for Lexis{
             lexis_type: source.word_type.unwrap_or("".to_string()), 
             definition: source.definition, 
             archaic: source.archaic,
-            tags: source.tags.unwrap_or(Vec::new())}
+            tags: source.tags.unwrap_or(Vec::new()),
+            word_create: source.generate
+        }
     }
 }
 
@@ -83,18 +87,22 @@ impl From<Lexis> for RawLexicalEntry{
             archaic: value.archaic, 
             tags: if !value.tags.is_empty() {Some(value.tags)} else {None},
             derivatives: None,
+            generate: value.word_create
         }
     }
 }
 
-// take the output of a call to to_vec_etymons() and structure it like a graph json file structure
-pub fn create_json_graph(lex: Vec<(Lexis, Etymology)>) -> WordGraph{
+/// take the output of a call to to_vec_etymons() and structure it like a graph json file structure
+pub fn create_json_graph<F>(lex: Vec<(Lexis, Etymology)>,mut key_gen: F) -> WordGraph
+    where F: FnMut(Lexis) -> String
+    {
     let mut graph: HashMap<String, RawLexicalEntry> = HashMap::new();
 
     for (word, ety) in lex{
         let base: RawLexicalEntry = word.clone().into();
-        let complete = RawLexicalEntry{etymology: Some(ety), ..base};
-        let key = format!("daughter-gen-{}", word.clone().word.unwrap().string_without_sep());
+        let found_ety = if !ety.etymons.is_empty() {Some(ety)} else {None};
+        let complete = RawLexicalEntry{etymology: found_ety, ..base};
+        let key = key_gen(word);
         graph.insert(key, complete);
     }
     WordGraph { words: graph }