Skip to content

Commit

Permalink
Merge pull request #28 from fearful-symmetry/phonetic_generator
Browse files Browse the repository at this point in the history
Add Phonetic generator
  • Loading branch information
fearful-symmetry committed Aug 23, 2023
2 parents b31c9e6 + 5e67ae9 commit 94850ca
Show file tree
Hide file tree
Showing 23 changed files with 967 additions and 106 deletions.
35 changes: 35 additions & 0 deletions examples/phonetic_rules/etymology/ety.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
{
"transforms": {
"latin-from-verb": {
"transforms": [
{
"match_replace": {
"old": "ere",
"new": "plum"
}
},
{
"prefix": {
"value": "ex"
}
}
]
},
"of-from-latin": {
"transforms": [
{
"match_replace": {
"old": "exe",
"new": "esse"
}
},
{
"match_replace": {
"old": "um",
"new": "e"
}
}
]
}
}
}
25 changes: 25 additions & 0 deletions examples/phonetic_rules/phonetics/rules.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
{
"groups": {
"V": [
"e",
"a"
],
"S": [
"VC",
"CCV",
"VyV"
],
"C": [
"x",
"m",
"p",
"l"
]
},
"lexis_types": {
"word_rule": [
"SSS",
"SCSS"
]
}
}
56 changes: 56 additions & 0 deletions examples/phonetic_rules/readme.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# Creating new words with phonetic rules

In addition to creating words based on etymological rules and word relationships, Kirum can also generate
words from base phonetic rulesets, without any pre-existing etymology.

To generate words from phonetic rules, a Kirum project must have one or more JSON rule files under `phonetics/` in the root project directory. These files are formatted as follows:

```json
{
"groups": {
"V": [
"e",
"a"
],
"S": [
"VC",
"CCV",
"VyV"
],
"C": [
"x",
"m",
"p",
"l"
]
},
"lexis_types": {
"word_rule": [
"SSS",
"SCSS"
]
}
}

```

This phonetic file is divided into two maps:
- `groups`: defines the possible groups of letters, such as vowels, consonants, and syllables. The key of a group can be any uppercase Unicode character; the values of an individual group can be any Unicode value or any uppercase group key.
In the above example, `V` are the language's possible vowels, `S` are the possible syllables, and `C` are
the possible consonants.
- `lexis_types`: the possible word forms that are derived from the specified group rules.

To generate a word from a set of specified phonetic rules, simply set the lexis's `generate` field to
the name of the desired `lexis_types` entry:
```json
"latin_verb": {
"type": "word",
"generate": "word_rule",
"language": "Latin",
"definition": "To buy, remove",
"part_of_speech": "verb",
"archaic": true
}
```

Note that the generator will not create a new word if the lexis has both a `generate` and a `word` field.
46 changes: 46 additions & 0 deletions examples/phonetic_rules/tree/with_phonetic_rules.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
{
"words": {
"latin_verb": {
"type": "word",
"generate": "word_rule",
"language": "Latin",
"definition": "To buy, remove",
"part_of_speech": "verb",
"archaic": true
},
"latin_example": {
"type": "word",
"language": "Latin",
"definition": "an instance, model, example",
"part_of_speech": "noun",
"etymology": {
"etymons": [
{
"etymon": "latin_verb",
"transforms": [
"latin-from-verb"
]
}
]
},
"archaic": true,
"tags": [
"example",
"default"
],
"derivatives": [
{
"lexis": {
"language": "Old French",
"definition": "model, example",
"part_of_speech": "noun",
"archaic": true
},
"transforms": [
"of-from-latin"
]
}
]
}
}
}
3 changes: 2 additions & 1 deletion kirum/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ walkdir = "2.3.3"
log = "0.4.0"
env_logger = "0.9.0"
tabled = "0.12.1"
toml = "0.7.5"
toml = "0.7.5"
validator = {version = "0.16.1", features = ["derive"]}
28 changes: 7 additions & 21 deletions kirum/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ pub struct Args {
/// Output file; defaults to stdout if unspecified
#[clap(short, long, value_parser)]
pub output: Option<String>,
#[clap(short, long, default_value_t=false)]
/// Do not print any log output
pub quiet: bool,

#[clap(subcommand)]
pub command: Commands
Expand All @@ -30,26 +33,13 @@ pub enum Commands{
},
/// Print a graphviz representation of the language
Graphviz{
/// JSON file of defined etymon transforms
#[clap(short, long, value_parser)]
transforms: Option<String>,
/// json file of a language graph
#[clap(short, long, value_parser)]
graph: Option<String>,

/// path to a directory to read in all transform and graph files
#[clap(short, long, value_parser)]
directory: Option<String>,
},

/// Render a lexicon from an existing set of graph files and transformations
Render{
/// JSON file of defined etymon transforms
#[clap(short, long, value_parser)]
transforms: Option<String>,
/// JSON file of a language graph
#[clap(short, long, value_parser)]
graph: Option<String>,
/// path to a directory to read in all transform and graph files.
/// Can be specified instead of -g -d
#[clap(short, long, value_parser)]
Expand All @@ -74,12 +64,6 @@ pub enum Commands{
pub enum Generate{
/// Generate a daughter language from an existing language in a graph.
Daughter{
/// The file path to the existing language graph.
#[clap(short, long, value_parser)]
graph: Option<String>,
/// Path to transforms referenced in existing graph.
#[clap(short, long, value_parser)]
transforms: Option<String>,
// path to a directory to read in all transform and graph files. Can be used instead of -t or -g
#[clap(short, long, value_parser)]
directory: Option<String>,
Expand Down Expand Up @@ -108,7 +92,7 @@ pub enum SeparateValues {
Archaic,
}

#[derive(clap::Subcommand, Clone)]
#[derive(clap::Subcommand, Clone, PartialEq, PartialOrd)]
pub enum Format{
/// Print one word per line
Line,
Expand All @@ -122,5 +106,7 @@ pub enum Format{
/// Optional rhai scripts for processing template data. See https://docs.rs/handlebars/latest/handlebars/#script-helper
#[clap(short, long, value_parser)]
rhai_files: Option<Vec<String>>
}
},
/// Prints a JSON object of the language
Json
}
18 changes: 13 additions & 5 deletions kirum/src/entries.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ pub struct RawLexicalEntry {
pub archaic: bool,
pub tags: Option<Vec<String>>,

/// A tag that tells Kirum to generate the word based on the phonetic ruleset specified by the tag
pub generate: Option<String>,
/// Words that will be added as a derivative of the enclosing Lexis; any value not specified will be taken from the enclosing entry.
pub derivatives: Option<Vec<Derivative>>
}
Expand Down Expand Up @@ -68,7 +70,9 @@ impl From<RawLexicalEntry> for Lexis{
lexis_type: source.word_type.unwrap_or("".to_string()),
definition: source.definition,
archaic: source.archaic,
tags: source.tags.unwrap_or(Vec::new())}
tags: source.tags.unwrap_or(Vec::new()),
word_create: source.generate
}
}
}

Expand All @@ -83,18 +87,22 @@ impl From<Lexis> for RawLexicalEntry{
archaic: value.archaic,
tags: if !value.tags.is_empty() {Some(value.tags)} else {None},
derivatives: None,
generate: value.word_create
}
}
}

// take the output of a call to to_vec_etymons() and structure it like a graph json file structure
pub fn create_json_graph(lex: Vec<(Lexis, Etymology)>) -> WordGraph{
/// take the output of a call to to_vec_etymons() and structure it like a graph json file structure
pub fn create_json_graph<F>(lex: Vec<(Lexis, Etymology)>,mut key_gen: F) -> WordGraph
where F: FnMut(Lexis) -> String
{
let mut graph: HashMap<String, RawLexicalEntry> = HashMap::new();

for (word, ety) in lex{
let base: RawLexicalEntry = word.clone().into();
let complete = RawLexicalEntry{etymology: Some(ety), ..base};
let key = format!("daughter-gen-{}", word.clone().word.unwrap().string_without_sep());
let found_ety = if !ety.etymons.is_empty() {Some(ety)} else {None};
let complete = RawLexicalEntry{etymology: found_ety, ..base};
let key = key_gen(word);
graph.insert(key, complete);
}
WordGraph { words: graph }
Expand Down
Loading

0 comments on commit 94850ca

Please sign in to comment.