Description
Syllabifier for CMU Dictionary Transcriptions.
Description
Implements tidy syllabification of transcriptions. Based on @kylebgorman's 'Python' implementation <https://github.com/kylebgorman/syllabify>.
README.md
syllabifyr
The goal of syllabifyr is to provide tidy syllabification of phonetic transcriptions. So far, only CMU dict transcriptions are supported.
I've largely followed the same approach as Kyle Gorman's Python implementation.
Installation
syllabifyr
is now on CRAN:
install.packages("syllabifyr")
You can also install syllabifyr
from GitHub with:
# install.packages("devtools")
devtools::install_github("JoFrhwld/syllabifyr")
Example
library(syllabifyr)
syllabify("AO0 S T R EY1 L Y AH0")
#> # A tibble: 8 x 4
#> syll part phone stress
#> <dbl> <chr> <chr> <chr>
#> 1 1 nucleus AO 0
#> 2 2 onset S 1
#> 3 2 onset T 1
#> 4 2 onset R 1
#> 5 2 nucleus EY 1
#> 6 2 coda L 1
#> 7 3 onset Y 0
#> 8 3 nucleus AH 0
syllabify(c("AO0", "S", "T", "R", "EY1", "L", "Y", "AH0"))
#> # A tibble: 8 x 4
#> syll part phone stress
#> <dbl> <chr> <chr> <chr>
#> 1 1 nucleus AO 0
#> 2 2 onset S 1
#> 3 2 onset T 1
#> 4 2 onset R 1
#> 5 2 nucleus EY 1
#> 6 2 coda L 1
#> 7 3 onset Y 0
#> 8 3 nucleus AH 0
library(tidyverse)
syllabficiation <- tribble(~word, ~transcription,
"Alaska", "AH0 L AE1 S K AH0",
"constraint", "K AH0 N S T R EY1 N T",
"canyon", "K AE1 N Y AH0 N",
"value", "V AE1 L Y UW0")%>%
mutate(syllable_df = map(transcription, syllabify))
syllabficiation$syllable_df[[1]]
#> # A tibble: 6 x 4
#> syll part phone stress
#> <dbl> <chr> <chr> <chr>
#> 1 1 nucleus AH 0
#> 2 2 onset L 1
#> 3 2 nucleus AE 1
#> 4 2 coda S 1
#> 5 3 onset K 0
#> 6 3 nucleus AH 0
syllabficiation$syllable_df[[2]]
#> # A tibble: 9 x 4
#> syll part phone stress
#> <dbl> <chr> <chr> <chr>
#> 1 1 onset K 0
#> 2 1 nucleus AH 0
#> 3 1 coda N 0
#> 4 2 onset S 1
#> 5 2 onset T 1
#> 6 2 onset R 1
#> 7 2 nucleus EY 1
#> 8 2 coda N 1
#> 9 2 coda T 1
syllabficiation$syllable_df[[3]]
#> # A tibble: 6 x 4
#> syll part phone stress
#> <dbl> <chr> <chr> <chr>
#> 1 1 onset K 1
#> 2 1 nucleus AE 1
#> 3 1 coda N 1
#> 4 2 onset Y 0
#> 5 2 nucleus AH 0
#> 6 2 coda N 0
syllabficiation$syllable_df[[4]]
#> # A tibble: 5 x 4
#> syll part phone stress
#> <dbl> <chr> <chr> <chr>
#> 1 1 onset V 1
#> 2 1 nucleus AE 1
#> 3 1 coda L 1
#> 4 2 onset Y 0
#> 5 2 nucleus UW 0