Code:
# This script reads a tab-separated file and syllabifies the columns pointed to by the variable'phons' (ot the first column, by default).
# gawk -f syll.gk fn>fn.out
BEGIN {
FS="\t";
OFS="\t";
if (code=="brulex") {
V="[aiouyîâêôû^eEéAO_]"; # vowels
C="[ptkbdgfs/vzjmn/shN£]"; # consonants except liquids & semivowels
C1="[pkbgfs/vzj]";
L="[lR]"; # liquids
Y="[ïü\377]"; # semi-vowels \377 stands for y-umlaut
X="[ptkbdgfs/vzjmnN£xlRïü\377]"; # all consonants
} else { # code == LAIPTTS)
V="[iYeE2591a@oO§uy*]"; # Vowels
C="[pbmfvtdnNkgszxSZGh/sh]"; # Consonants except liquids & semivowels
C1="[pkbgfsSvzZ]";
L="[lR]"; # liquids
Y="[j8w]"; # semi-vowels
X="[pbmfvtdnNkgszSZGlRrhxGj8w]"; # all consonants, including semivowels
}
if (phons==0) phons=1;
}
{
a=$phons;
n=1
}
{
while (i= match (a, V V)) {
a=substr(a,1,i) "-" substr(a,i+1,length(a)); n++; }
while (i= match(a, V X V)) {
a=substr(a,1,i) "-" substr(a,i+1,length(a)); n++}
while (i=match(a, V Y Y V)) {
a=substr(a,1,i+1) "-" substr(a,i+2, length(a)); n++}
while (i=match(a, V C Y V)) {
a=substr(a,1,i) "-" substr(a,i+1, length(a)); n++}
while (i=match(a, V L Y V)) {
a=substr(a,1,i) "-" substr(a,i+1, length(a)); n++}
while (i=match(a, V "[td]R" V)) {
a=substr(a,1,i) "-" substr(a,i+1, length(a)); n++}
while (i=match(a, V "[td]R" Y V)) {
a=substr(a,1,i) "-" substr(a,i+1, length(a)); n++}
while (i=match(a, V C1 L V)) {
a=substr(a,1,i) "-" substr (a,i+1,length(a)); n++}
while (i=match(a, V X X V)) {
a=substr(a,1,i+1) "-" substr(a,i+2, length(a)); n++}
while (i= match(a, V X X X V)) {
a=substr(a,1,i+1) "-" substr(a,i+2,length(a)); n++}
while (i=match(a, V X X X X V)) {
a=substr(a,1,i+1) "-" substr(a,i+2,length(a)); n++}
while (i=match(a, V X X X X X V)) {
a=substr(a,1,i+1) "-" substr(a,i+2,length(a)); n++}
# suppress the final schwa (^) in some multisyllabic words
# notr^ -> notR
# ar-bR^ => aRbR
b=gensub(/-([^-]+)\^$/,"\\1",1,a) ;
if (b!=a) { # there is a schwa to delete
a=b;
$phons=substr($phons,1,length($phons)-1);
n--;
}
# meme chose quand schwa='*'
b=gensub(/-([^-]+)\*$/,"\\1",1,a) ;
if (b!=a) { # there is a schwa to delete
a=b;
$phons=substr($phons,1,length($phons)-1);
n--;
}
# compute the CVY skeleton
sk= " ";
for (i=1;i<=length(a);i++) {
ph=substr(a,i,1);
if (ph~V) sk=sk"V";
else if ((ph~C)||(ph~L)) sk=sk"C";
else if (ph~Y) sk=sk"Y";
else sk=sk ph;
}
}
{ print $0,a,n,sk }