-- Módulo:generar-pron/tr
--
-- La documentación para este módulo puede ser creada en Módulo:generar-pron/tr/doc
-- tomado de [[:en:Module:tr-IPA]]
-- adaptado por Tmagc

-- Table returned at the end of this module; the public entry point is attached to it.
local export = {}

-- Project string helper module; every text operation below goes through it.
local m_str = require("Módulo:String")

-- Short local aliases for the m_str helpers used in this file.
local u = m_str.char -- presumably builds a character from a code point; confirm in Módulo:String
local strsubn = m_str.gsub
local strsubrep = m_str.gsub_rep
local strfind = m_str.find
local strstrip = m_str.strip
local strhtml = m_str.encode_html

-- CONVENTION: upper case for patterns enclosed in square brackets, lower case for everything else
-- Code points U+02C8 and U+02CC (IPA primary and secondary stress marks).
local ac_primario = u(0x02C8)
local ac_secundario = u(0x02CC)

local acentos_ipa = ac_primario..ac_secundario
local ACENTOS_IPA = "[" .. acentos_ipa .. "]"

-- Syllable and word separators. None of these are referenced by the functions
-- visible in this file.
local divsil = u(0xFFF0) -- NOTE(review): looks like an internal syllable-division marker; confirm
local sepsil = "%-." .. divsil
local SEPARADORES_SILABICOS = "[" .. sepsil .. "]"
local seppal = "# "
local separador_excepto_palabras = acentos_ipa .. sepsil
local separador = separador_excepto_palabras .. seppal
local SEPARADOR = "[" .. separador .. "]"

-- PUNTUACION: marks that normalizar() turns into " | " boundaries.
-- PUNTUACION_EXTRA: the same marks plus quotes and apostrophes; whatever still
-- matches after the first pass is deleted by normalizar().
local PUNTUACION = "[%(%)%[%]%{%}¡!¿?.,;:–—]"
local PUNTUACION_EXTRA = "[%(%)%[%]%{%}¡!¿?.,;:–—\"“”„‟‘’«»»«‹››‹'´]"

-- Letters accepted as input once the text has been lower-cased and converted
-- to the Latin alphabet.
local cons = "bçcdfgğhjklmnprsştvyzqxw" -- consonants
-- Vowels. "î" is included because lowerc produces it (from "Î") and phon knows
-- how to transcribe it; leaving it out made normalizar() reject such words.
local vow = "aeiıoöuüâûî"

-- Bracketed character classes built from the letter lists above.
local C = "["..cons.."]"
local V = "["..vow.."]"

-- Characters allowed to remain after cleanup: the letters above, the middle
-- dot, the "|" boundary and whitespace (punctuation is assumed to be removed
-- already).
local permitido = cons..vow.."·|%s"

-- Upper-case to lower-case map, applied character by character in normalizar().
-- The Latin part follows Turkish casing: "I" becomes dotless "ı" and "İ" becomes "i".
-- Characters without an entry are left as they are.
local lowerc = {
--Armeno-Turkish
	["Ո"]="ո", ["Ա"]="ա", ["Գ"]="գ", ["Ե"]="ե", ["Զ"]="զ", ["Է"]="է", 
	["Ը"]="ը", ["Թ"]="թ", ["Ժ"]="ժ", ["Ի"]="ի", ["Լ"]="լ", ["Խ"]="խ", 
	["Կ"]="կ", ["Հ"]="հ", ["Ղ"]="ղ", ["Ճ"]="ճ", ["Մ"]="մ", ["Ն"]="ն",
	["Շ"]="շ", ["Չ"]="չ", ["Պ"]="պ", ["Ս"]="ս", ["Վ"]="վ", ["Տ"]="տ", 
	["Ր"]="ր", ["Փ"]="փ", ["Ք"]="ք", ["Օ"]="օ", ["Ֆ"]="ֆ", ["Յ"]="յ",
--Latin
	["A"]="a", ["B"]="b", ["C"]="c", ["Ç"]="ç", ["D"]="d", ["E"]="e",
	["F"]="f", ["G"]="g", ["Ğ"]="ğ", ["H"]="h", ["I"]="ı", ["İ"]="i",
	["J"]="j", ["K"]="k", ["L"]="l", ["M"]="m", ["N"]="n", ["O"]="o",
	["Ö"]="ö", ["P"]="p", ["R"]="r", ["S"]="s", ["Ş"]="ş", ["T"]="t",
	["U"]="u", ["Ü"]="ü", ["V"]="v", ["Y"]="y", ["Z"]="z", ["Â"]="â",
	["Û"]="û", ["Q"]="q", ["X"]="x", ["W"]="w", ["Î"]="î"
}

-- Single lower-case Armenian letters and the Latin-based symbol each one is
-- replaced with in convert_armeno_turkish_to_latin().
-- NOTE(review): the values "ẍ" and "ʃ" are not part of `permitido`, so
-- normalizar() rejects any text containing խ or շ; confirm whether that is intended.
-- NOTE(review): "ո" and "յ" (both produced by lowerc) have no entry here; "ո" is
-- only handled as part of the digraph "ու".
local monographs = {
	["ա"]="a", ["գ"]="k", ["ե"]="y", ["զ"]="z", ["է"]="e", ["ը"]="ı",
	["թ"]="t", ["ժ"]="j", ["ի"]="i", ["լ"]="l", ["խ"]="ẍ", ["կ"]="g",
	["հ"]="h", ["ղ"]="g", ["ճ"]="c", ["մ"]="m", ["ն"]="n", ["շ"]="ʃ",
	["չ"]="ç", ["պ"]="b", ["ս"]="s", ["վ"]="v", ["տ"]="d", ["ր"]="r",
	["փ"]="p", ["ք"]="k", ["օ"]="o", ["ֆ"]="f"
}

-- Armenian digraphs and the temporary one-character placeholder that stands in
-- for each of them, so that the later single-letter pass does not split them.
local placeholders = {
    ["ու"] = "ù",
    ["իւ"] = "ì",
    ["էօ"] = "ò",
}

-- Final Latin vowel for each temporary placeholder defined in `placeholders`.
local placeholder_to_latin = {
    ["ù"] = "u",
    ["ì"] = "ü",
    ["ò"] = "ö",
}

-- Turkish letter to IPA symbol map, applied character by character in the last
-- stage of generar_pron(). Characters without an entry are left unchanged.
local phon = {
	["c"]="d͡ʒ", ["ç"]="t͡ʃ", ["ğ"]="ɣ", ["ş"]="ʃ",
	["b"]="b", ["d"]="d", ["f"]="f", ["g"]="ɡ",
	["h"]="h", ["j"]="ʒ", ["k"]="k", ["l"]="l",
	["m"]="m", ["n"]="n", ["p"]="p", ["r"]="ɾ",
	["s"]="s", ["t"]="t", ["v"]="v", ["y"]="j", ["z"]="z",
	["q"]="k", ["x"]="ks", ["w"]="v", ["î"]="iː",
	["a"]="a", ["â"]="a", ["e"]="e", ["ı"]="ɯ", ["i"]="i",
	["o"]="o", ["ö"]="œ", ["u"]="u", ["û"]="u", ["ü"]="y",
}

--- Transliterate lower-case Armeno-Turkish text into the Latin alphabet.
-- Characters that appear in none of the lookup tables are left untouched.
-- @param text lower-cased input string
-- @return the transliterated string
local function convert_armeno_turkish_to_latin(text)
	local converted = text

	-- Digraphs first: each one collapses into a one-character marker so the
	-- per-character passes below cannot split it.
	for armenian_pair, marker in pairs(placeholders) do
		converted = strsubn(converted, armenian_pair, marker)
	end

	-- Per-character passes: markers become Latin vowels, then the remaining
	-- single Armenian letters are mapped.
	for _, mapping in ipairs({ placeholder_to_latin, monographs }) do
		converted = strsubn(converted, '.', mapping)
	end

	return converted
end

--- Prepare raw text for transcription.
-- Lower-cases it, converts Armeno-Turkish letters to Latin, turns delimiting
-- punctuation into " | " boundaries, deletes the remaining punctuation and
-- replaces hyphens with spaces.
-- @param texto raw word or phrase
-- @return the cleaned string, or nil when a character outside `permitido` remains
local function normalizar(texto)
	local limpio = strsubn(texto, '.', lowerc)
	limpio = convert_armeno_turkish_to_latin(limpio)

	-- Ordered cleanup passes: {pattern, replacement}
	local pasos = {
		{ PUNTUACION, " | " },    -- fragment delimiters become IPA foot boundaries
		{ PUNTUACION_EXTRA, "" }, -- any punctuation still left is dropped
		{ "[%-‐]", " " },         -- hyphens turn into spaces
	}
	for _, paso in ipairs(pasos) do
		limpio = strsubrep(limpio, paso[1], paso[2])
	end

	-- Anything outside the accepted alphabet makes the input unusable
	local no_permitido = "[^" .. permitido .. "]"
	if strfind(limpio, no_permitido) then
		return nil
	end

	-- Collapse duplicated bars and runs of whitespace, then trim both ends
	limpio = strsubrep(limpio, "%s*|%s*|%s*", " | ")
	limpio = strsubrep(limpio, "%s+", " ")
	limpio = strstrip(limpio, "[%s|]+")

	return limpio
end

--- Build the IPA transcription of a Turkish word or phrase.
-- @param text     raw spelling (Latin or Armeno-Turkish)
-- @param phonetic when truthy, apply the allophonic (narrow) rules before the
--                 letter-to-IPA mapping
-- @return a list holding one list with the HTML-encoded transcription, or an
--         empty list when the text cannot be normalized
local function generar_pron(text, phonetic)
	text = normalizar(text)

	-- normalizar() returns nil when characters outside the accepted alphabet
	-- remain; report "no pronunciation" instead of handing nil to strsubn
	if not text then
		return {}
	end

	if phonetic then
		-- Words that keep their voiced final consonant. The lookup has to use
		-- the normalized (lower-case) spelling and must happen before the rules
		-- below rewrite the text (e.g. "kod" becomes "kʰod").
		local exceptions = {
			["ad"] = true,
			["hac"] = true,
			["id"] = true,
			["kod"] = true,
			["od"] = true
		}
		local keeps_voiced_final = exceptions[text]

		-- Handle k, g palatalization rules.
		-- These patterns have no captures, so "%1" stands for the whole match
		-- (assuming strsubn follows string.gsub semantics): the new symbol is
		-- inserted next to the original letter, and the following
		-- "ćk"/"ɟg"/"kć"/"gɟ" passes remove the original letter.
		text = strsubn(text, "k[ɛeiyœöüÿâû]", "ć%1")
		text = strsubn(text, "g[ɛeiyœöüÿâû]", "ɟ%1")
		text = strsubn(text, "ćk", "ć")
		text = strsubn(text, "ɟg", "ɟ")
		text = strsubn(text, "[ɛeiyœöüÿâû]k", "%1ć")
		text = strsubn(text, "[ɛeiyœöüÿâû]g", "%1ɟ")
		text = strsubn(text, "kć", "ć")
		text = strsubn(text, "gɟ", "ɟ")

		-- Handle ğ rules
		text = strsubn(text, "([aeiıoöuüÿâû])ğ([bçcdfgğhjklmnprsştvyzqxwý])",
			"%1ː%2")
		text = strsubn(text, "([ɛeiyöüÿ])ğ([ɛeiyöüÿ])", "%1ý%2")
		text = strsubn(text, "ğ$", "ː")
		text = strsubn(text, "([ɛeiyÿöü])ğ", "%1ý")
		text = strsubn(text, "ğ", "")

		-- Handle l rules: every l becomes ɫ, then l is restored next to the
		-- vowels listed in the class (same whole-match "%1" trick as above)
		text = strsubn(text, "l", "ɫ")
		text = strsubn(text, "ɫ[ɛeiyœöüÿâû]", "l%1")
		text = strsubn(text, "[ɛeiyœöüÿâû]ɫ", "%1l")
		text = strsubn(text, "lɫ", "ɫ")
		text = strsubn(text, "ɫl", "l")

		-- Handle aspirated p, t, c, k at the start of the text
		text = strsubn(text, "^p", "pʰ")
		text = strsubn(text, "^t", "tʰ")
		text = strsubn(text, "^ć", "ćʰ")
		text = strsubn(text, "^k", "kʰ")

		-- Front final /h/
		text = strsubn(text, "[ɛeiyœöüÿâû]h$", "%1ḉ")
		text = strsubn(text, "hḉ", "ḉ")
		text = strsubn(text, "h$", "ẍ")

		-- Only apply the devoicing rule if the word isn't in the exception list
		if not keeps_voiced_final then
			-- Devoice final /b, d, d͡ʒ, ɡ, ɟ/
			text = strsubn(text, "b$", "p")
			text = strsubn(text, "d$", "t")
			text = strsubn(text, "d͡ʒ$", "č")
			text = strsubn(text, "ɡ$", "k")
			text = strsubn(text, "g$", "k")
			text = strsubn(text, "ɟ$", "ć")
			text = strsubn(text, "c$", "č")
		end

		-- Devoice /ɾ/ and make /ɫ/ and /l/ voiceless in appropriate conditions
		text = strsubn(text, "ɾ$", "ɾ̥")
		text = strsubn(text, "ɾ([ptćkḉsčʃ])", "ɾ̥%1")
		text = strsubn(text, "r$", "ɾ̥")
		text = strsubn(text, "r([ptćkḉsčʃ])", "ɾ̥%1")
		text = strsubn(text, "ɫ$", "ɫ̥")
		text = strsubn(text, "ɫ([ptćkḉsčʃ])", "ɫ̥%1")
		text = strsubn(text, "l$", "l̥")
		text = strsubn(text, "l([ptćkḉsčʃ])", "l̥%1")

		-- Lower /e/ before coda /m, n, l, r/
		text = strsubn(text, "e([mnlrɾɾ̥l̥ɫ̥])", "ɛ%1")

		-- Handle the lowering of specific vowels in final position
		text = strsubn(text, "i$", "ɪ")
		text = strsubn(text, "ÿ$", "ʏ")
		text = strsubn(text, "u$", "ʊ")
		text = strsubn(text, "e$", "ɛ")
	end

	-- phonemic: map every remaining letter to IPA, then resolve the
	-- intermediate symbols introduced by the rules above
	text = strsubn(text, '.', phon)
	text = strsubn(text, "ć", "c")
	text = strsubn(text, "ý", "j")
	text = strsubn(text, "ÿ", "y")
	text = strsubn(text, "g", "ɡ")
	text = strsubn(text, ":", "ː")
	text = strsubn(text, "ẍ", "x")
	text = strsubn(text, "č", "t͡ʃ")
	text = strsubn(text, "ḉ", "ç")
	return {{strhtml(text)}}
end

--- External entry point: receives the page title and the template arguments.
-- Uses the title as spelling aid when none was supplied and, when neither a
-- phonetic ("fone") nor a phonemic ("fono") transcription was given, stores
-- the generated phonetic one in args["fone"].
-- @param titulo page title
-- @param args   template arguments; "ayuda", "fone" and "fono" are read as lists
-- @return the same args table, possibly modified in place
function export.procesar_pron_args(titulo, args)
	local ayudas = args["ayuda"]
	if #ayudas < 1 then
		ayudas[1] = titulo
	end

	-- A transcription supplied by the editor always wins
	if #args["fone"] >= 1 or #args["fono"] >= 1 then
		return args
	end

	args["fone"] = generar_pron(ayudas[1], true)
	return args
end

return export