Modulus:grc-translit

Purpose

This module transliterates text written in Ancient Greek (“Lingua Graeca Antiqua”).

The module should preferably not be called directly from templates or other modules. To use it from a template, use {{xlit}}. Within a module, use Module:languages#Language:transliterate.

For testcases, see Module:grc-translit/testcases.

Functions

tr(text, lang, sc): Transliterates a given piece of text written in the script specified by sc and the language specified by lang. When the transliteration fails, returns nil.

-- Table of functions made available to callers of this module.
local export = {}

-- Helper module; this file uses its `tokenize` function.
local m_utilities = require('Module:grc-utilities')
-- Data module; its `named` table supplies the diacritic values aliased below.
local m_data = require('Module:grc-utilities/data')
local chars = m_data.named

-- Short aliases for the entries of `m_data.named` used in this module.
-- NOTE(review): presumably each value is a single combining diacritic
-- character -- confirm against Module:grc-utilities/data.
local acute = chars.acute
local grave = chars.grave
local circumflex = chars.circum
local diaeresis = chars.diaeresis
local smooth = chars.smooth
local rough = chars.rough
local macron = chars.macron
local breve = chars.breve
local subscript = chars.subscript

-- Output used for the Greek circumflex in the transliteration table.
-- NOTE(review): presumably the Latin-script combining circumflex -- confirm.
local hat = chars.Latin_circum

-- Character-by-character transliteration table: maps a lower-case Greek letter
-- or a diacritic to the string that replaces it in the output. export.tr looks
-- up every character of a lower-cased token here; a character with no entry is
-- left as it is.
local tt = {
	-- Vowels
	["α"] = "a",
	["ε"] = "e",
	-- η and ω share a base letter with ε and ο and are told apart by a macron.
	["η"] = "e"..macron,
	["ι"] = "i",
	["ο"] = "o",
	["υ"] = "y",
	["ω"] = "o"..macron,

	-- Consonants
	["β"] = "b",
	["γ"] = "g",
	["δ"] = "d",
	["ζ"] = "z",
	["θ"] = "th",
	["κ"] = "k",
	["λ"] = "l",
	["μ"] = "m",
	["ν"] = "n",
	["ξ"] = "x",
	["π"] = "p",
	["ρ"] = "r",
	-- Both forms of sigma (medial and final) give the same output.
	["σ"] = "s",
	["ς"] = "s",
	["τ"] = "t",
	["φ"] = "ph",
	["χ"] = "kh",
	["ψ"] = "ps",
	
	-- Archaic letters
	["ϝ"] = "w",
	["ϻ"] = "ś",
	["ϙ"] = "q",
	["ϡ"] = "š",
	["ͷ"] = "v",
	
	-- Diacritics
	-- Macron, diaeresis, grave and acute are carried over unchanged.
	[macron] = macron,
	-- Breve and both breathings produce no output here; export.tr adds the
	-- <h> for a rough breathing in a separate step.
	[breve] = '',
	[smooth] = '',
	[rough] = '',
	[diaeresis] = diaeresis,
	[grave] = grave,
	[acute] = acute,
	-- The Greek circumflex is replaced by the value of `hat`.
	[circumflex] = hat,
	-- The subscript mark is written out as a full <i>.
	[subscript] = 'i',
}

-- NOTE(review): `diacritics` is not referenced anywhere in the visible part of
-- this file -- confirm whether it is still needed before removing it.
local diacritics = m_data.all

-- Transliterates Ancient Greek text into Latin script.
--
-- Parameters:
--   text  the string to transliterate
--   lang  language code; only passed on to Module:Cprt-translit
--   sc    script code; "Cprt" delegates the whole job to Module:Cprt-translit
--
-- Returns the transliteration as an NFC-normalized string (or whatever
-- Module:Cprt-translit returns when sc is "Cprt").
function export.tr(text, lang, sc)
	-- If the script is given as Cprt, then forward the transliteration to that module
	if sc == "Cprt" then
		return require("Module:Cprt-translit").tr(text, lang, sc)
	end
	
	-- A rough-breathing sign standing on its own is rendered as a bare <h>.
	if text == '῾' then
		return 'h'
	end
	
	local tokens = m_utilities.tokenize(text)

	-- Now read the tokens. ipairs (rather than pairs) guarantees they are
	-- visited in sequence order, which matters both for the order of the
	-- output and for the tokens[i - 1] / tokens[i + 1] neighbour look-ups.
	local output = {}
	for i, token in ipairs(tokens) do
		local lowered = mw.ustring.lower(token)
		
		-- Substitute each character in the token for its transliteration;
		-- characters without an entry in tt are kept unchanged.
		-- `translit` must be local: a bare assignment would create a global.
		local translit = mw.ustring.gsub(lowered, '.', function(x) return tt[x] end)
		
		if token == 'γ' and tokens[i + 1] and mw.ustring.match(tokens[i + 1],'[κγχξ]') then
			-- γ before a velar should be <n>
			translit = 'n'
		elseif token == 'ρ' and tokens[i - 1] and tokens[i - 1] == 'ρ' then
			-- ρ after ρ should be <rh>
			translit = 'rh'
		elseif mw.ustring.match(token, '[αΑ].*'..subscript) then
			-- add macron to ᾳ
			translit = mw.ustring.gsub(translit, '([aA])','%1'..macron)
		end
		
		-- tt drops the rough breathing, so the <h> is added here: after the
		-- letter for rho, before it for everything else.
		if mw.ustring.match(token, rough) then
			if mw.ustring.match(token, '[Ρρ]') then
				translit = translit .. 'h'
			else -- vowel
				translit = 'h' .. translit
			end
		end
	
		-- When a macron is followed by the circumflex, drop the macron and
		-- keep only the circumflex.
		if mw.ustring.match(translit, macron .. '[' .. rough .. smooth .. ']?' .. hat) then
			translit = mw.ustring.gsub(translit, macron, '')
		end
		
		-- The look-up above was done on the lower-cased token; if the token
		-- contained any upper-case character, capitalize the first character
		-- of the result and lower-case the rest.
		if token ~= lowered then
			translit = mw.ustring.upper(mw.ustring.sub(translit, 1, 1) ) .. mw.ustring.lower(mw.ustring.sub(translit, 2) )
		end
		
		table.insert(output, translit)
	end
	
	-- Join the pieces and compose base letters with their combining marks.
	return mw.ustring.toNFC(table.concat(output))
end

return export