Module:tl-sortkey

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module generates sort keys for Tagalog-language text. It is also used to sort Bikol Central, Cebuano, Hiligaynon, Hanunoo, Ilocano, Kankanaey, Kapampangan, and Waray-Waray. The module should preferably not be called directly from templates or other modules. To use it from a template, use {{sortkey}}. Within a module, use Module:languages#Language:makeSortKey.

For testcases, see Module:tl-sortkey/testcases.

Functions

makeSortKey(text, lang, sc)
Generates a sortkey for a given piece of text written in the script specified by the code sc, and language specified by the code lang.
When the sort fails, returns nil.

Alphabetic order: a b c d e ë f g h i j k l m n ñ ng o p q r s t u v w x y z.

Examples

  • ANDAR¹
andar
  • AN₃¹
ang
  • AN₃AL¹
angal
  • AN₃AL¹
ang̃al
  • ANTALA¹
antala
  • AN₂O¹
Año
  • BAMBAN¹
Bamban
  • BAMBAN₃¹
Bambang
  • BANAL¹
banal
  • BAN₃A¹
banga
  • BAN₃GA¹
bangga
  • BATANES¹
Batanes
  • BATAN₃AS¹
Batangas
  • BIN₂AN¹
Biñan
  • BINIBINI¹
binibini
  • BINYAG¹
binyag
  • BIN₃I¹
bingi

-- Table returned by this module; the public functions are attached to it below.
local export = {}

-- Shorthand for building a string from Unicode code points.
local u = mw.ustring.char

-- Two private-use code points appended to a base letter as sort markers:
-- `a` is used for the ë/ñ substitutions, `b` for the "ng" digraph.
local a = u(0xF000)
local b = u(0xF001)

-- Single-character substitutions, looked up per character by makeSortKey.
local oneChar = {
	["ë"] = "e" .. a,
	["ñ"] = "n" .. a,
}

-- Spellings of the "ng" digraph; every variant maps to the same replacement.
local twoChars = {
	["ng"] = "n" .. b,
	["ng̃"] = "n" .. b,
	["ñg"] = "n" .. b,
}

-- Digraph spellings from `twoChars`, ordered longest first (by byte length).
-- pairs() gives no guaranteed traversal order, and "ng" is a prefix of "ng̃":
-- if the shorter spelling were replaced first, the combining tilde would be
-- left behind in the sort key. Building the ordered list once at load time
-- makes the result deterministic.
local orderedDigraphs = {}
for from, to in pairs(twoChars) do
	-- Keys are normalised the same way as the input text so they still match.
	orderedDigraphs[#orderedDigraphs + 1] = {
		from = mw.ustring.toNFC(from) or from,
		to = to,
	}
end
table.sort(orderedDigraphs, function(x, y)
	if #x.from ~= #y.from then
		return #x.from > #y.from
	end
	return x.from < y.from
end)

--- Generates the sort key for a piece of text.
-- @param text  the text to build a sort key for
-- @param lang  language code (accepted for interface compatibility; unused)
-- @param sc    script code (accepted for interface compatibility; unused)
-- @return the upper-cased sort key, in which ë/ñ and the "ng" digraph are
--         rewritten as a base letter followed by a private-use marker
function export.makeSortKey(text, lang, sc)
	-- Normalise and lower-case up front so that the substitutions below also
	-- apply to decomposed input and to capitalised spellings ("Ng", "Ñ").
	-- The result is upper-cased at the end, so lower-casing here only affects
	-- which substitutions match.
	text = mw.ustring.lower(mw.ustring.toNFC(text))

	-- Move any character in the range "!" to "&" behind the run of non-space
	-- characters that follows it.
	text = mw.ustring.gsub(text, "([!-&])([^%s]+)", "%2%1")

	-- Collapse every spelling of the "ng" digraph, longest spelling first.
	for _, digraph in ipairs(orderedDigraphs) do
		text = text:gsub(digraph.from, digraph.to)
	end

	-- Apply the single-character substitutions; characters without an entry
	-- in `oneChar` are left unchanged.
	text = mw.ustring.gsub(text, ".", oneChar)

	return mw.ustring.upper(text)
end

-- Language object for the code "tl", looked up once when the module loads.
local tl = require("Module:languages").getByCode("tl")

-- Passes `text` and the `tl` language object to tag_text from
-- Module:script utilities and returns whatever that function returns.
local function tag(text)
	local scriptUtilities = require("Module:script utilities")
	return scriptUtilities.tag_text(text, tl)
end

-- Display replacements for the two private-use sort markers, used when a
-- sort key is shown to readers.
local showsubst = {}
showsubst[a] = "₂"
showsubst[b] = "₃"

--- Builds a wikitext list showing the sort key of each positional argument.
-- Each entry is the displayable sort key in <code> tags followed by the
-- tagged word on a definition line.
-- @param frame  frame object whose positional args are the words to show
-- @return the concatenated wikitext for all entries
function export.showSortkey(frame)
	local lines = {}

	for _, word in ipairs(frame.args) do
		local scriptCode = tl:findBestScript(word):getCode()

		-- Replace the private-use markers with their visible subscripts.
		local key = export.makeSortKey(word, "tl", scriptCode)
		key = mw.ustring.gsub(key, ".", showsubst)

		-- Append "¹" at the end of the key and before each whitespace
		-- character, unless the preceding character is an ASCII digit or "²".
		key = mw.ustring.gsub(key, "([^0-9²])$", "%1¹")
		key = mw.ustring.gsub(key, "([^0-9²])(%s)", "%1¹%2")

		lines[#lines + 1] = "\n* <code>" .. key .. "</code>\n: " .. tag(word)
	end

	return table.concat(lines)
end

--- Sorts the positional arguments by sort key and renders them as a wikitext
-- list, each term followed by its displayable sort key in parentheses.
-- @param frame  frame object whose positional args are the terms to sort
-- @return the concatenated wikitext for the sorted list
function export.showSorting(frame)
	-- Copy the arguments into a plain table so they can be sorted in place.
	local sorted = {}
	for _, term in ipairs(frame.args) do
		sorted[#sorted + 1] = term
	end

	-- Wrap makeSortKey with memoize from Module:fun for use in the comparator.
	local cachedKey = require("Module:fun").memoize(export.makeSortKey)
	table.sort(sorted, function(left, right)
		return cachedKey(left) < cachedKey(right)
	end)

	-- Replace each term, in place, with its rendered list entry.
	for index = 1, #sorted do
		local term = sorted[index]
		local scriptCode = tl:findBestScript(term):getCode()

		local key = export.makeSortKey(term, "tl", scriptCode)
		key = mw.ustring.gsub(key, ".", showsubst)
		key = mw.ustring.gsub(key, "([^0-9²])$", "%1¹")
		key = mw.ustring.gsub(key, "([^0-9²])(%s)", "%1¹%2")

		sorted[index] = "\n* " .. tag(term) .. " (<code>" .. key .. "</code>)"
	end

	return table.concat(sorted)
end

-- Return the table of public functions to whatever loads this module.
return export