Module:Unicode data/scripts/make

local p = {}

local Array = require 'Module:Array'

-- Escape every punctuation character in `str` with a leading "%" so the
-- result can be embedded in a Lua pattern and matched literally.
-- Returns only the escaped string (gsub's substitution count is dropped).
local function pattern_escape(str)
	local escaped = str:gsub('%p', '%%%1')
	return escaped
end

-- Build a table mapping script names to script codes.
--
-- Reads the content of the wiki page `page_name` and scans the text found
-- between the "# Script (sc)" and "# Script_Extensions (scx)" headings.
-- For every "sc ; <code> ; <name>" entry in that section, the first field
-- after "sc" is taken as the code and the second as the name; any further
-- fields on the line are ignored.
--
-- page_name: title of the page to read (presumably a copy of
--            PropertyValueAliases.txt; see the caller).
-- Returns:   table { [name] = code }, names keeping their underscores.
-- Raises an error if the title is invalid, the page has no content, or the
-- script section cannot be located.
function p.make_script_name_to_code(page_name)
	local title = assert(mw.title.new(page_name),
		('Invalid page title "%s"'):format(tostring(page_name)))
	local property_value_aliases = assert(title:getContent(),
		('Could not read content of "%s"'):format(tostring(page_name)))

	local script_aliases = property_value_aliases:match(
		pattern_escape '# Script (sc)'
		.. '%s+(.-)%s+'
		.. pattern_escape '# Script_Extensions (scx)')
	-- Without this check a missing section surfaces as an opaque
	-- "attempt to index a nil value" on the gmatch call below.
	if not script_aliases then
		error(('Script (sc) section not found in "%s"'):format(tostring(page_name)))
	end

	local script_name_to_code = {}

	for code, name in script_aliases:gmatch 'sc%s+;%s+(%a+)%s+;%s+([%a_]+)' do
		script_name_to_code[name] = code
	end

	return script_name_to_code
end

-- Generate the Lua source text of a data module describing which script
-- each codepoint belongs to.
--
-- scripts_txt:                title of the page holding the codepoint/script
--                             listing (lines of the form "XXXX ; Name" or
--                             "XXXX..YYYY ; Name", hexadecimal codepoints).
-- property_value_aliases_txt: title of the page passed on to
--                             p.make_script_name_to_code to obtain the
--                             name -> code mapping.
--
-- Returns a string of Lua code defining `local data` with three tables:
--   singles: [codepoint] = "code" for ranges covering one codepoint,
--   ranges:  { low, high, "code" } triples sorted by `low`,
--   aliases: code = "Name" (underscores in names replaced by spaces),
--            sorted by code.
-- Raises an error if a page cannot be read or if a script name in the
-- listing has no code in the alias mapping.
function p.make_script_data(scripts_txt, property_value_aliases_txt)
	local scripts_title = assert(mw.title.new(scripts_txt),
		('Invalid page title "%s"'):format(tostring(scripts_txt)))
	local script_data = assert(scripts_title:getContent(),
		('Could not read content of "%s"'):format(tostring(scripts_txt)))

	local sorted_pairs = require('Module:TableTools').sortedPairs

	local script_name_to_code = p.make_script_name_to_code(property_value_aliases_txt)
	-- Any lookup of a name that has no code aborts generation.
	setmetatable(script_name_to_code, { __index = function (self, k)
		error(('No code for "%s"'):format(k))
	end })

	-- Step 1: collect ranges, merging a line into the previous range when it
	-- has the same script name and starts right after the previous line ends.
	local script_ranges = Array()

	local prev_codepoint, prev_script_name, prev_script_range
	-- The frontier %f[^\n%z] anchors the match to the start of a line.
	for codepoint1, codepoint2, script_name in script_data:gmatch '%f[^\n%z](%x+)%.?%.?(%x*)%s+;%s*([%w_]+)' do
		local first = tonumber(codepoint1, 16)
		-- The second capture is empty on single-codepoint lines; tonumber
		-- then gives nil and the range ends where it starts.
		local last = tonumber(codepoint2, 16) or first
		if prev_script_range and script_name == prev_script_name and first - prev_codepoint == 1 then
			prev_script_range[2] = last
		else
			prev_script_range = { first, last, script_name_to_code[script_name] }
			script_ranges:insert(prev_script_range)
		end
		prev_codepoint, prev_script_name = last, script_name
	end

	-- Step 2: split one-codepoint ranges off into `singles` in a single pass
	-- (rather than removing them one by one from the front of the array).
	local singles = {}
	local multi_ranges = Array()
	for _, range in ipairs(script_ranges) do
		if range[1] == range[2] then
			singles[range[1]] = range[3]
		else
			multi_ranges:insert(range)
		end
	end

	multi_ranges:sort(
		function (range1, range2)
			return range1[1] < range2[1]
		end)

	-- Each "..." placeholder is filled, in order, with singles, ranges and
	-- aliases.
	local template = [[
local data = {
	singles = {
...
	},
	
	ranges = {
...
	},
	-- Scripts.txt gives full names; here we consider them aliases to save space.
	aliases = {
...
	},
}
]]

	-- Step 3: render each section as lines of Lua source.
	local printed_ranges = Array()
	for _, range in ipairs(multi_ranges) do
		printed_ranges:insert(('\t\t{ 0x%05X, 0x%05X, "%s" },'):format(range[1], range[2], range[3]))
	end

	local printed_singles = Array()
	for codepoint, script_code in sorted_pairs(singles) do
		printed_singles:insert(('\t\t[0x%05X] = "%s",'):format(codepoint, script_code))
	end

	local printed_script_name_to_code = Array()
	for name, code in sorted_pairs(script_name_to_code,
			function(name1, name2)
				return script_name_to_code[name1] < script_name_to_code[name2]
			end) do
		-- Parentheses keep only gsub's first result (the string).
		local display_name = (name:gsub('_', ' '))
		printed_script_name_to_code:insert(('\t\t%s = "%s",'):format(code, display_name))
	end

	-- Step 4: fill the placeholders. A function replacement inserts the text
	-- verbatim, so a "%" in generated text is never treated as a gsub escape.
	local sections = {
		printed_singles:concat('\n'),
		printed_ranges:concat('\n'),
		printed_script_name_to_code:concat('\n'),
	}
	local section_index = 0
	local data = template:gsub('%.%.%.', function ()
		section_index = section_index + 1
		return sections[section_index]
	end)

	return data
end

-- Wrap `lua_code` in a "syntaxhighlight" extension tag with lang="lua",
-- created through the current frame, and return the result.
function p.highlight(lua_code)
	local frame = mw.getCurrentFrame()
	local tag = {
		name = "syntaxhighlight",
		content = lua_code,
		args = { lang = "lua" },
	}
	return frame:extensionTag(tag)
end

-- Entry point: generate the script data from the two pages under
-- User:Erutuon/Unicode and return it syntax-highlighted.
-- The `frame` argument is accepted but not used.
function p.main(frame)
	local base = 'User:Erutuon/Unicode'
	local generated = p.make_script_data(
		base .. '/Scripts.txt',
		base .. '/PropertyValueAliases.txt')
	return p.highlight(generated)
end

return p

Content Disclaimer

Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.

  1. The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted and restated here. We strive to provide accurate and relevant information; however, please note the following:
  2. There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
  3. It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
  4. Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
  5. Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.