Module:Sandbox/Erutuon/Unicode
- !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇňʼnŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſƀƁƂƃƄƅƆƇƈƉƊƋƌƍƎƏƐƑƒƓƔƕƖƗƘƙƚƛƜƝƞƟƠơƢƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿǀǁǂǃDŽDždžLJLjljNJNjnjǍǎǏǐǑǒǓǔǕǖǗǘǙǚǛǜǝǞǟǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸǹǺǻǼǽǾǿȀȁȂȃȄȅȆȇȈȉȊȋȌȍȎȏȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟȠȡȢȣȤȥȦȧȨȩȪȫȬȭȮȯȰȱȲȳȴȵȶȷȸȹȺȻȼȽȾȿɀɁɂɃɄɅɆɇɈɉɊɋɌɍɎɏḀḁḂḃḄḅḆḇḈḉḊḋḌḍḎḏḐḑḒḓḔḕḖḗḘḙḚḛḜḝḞḟḠḡḢḣḤḥḦḧḨḩḪḫḬḭḮḯḰḱḲḳḴḵḶḷḸḹḺḻḼḽḾḿṀṁṂṃṄṅṆṇṈṉṊṋṌṍṎṏṐṑṒṓṔṕṖṗṘṙṚṛṜṝṞṟṠṡṢṣṤṥṦṧṨṩṪṫṬṭṮṯṰṱṲṳṴṵṶṷṸṹṺṻṼṽṾṿẀẁẂẃẄẅẆẇẈẉẊẋẌẍẎẏẐẑẒẓẔẕẖẗẘẙẚẛẜẝẞẟẠạẢảẤấẦầẨẩẪẫẬậẮắẰằẲẳẴẵẶặẸẹẺẻẼẽẾếỀềỂểỄễỆệỈỉỊịỌọỎỏỐốỒồỔổỖỗỘộỚớỜờỞởỠỡỢợỤụỦủỨứỪừỬửỮữỰựỲỳỴỵỶỷỸỹỺỻỼỽỾỿⱠⱡⱢⱣⱤⱥⱦⱧⱨⱩⱪⱫⱬⱭⱮⱯⱰⱱⱲⱳⱴⱵⱶⱷⱸⱹⱺⱻⱼⱽⱾⱿ꜠꜡ꜢꜣꜤꜥꜦꜧꜨꜩꜪꜫꜬꜭꜮꜯꜰꜱꜲꜳꜴꜵꜶꜷꜸꜹꜺꜻꜼꜽꜾꜿꝀꝁꝂꝃꝄꝅꝆꝇꝈꝉꝊꝋꝌꝍꝎꝏꝐꝑꝒꝓꝔꝕꝖꝗꝘꝙꝚꝛꝜꝝꝞꝟꝠꝡꝢꝣꝤꝥꝦꝧꝨꝩꝪꝫꝬꝭꝮꝯꝰꝱꝲꝳꝴꝵꝶꝷꝸꝹꝺꝻꝼꝽꝾꝿꞀꞁꞂꞃꞄꞅꞆꞇꞈ꞉꞊ꞋꞌꞍꞎꞏꞐꞑꞒꞓꞔꞕꞖꞗꞘꞙꞚꞛꞜꞝꞞꞟꞠꞡꞢꞣꞤꞥꞦꞧꞨꞩꞪꞫꞬꞭꞮꞯꞰꞱꞲꞳꞴꞵꞶꞷꞸꞹꞺꞻꞼꞽꞾꞿꟀꟁꟂꟃꟄꟅꟆꟇꟈꟉꟊꟐꟑꟓꟕꟖꟗꟘꟙꟲꟳꟴꟵꟶꟷꟸꟹꟺꟻꟼꟽꟾꟿꬰꬱꬲꬳꬴꬵꬶꬷꬸꬹꬺꬻꬼꬽꬾꬿꭀꭁꭂꭃꭄꭅꭆꭇꭈꭉꭊꭋꭌꭍꭎꭏꭐꭑꭒꭓꭔꭕꭖꭗꭘꭙꭚ꭛ꭜꭝꭞꭟꭠꭡꭢꭣꭤꭥꭦꭧꭨꭩ꭪꭫fffiflffifflſtst!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\–—«»
Latn (1027), Zyyy (105), Zzzz (24), Grek (1)
local p = {}
local Unicode_data = require "Module:Unicode data/sandbox"
-- Raise an error whose message is produced by string.format.
--   errorf(level, fmt, ...)  uses the given numeric error level, plus one so
--                            that the level is counted from errorf's caller
--                            rather than from errorf itself.
--   errorf(fmt, ...)         no level given; the error is raised at level 2.
local function errorf(level, ...)
	if type(level) ~= "number" then
		-- The first argument is really the format string.
		return error(string.format(level, ...), 2)
	end
	return error(string.format(...), level + 1)
end
-- Formatting front end for mw.log: the arguments are run through
-- string.format and the resulting message is handed to mw.log.
mw.logf = function (...)
	local message = string.format(...)
	return mw.log(message)
end
-- Minimal string buffer: strings are stored at indices 1..n and the count is
-- tracked in the field `n`.
local output_mt = {}
output_mt.__index = output_mt

-- Append one string to the buffer.
function output_mt.insert(self, str)
	local position = self.n + 1
	self.n = position
	self[position] = str
end

-- also in [[Module:Unicode data/documentation functions]]
-- Append the result of string.format(...) to the buffer.
function output_mt.insert_format(self, ...)
	self:insert(string.format(...))
end

-- buffer:join(sep) is table.concat(buffer, sep).
output_mt.join = table.concat

-- Create a new, empty buffer.
local function Output()
	local buffer = { n = 0 }
	return setmetatable(buffer, output_mt)
end
-- Bracketed character class written as byte-escaped UTF-8 ranges, followed by
-- a few literal punctuation characters. p.show strips the brackets and
-- expands every "first-last" pair into the full run of characters.
local Latn_pattern = "["
	.. "\n\32-\127"                     -- newline, then U+0020-U+007F
	.. "\194\160-\194\172"              -- U+00A0-U+00AC
	.. "\195\128-\195\191"              -- U+00C0-U+00FF
	.. "\196\128-\197\191"              -- U+0100-U+017F
	.. "\198\128-\201\143"              -- U+0180-U+024F
	.. "\225\184\128-\225\187\191"      -- U+1E00-U+1EFF
	.. "\226\177\160-\226\177\191"      -- U+2C60-U+2C7F
	.. "\234\156\160-\234\159\191"      -- U+A720-U+A7FF
	.. "\234\172\176-\234\173\175"      -- U+AB30-U+AB6F
	.. "\239\172\128-\239\172\134"      -- U+FB00-U+FB06
	.. "\239\188\129-\239\188\188"      -- U+FF01-U+FF3C
	.. "–"
	.. "—"
	.. "«" .. "»"
	.. "]"
local get_codepoint = mw.ustring.codepoint

-- Given two characters, return a string containing every character from the
-- first one's codepoint through the second one's, inclusive.
-- Returns nil when the second codepoint is lower than the first.
local function expand_range(start, ending)
	local first = get_codepoint(start)
	local last = get_codepoint(ending)
	if last < first then
		return nil
	end

	local pieces = {}
	for codepoint = first, last do
		pieces[codepoint - first + 1] = mw.ustring.char(codepoint)
	end
	return table.concat(pieces)
end
local fun = require "Module:Fun"
local m_table = require "Module:TableTools"
-- Metatable for counter tables: reading a missing key stores 0 under that key
-- and returns 0. The table is its own metatable, so calling it
-- (script_to_count_mt()) produces a fresh counter table.
local script_to_count_mt = {}

function script_to_count_mt.__index(self, key)
	self[key] = 0
	return 0
end

function script_to_count_mt.__call(self, ...)
	local counter = {}
	return setmetatable(counter, self)
end

setmetatable(script_to_count_mt, script_to_count_mt)
-- Tally the script of every codepoint produced by a generic-for iterator
-- triple (for instance the values returned by mw.ustring.gcodepoint) and
-- return the tallies as "Code (count)" items joined with ", ", ordered by
-- descending count.
local function show_scripts(iterator, state, value)
	local tally = script_to_count_mt()
	for codepoint in iterator, state, value do
		local script = Unicode_data.lookup_script(codepoint)
		tally[script] = tally[script] + 1
	end

	-- Key comparator: the script with the larger count sorts first.
	local function by_descending_count(script1, script2)
		return tally[script1] > tally[script2]
	end

	local function describe(count, script)
		return ("%s (%d)"):format(script, count)
	end

	-- sortedPairs stays as the trailing call so that everything it returns
	-- is forwarded to fun.mapIter.
	local items = fun.mapIter(
		describe,
		m_table.sortedPairs(tally, by_descending_count))
	return table.concat(items, ", ")
end
-- Group the codepoints produced by a generic-for iterator triple by script.
-- Returns a table mapping each script (as returned by
-- Unicode_data.lookup_script) to a set of the form { [codepoint] = true }.
local function get_chars_in_scripts(iterator, state, value)
	local chars_by_script = {}
	for codepoint in iterator, state, value do
		local script = Unicode_data.lookup_script(codepoint)
		local char_set = chars_by_script[script]
		if not char_set then
			char_set = {}
			chars_by_script[script] = char_set
		end
		char_set[codepoint] = true
	end
	return chars_by_script
end
-- Render a script -> codepoint-set map (as built by get_chars_in_scripts).
-- Scripts and codepoints are both visited through m_table.sortedPairs.
--   format:    string.format template receiving the script and the
--              nowiki-escaped characters (default "%s: %s")
--   separator: string placed between the per-script entries (default "\n")
local function print_char_set_map(script_to_char_set, format, separator)
	if not format then
		format = "%s: %s"
	end
	if not separator then
		separator = "\n"
	end

	local function codepoint_to_char(_, codepoint)
		return mw.ustring.char(codepoint)
	end

	local function render_entry(char_set, script)
		local char_list = fun.mapIter(
			codepoint_to_char,
			m_table.sortedPairs(char_set))
		local chars = table.concat(char_list)
		return format:format(script, mw.text.nowiki(chars))
	end

	local entries = fun.mapIter(
		render_entry,
		m_table.sortedPairs(script_to_char_set))
	return table.concat(entries, separator)
end
-- Expand Latn_pattern into the literal characters it describes and return
-- them inside a wrapping <div>, followed by the per-script counts of those
-- characters as computed by show_scripts.
function p.show(frame)
	-- Drop the enclosing square brackets of the character class.
	local class_body = Latn_pattern:gsub("%[(.-)%]", "%1")

	-- Find two UTF-8-encoded characters separated by hyphen-minus and replace
	-- them with the whole run; when expand_range returns nil the match is
	-- left as it was.
	local expanded_pattern = class_body:gsub(
		"([%z\1-\127\194-\244][\128-\191]*)%-([%z\1-\127\194-\244][\128-\191]*)",
		expand_range)

	-- Remove initial "\n " to avoid creating unwanted pre element.
	local displayed = expanded_pattern:gsub("^%s*", "")

	-- The counts are taken over the untrimmed string.
	local script_summary = show_scripts(mw.ustring.gcodepoint(expanded_pattern))

	return ('* <div style="overflow-wrap: break-word;">%s</div><br>%s')
		:format(displayed, script_summary)
end
-- Look up the Unicode block whose name is given in parameter `arg` of `args`.
--   args: argument table (callers in this module pass frame.args)
--   arg:  key of the parameter that holds the block name
-- Raises an error through errorf when the parameter is absent, or when
-- Unicode_data.get_block_info returns nothing for the name.
-- Returns the value from Unicode_data.get_block_info; callers in this module
-- read its indices 1 and 2 as a codepoint interval and index 3 as the name
-- to display.
local function get_block_info_from_arg(args, arg)
	-- Read the parameter named by `arg`, so that the parameter that is
	-- checked is the same one the error message reports.
	local block_name = args[arg]
		or errorf("Parameter %s is required", tostring(arg))
	local block_info = Unicode_data.get_block_info(block_name)
		or errorf("The block '%s' could not be found", block_name)
	return block_info
end
-- Interpret parameter `arg` of `args` with Module:Yesno.
-- When the parameter is absent (nil or false) that value is returned as is
-- and Module:Yesno is not loaded.
local function get_boolean_from_arg(args, arg)
	local raw = args[arg]
	if not raw then
		return raw
	end
	local yesno = require "Module:Yesno"
	-- Parentheses keep the result to a single value.
	return (yesno(raw))
end
-- List the scripts found in one Unicode block together with their counts.
--   frame.args[1]: block name (required)
--   frame.args[2]: when Module:Yesno reads it as true, the result is
--                  prefixed with the block name and ": "
function p.scripts_in_block(frame)
	local args = frame.args
	local block_info = get_block_info_from_arg(args, 1)
	local show_block_name = get_boolean_from_arg(args, 2)
	local script_list = show_scripts(fun.range(block_info[1], block_info[2]))

	if not show_block_name then
		return script_list
	end
	return ("%s: %s"):format(block_info[3], script_list)
end
-- Wrap a block name in a wikilink. A name that contains a space is linked
-- directly; any other name is linked to "<name> (Unicode block)" with the
-- bare name as the link text.
local function link_block_name(block_name)
	local has_space = block_name:find(" ", 1, true) ~= nil
	local template = has_space
		and "[[%s]]"
		or "[[%s (Unicode block)|%s]]"
	-- The single-placeholder template simply ignores the second argument.
	return (template:format(block_name, block_name))
end
-- Build a wikitable with one row per Unicode block, showing the block's
-- codepoint interval and, for every script occurring in it, the number of
-- codepoints assigned to that script.
--   frame.args[1]: hexadecimal number; blocks starting below it are not
--                  listed (default 0)
--   frame.args[2]: hexadecimal number; processing stops at the first block
--                  starting above it (default 4000)
-- Each script code is wrapped in an <abbr> whose title comes from
-- script_data.aliases, and each count in a <span> whose title holds a sample
-- of the block's characters in that script.
function p.scripts_in_blocks(frame)
	local output = Output()

	local start = frame.args[1] and tonumber(frame.args[1], 16) or 0
	local ending = frame.args[2] and tonumber(frame.args[2], 16) or 0x4000

	local script_data = mw.loadData "Module:Unicode data/scripts"
	local singles = script_data.singles
	local ranges = script_data.ranges

	-- Remove every key so the same table can be reused for the next block.
	local function clear(self)
		for _, key in ipairs(m_table.keysToList(self, false)) do
			self[key] = nil
		end
	end

	-- Script code -> number of codepoints in the current block.
	local counts = setmetatable({}, {
		__index = {
			increment = function (self, script_code, amount)
				self[script_code] = (self[script_code] or 0) + (amount or 1)
			end,
			clear = clear,
		}
	})

	-- Script code -> array of sample codepoints in the current block, with
	-- the array length tracked in the field `n`.
	local codepoints_per_script = setmetatable({}, {
		__index = {
			add = function (self, script_code, codepoint)
				self[script_code] = self[script_code] or { n = 0 }
				local sample = self[script_code]
				-- Codepoints U+0000-U+001F and U+0080-U+009F are never
				-- sampled, and nothing is added once the sample has grown
				-- past 0x20 entries.
				if sample.n <= 0x20
					and not (codepoint <= 0x9F and (codepoint >= 0x80
						or codepoint <= 0x1F)) then
					if sample.n == 0x20 then
						-- The sample is full: close it with "...".
						local period = ("."):byte()
						for _ = 1, 3 do
							sample.n = sample.n + 1
							sample[sample.n] = period
						end
					else
						if script_code == "Zinh" then -- probably combining character
							-- Put U+25CC in front of the character.
							sample.n = sample.n + 1
							sample[sample.n] = 0x25CC
						end
						sample.n = sample.n + 1
						sample[sample.n] = codepoint
					end
				end
			end,
			clear = clear,
		}
	})

	output:insert [[
{| class="wikitable"
|+ Scripts in each Unicode block
! block !! codepoints !! scripts
]]

	-- ipairs visits the blocks in array order; the early `break` below relies
	-- on that order (it assumes the array is sorted by starting codepoint).
	for _, block in ipairs(mw.loadData "Module:Unicode data/blocks") do
		local codepoint = block[1]
		if codepoint > ending then break end

		if codepoint >= start then
			while codepoint <= block[2] do
				local script = singles[codepoint]
				if script then -- Codepoint is in "singles" map.
					counts:increment(script)
					codepoints_per_script:add(script, codepoint)
					codepoint = codepoint + 1
				else
					-- NOTE(review): this relies on binary_range_search also
					-- returning a numeric index when no range matches - confirm.
					local range, index = Unicode_data.binary_range_search(codepoint, ranges)
					if range then -- Codepoint is in "ranges" array.
						local count = 0
						script = range[3]
						while codepoint <= range[2] and codepoint <= block[2] do
							count = count + 1
							codepoints_per_script:add(script, codepoint)
							codepoint = codepoint + 1
						end
						counts:increment(script, count)
					else -- Codepoint doesn't have data; it's Zzzz.
						-- Get range immediately above codepoint. The scan
						-- stops at the end of the array instead of indexing
						-- past the last range.
						while ranges[index] and ranges[index][2] < codepoint do
							index = index + 1
						end
						local next_range = ranges[index]
						-- With no range above the codepoint, only the end of
						-- the block and the "singles" map limit the gap.
						local gap_end = next_range and next_range[1] or math.huge
						local count = 0
						script = "Zzzz"
						while codepoint < gap_end and codepoint <= block[2]
							and not singles[codepoint] do
							count = count + 1
							codepoint = codepoint + 1
						end
						counts:increment(script, count)
					end
				end
			end

			output:insert_format([[
|-
| %s
| U+%04X–U+%04X
| %s
]], link_block_name(block[3]), block[1], block[2],
				table.concat(
					-- fun.mapIter is the call this module uses everywhere
					-- else to consume the iterator from m_table.sortedPairs.
					fun.mapIter(
						function (count, script)
							return ('<abbr title="%s">%s</abbr> (<span title="%s">%d</span>)')
								:format(
									script_data.aliases[script], script,
									codepoints_per_script[script]
										and mw.text.nowiki(mw.ustring.char(
											unpack(codepoints_per_script[script])))
										or "",
									count)
						end,
						m_table.sortedPairs(
							counts,
							-- Scripts with more codepoints come first.
							function (script1, script2)
								return counts[script1] > counts[script2]
							end)),
					", "))
		end

		-- mw.logObject(codepoints_per_script, block[3])
		counts:clear()
		codepoints_per_script:clear()
	end

	output:insert "|}"

	return output:join()
end
-- List the characters of one Unicode block grouped by script.
--   frame.args[1]: block name (required)
--   frame.args[2]: when Module:Yesno reads it as true, the result is
--                  prefixed with the block name and ": "
function p.chars_in_scripts_in_block(frame)
	local args = frame.args
	local block_info = get_block_info_from_arg(args, 1)
	local show_block_name = get_boolean_from_arg(args, 2)

	local chars_by_script = get_chars_in_scripts(
		fun.range(block_info[1], block_info[2]))
	local script_char_set_map = print_char_set_map(chars_by_script)

	if not show_block_name then
		return script_char_set_map
	end
	return ("%s: %s"):format(block_info[3], script_char_set_map)
end
-- Collect the language codes used by {{lang|code|...}} and {{lang-code|...}}
-- templates in the wikitext of a page.
--   frame.args[1]: title of the page to scan (default "English language")
-- Returns the distinct codes joined with ", ". When no title object can be
-- made, or the page has no content to read, a message is logged and nothing
-- is returned.
function p.search_for_language_codes(frame)
	local page_name = frame.args[1] or "English language"

	local success, title_object = pcall(mw.title.new, page_name)
	if not (success and title_object) then
		mw.logf("Could not make title object for '%s'.", page_name)
		return
	end

	local content = title_object:getContent()
	if not content then
		-- getContent gives nil when there is no page to read; without this
		-- check the gmatch call below would raise an error on nil.
		mw.logf("Could not get content of '%s'.", page_name)
		return
	end

	local language_codes = {}
	-- Each match runs from "{{lang" up to, but not including, the next "}".
	for lang_template in content:gmatch "{{lang[^}]+" do
		local template_name = lang_template:match("{{([^|}]+)")
		local language_code
		if template_name == "lang" then
			-- {{lang|code|...}}: the code is the first parameter.
			language_code = lang_template:match "{{lang|([^|}]+)"
		elseif template_name:find "^lang-" then
			-- {{lang-code|...}}: the code is part of the template name.
			language_code = lang_template:match "{{lang-([^|}]+)"
		end
		if language_code then
			language_codes[language_code] = true
		end
	end

	return table.concat(m_table.keysToList(language_codes), ", ")
end
return p
Content Disclaimer
Informasi ini disarikan dari Wikipedia dan disajikan kembali untuk tujuan edukasi. Konten tersedia di bawah lisensi CC BY-SA 3.0. Kami tidak bertanggung jawab atas ketidakakuratan data yang bersumber dari kontribusi publik tersebut.
- The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted and restated here. We strive to provide accurate and relevant information; however:
- There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
- It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
- Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
- Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.