-- Module:Lua lexer
-- <nowiki>
-- Copyright (c) 2018 LoganDark
--
-- Permission is hereby granted, free of charge, to any person obtaining a copy
-- of this software and associated documentation files (the "Software"), to deal
-- in the Software without restriction, including without limitation the rights
-- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-- copies of the Software, and to permit persons to whom the Software is
-- furnished to do so, subject to the following conditions:
--
-- The above copyright notice and this permission notice shall be included in all
-- copies or substantial portions of the Software.
--
-- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-- SOFTWARE.
--- Lexer for Lua source code written in pure Lua.
-- @script lexer
-- @license MIT
-- @author https://github.com/LoganDark
-- @param {string} text Lua source code to lex.
-- @return {table} Table of line arrays containing lexemes.
--- Build a boolean lookup map from a string or an array.
-- A string contributes one key per character; an array-like table contributes
-- one key per element. Any other kind of `src` adds nothing.
-- @param {string|table} src Characters (string) or words (array) to map.
-- @param[opt] {table} list Existing table to extend by reference.
-- @return {table} `list` (or a fresh table) with `true` stored under every key.
-- @local
local function lookupify(src, list)
    local map = list or {}
    local kind = type(src)
    if kind == 'table' then
        -- Array input: every element becomes a key.
        local count = #src
        local index = 1
        while index <= count do
            map[src[index]] = true
            index = index + 1
        end
    elseif kind == 'string' then
        -- String input: every single character becomes a key.
        for index = 1, #src do
            map[string.sub(src, index, index)] = true
        end
    end
    return map
end
--- Base identifier character set: ASCII letters and underscore.
-- @variable {string} base_ident
local base_ident = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
--- Base decimal digit character set.
-- @variable {string} base_digits
local base_digits = '0123456789'
--- Base single-character operator set.
-- @variable {string} base_operators
local base_operators = '+-*/^%#'
--- Character classification maps used by the lexer.
-- Every field is a boolean map keyed by single characters. Some maps also
-- carry a named sub-map stored next to their character keys.
-- @table chars
-- @field {table} whitespace   Boolean map of whitespace characters.
-- @field {table} validEscapes Boolean map of valid escape characters.
-- @field {table} ident        Boolean map of valid identifier characters;
--                             `ident.start` maps the characters in
--                             `base_ident`.
-- @field {table} digits       Boolean map of decimal digits; `digits.hex`
--                             maps hexadecimal digits.
-- @field {table} symbols      Boolean map of symbol and operator characters;
--                             `symbols.equality` maps `~=><` and
--                             `symbols.operators` maps `base_operators`.
-- @local
local chars = {}
chars.whitespace = lookupify(' \n\t\r')
chars.validEscapes = lookupify('abfnrtv"\'\\')
chars.ident = lookupify(base_ident .. base_digits, {
    start = lookupify(base_ident),
})
chars.digits = lookupify(base_digits, {
    hex = lookupify(base_digits .. 'abcdefABCDEF'),
})
chars.symbols = lookupify(base_operators .. ',{}[]();.:', {
    equality = lookupify('~=><'),
    operators = lookupify(base_operators),
})
--- Reserved words recognised by the lexer.
-- @table keywords
-- @field {table} structure Boolean map of structure keywords.
-- @field {table} values    Boolean map of primitive keywords
--                          (`true`, `false`, `nil`).
local keywords = {}
keywords.structure = lookupify({
    'and', 'break', 'do', 'else', 'elseif', 'end', 'for', 'function',
    'goto', 'if', 'in', 'local', 'not', 'or', 'repeat', 'return', 'then',
    'until', 'while',
})
keywords.values = lookupify({ 'true', 'false', 'nil' })
--- Lexer function export.
-- Scans `text` once and groups the resulting tokens by source line.
-- Each token is a table with the fields `type`, `data`, `posFirst` and
-- `posLast`; the positions are offset by the accumulated length of the
-- preceding lines. Consecutive tokens of the same type on one line are merged
-- into a single token, and empty tokens are never stored.
-- Token types produced: whitespace, comment, string_start, string, escape,
-- string_end, keyword, value, ident, number, symbol, vararg, operator,
-- label_start, label, label_end and unidentified.
-- @param {string} text Lua source code to lex.
-- @return {table} Array of lines, each an array of token tables.
return function(text)
    -- Index of the next character to consume.
    local pos = 1
    -- Index of the first character of the token currently being built.
    local start = 1
    -- Tokens collected for the current line.
    local buffer = {}
    -- Completed lines.
    local lines = {}

    --- Peek at the character `delta` positions from the cursor.
    -- Does not consume anything; yields '' beyond the end of the text.
    local function look(delta)
        local index = pos + (delta or 0)
        return text:sub(index, index)
    end

    --- Consume and return the character under the cursor.
    local function get()
        pos = pos + 1
        return look(-1)
    end

    --- Try to read the rest of a long-bracket opener (`=`* followed by `[`).
    -- Expects the first `[` to be consumed already. On success the opener is
    -- consumed and the number of `=` signs is returned; otherwise nothing is
    -- consumed and nil is returned.
    local function getDataLevel()
        local num = 0
        while look(num) == '=' do
            num = num + 1
        end
        if look(num) == '[' then
            pos = pos + num + 1
            return num
        end
    end

    --- Text consumed since the last pushed token.
    local function getCurrentTokenText()
        return text:sub(start, pos - 1)
    end

    -- Number of characters pushed on the current line so far.
    local currentLineLength = 0
    -- Total length of all completed lines (subtracted from token positions).
    local lineoffset = 0

    --- Emit the pending text as a token of the given type.
    -- Merges into the previous token when it has the same type.
    -- @param tokenType Token type name.
    -- @param[opt] data Token text; defaults to the pending text.
    -- @return The token table (not stored in the buffer when it is empty).
    local function pushToken(tokenType, data)
        data = data or getCurrentTokenText()
        local tk = buffer[#buffer]
        if not tk or tk.type ~= tokenType then
            tk = {
                type = tokenType,
                data = data,
                posFirst = start - lineoffset,
                posLast = pos - 1 - lineoffset
            }
            if tk.data ~= '' then
                buffer[#buffer + 1] = tk
            end
        else
            tk.data = tk.data .. data
            tk.posLast = tk.posLast + #data
        end
        currentLineLength = currentLineLength + #data
        start = pos
        return tk
    end

    --- Close the current line and consume the newline character.
    local function newline()
        lines[#lines + 1] = buffer
        buffer = {}
        get()
        -- Pushing the newline advances `start` and the line length counter;
        -- the token itself is dropped so it never appears in the output.
        pushToken('newline')
        buffer[1] = nil
        lineoffset = lineoffset + currentLineLength
        currentLineLength = 0
    end

    --- Scan the body of a long string or long comment.
    -- Pushes one `tokenType` token per line crossed. On success the cursor is
    -- left on the first `]` of the matching closer.
    -- @return true when the closer was found, false when the input ended.
    local function getData(level, tokenType)
        while true do
            local char = get()
            if char == '' then
                -- Unterminated: step back so the cursor sits exactly at the
                -- end of the text instead of one past it.
                pos = pos - 1
                return false
            elseif char == '\n' then
                pos = pos - 1
                pushToken(tokenType)
                newline()
            elseif char == ']' then
                local valid = true
                for _ = 1, level do
                    if look() == '=' then
                        pos = pos + 1
                    else
                        valid = false
                        break
                    end
                end
                if valid and look() == ']' then
                    -- Rewind to the first `]`; the caller skips the closer.
                    pos = pos - level - 1
                    return true
                end
            end
        end
    end

    --- Consume the remainder of a single-line comment.
    -- Stops at end of line or end of input, pushes the comment token and
    -- starts a new line when a newline was reached.
    local function finishLineComment()
        while true do
            local c = get()
            if c == '' or c == '\n' then
                pos = pos - 1
                pushToken('comment')
                if c == '\n' then
                    newline()
                end
                return
            end
        end
    end

    --- Consume a run of whitespace, splitting lines at each newline.
    local function chompWhitespace()
        while true do
            local char = look()
            if char == '\n' then
                pushToken('whitespace')
                newline()
            elseif chars.whitespace[char] then
                pos = pos + 1
            else
                break
            end
        end
        pushToken('whitespace')
    end

    while true do
        chompWhitespace()
        local char = get()
        if char == '' then
            break
        elseif char == '-' and look() == '-' then
            -- Comment: long form `--[=*[ ... ]=*]` or single line.
            pos = pos + 1
            local level
            if look() == '[' then
                pos = pos + 1
                level = getDataLevel()
            end
            if level then
                if getData(level, 'comment') then
                    -- Skip the closing bracket only when it is really there.
                    pos = pos + level + 2
                end
                pushToken('comment')
            else
                finishLineComment()
            end
            pushToken('comment')
        elseif char == '\'' or char == '"' then
            -- Quoted string; ends at the matching quote, a newline or EOF.
            pushToken('string_start')
            while true do
                local char2 = get()
                if char2 == '\\' then
                    pos = pos - 1
                    pushToken('string')
                    get()
                    local char3 = get()
                    if chars.digits[char3] then
                        -- Decimal escape: up to three digits in total.
                        for _ = 1, 2 do
                            if chars.digits[look()] then
                                pos = pos + 1
                            end
                        end
                    elseif char3 == 'x' then
                        -- Hex escape needs exactly two hex digits.
                        if chars.digits.hex[look()] and chars.digits.hex[look(1)] then
                            pos = pos + 2
                        else
                            pushToken('unidentified')
                        end
                    elseif char3 == '\n' then
                        -- Backslash-newline continues the string.
                        pos = pos - 1
                        pushToken('escape')
                        newline()
                    elseif not chars.validEscapes[char3] then
                        pushToken('unidentified')
                    end
                    pushToken('escape')
                elseif char2 == '\n' then
                    pos = pos - 1
                    pushToken('string')
                    newline()
                    break
                elseif char2 == char or char2 == '' then
                    pos = pos - 1
                    pushToken('string')
                    get()
                    break
                end
            end
            pushToken('string_end')
        elseif chars.ident.start[char] then
            while chars.ident[look()] do
                pos = pos + 1
            end
            local word = getCurrentTokenText()
            if keywords.structure[word] then
                pushToken('keyword')
            elseif keywords.values[word] then
                pushToken('value')
            else
                pushToken('ident')
            end
        elseif chars.digits[char] or (char == '.' and chars.digits[look()]) then
            -- The hex prefix may be written `0x` or `0X`.
            if char == '0' and look():lower() == 'x' then
                pos = pos + 1
                while chars.digits.hex[look()] do
                    pos = pos + 1
                end
            else
                while chars.digits[look()] do
                    pos = pos + 1
                end
                if look() == '.' then
                    pos = pos + 1
                    while chars.digits[look()] do
                        pos = pos + 1
                    end
                end
                if look():lower() == 'e' then
                    pos = pos + 1
                    -- The exponent may carry an explicit sign, `+` or `-`.
                    local sign = look()
                    if sign == '-' or sign == '+' then
                        pos = pos + 1
                    end
                    while chars.digits[look()] do
                        pos = pos + 1
                    end
                end
            end
            pushToken('number')
        elseif char == '[' then
            -- Either a long string opener or a plain bracket symbol.
            local level = getDataLevel()
            if level then
                pushToken('string_start')
                local closed = getData(level, 'string')
                pushToken('string')
                if closed then
                    pos = pos + level + 2
                end
                pushToken('string_end')
            else
                pushToken('symbol')
            end
        elseif char == '.' then
            if look() == '.' then
                pos = pos + 1
                if look() == '.' then
                    pos = pos + 1
                end
            end
            if #getCurrentTokenText() == 3 then
                pushToken('vararg')
            else
                pushToken('symbol')
            end
        elseif char == ':' and look() == ':' then
            -- Label: `::name::`, with optional whitespace around the name.
            get()
            pushToken('label_start')
            chompWhitespace()
            if chars.ident.start[look()] then
                get()
                while chars.ident[look()] do
                    get()
                end
                pushToken('label')
                chompWhitespace()
                if look() == ':' and look(1) == ':' then
                    get()
                    get()
                    pushToken('label_end')
                end
            end
        elseif chars.symbols.equality[char] then
            if look() == '=' then
                pos = pos + 1
            end
            pushToken('operator')
        elseif chars.symbols[char] then
            if chars.symbols.operators[char] then
                pushToken('operator')
            else
                pushToken('symbol')
            end
        else
            pushToken('unidentified')
        end
    end

    lines[#lines + 1] = buffer
    return lines
end
-- Content Disclaimer
-- This information is summarized from Wikipedia and presented again for educational purposes. The content is available under the CC BY-SA 3.0 license. We are not responsible for inaccuracies in data sourced from those public contributions.
-- - The information displayed on this website is sourced in part or in whole from Wikipedia and has been adapted for re-presentation. We strive to provide accurate and relevant information, however:
-- - There is no guarantee of absolute accuracy. Wikipedia is an open, collaborative project that can be edited by anyone, so information is subject to change.
-- - It is not intended to constitute professional advice. The content displayed is for informational and educational purposes only. For important decisions (e.g., medical, legal, or financial), please consult a professional.
-- - Content copyright. Wikipedia is licensed under the Creative Commons Attribution-ShareAlike License (CC BY-SA). This means that content may be reused with appropriate attribution and shared under a similar license.
-- - Responsible use. Any risk arising from the use of information from this website is entirely the responsibility of the user.