模块:Nxml

来自Noita Wiki
跳到导航 跳到搜索

此模块的文档可以在模块:Nxml/doc创建

local ffi = nil
if require then
    pcall(function()
        ffi = require("ffi")
    end)
end

local str_sub
local str_index
local str_normalize

if ffi then
    str_normalize = function(str)
        return ffi.cast("const char*", str)
    end

    str_sub = function(ptr, start_idx, len)
        return ffi.string(ptr + start_idx, len)
    end

    str_index = function(ptr, idx)
        return ptr[idx]
    end
else
    str_normalize = function(str) return str end

    str_sub = function(str, start_idx, len)
        return str:sub(start_idx + 1, start_idx + len)
    end

    str_index = function(str, idx)
        return string.byte(str:sub(idx + 1, idx + 1))
    end
end

--[[
 * The following is a Lua port of the NXML parser:
 * https://github.com/xwitchproject/nxml
 *
 * The NXML Parser is heavily based on code from poro
 * https://github.com/gummikana/poro
 * 
 * The poro project is licensed under the Zlib license:
 * 
 * --------------------------------------------------------------------------
 * Copyright (c) 2010-2019 Petri Purho, Dennis Belfrage
 * Contributors: Martin Jonasson, Olli Harjola
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 * --------------------------------------------------------------------------
]]

local nxml = {}

local TOKENIZER_FUNCS = {}
local TOKENIZER_MT = {
    __index = TOKENIZER_FUNCS,
    __tostring = function(self) return "natif.nxml.tokenizer" end
}

local function new_tokenizer(cstring, len)
    return setmetatable({
        data = cstring,
        cur_idx = 0,
        cur_row = 1,
        cur_col = 1,
        prev_row = 1,
        prev_col = 1,
        len = len
    }, TOKENIZER_MT)
end

local ws = {
    [string.byte(" ")] = true,
    [string.byte("\t")] = true,
    [string.byte("\n")] = true,
    [string.byte("\r")] = true
}

function TOKENIZER_FUNCS:is_whitespace(char)
    local n = tonumber(char)
    return ws[n] or false
end

local punct = {
    [string.byte("<")] = true,
    [string.byte(">")] = true,
    [string.byte("=")] = true,
    [string.byte("/")] = true,
}

function TOKENIZER_FUNCS:is_whitespace_or_punctuation(char)
    local n = tonumber(char)
    return self:is_whitespace(n) or punct[n] or false
end

function TOKENIZER_FUNCS:move(n)
    n = n or 1
    local prev_idx = self.cur_idx
    self.cur_idx = self.cur_idx + n
    if self.cur_idx >= self.len then
        self.cur_idx = self.len
        return
    end
    for i = prev_idx, self.cur_idx - 1 do
        if str_index(self.data, i) == string.byte("\n") then
            self.cur_row = self.cur_row + 1
            self.cur_col = 1
        else
            self.cur_col = self.cur_col + 1
        end
    end
end

function TOKENIZER_FUNCS:peek(n)
    n = n or 1
    local idx = self.cur_idx + n
    if idx >= self.len then return 0 end

    return str_index(self.data, idx)
end

function TOKENIZER_FUNCS:match_string(str)
    local len = #str
    str = str_normalize(str)
    
    for i = 0, len - 1 do
        if self:peek(i) ~= str_index(str, i) then return false end
    end
    return true
end

function TOKENIZER_FUNCS:eof()
    return self.cur_idx >= self.len
end

function TOKENIZER_FUNCS:cur_char()
    if self:eof() then return 0 end
    return tonumber(str_index(self.data, self.cur_idx))
end

function TOKENIZER_FUNCS:skip_whitespace()
    while not self:eof() do
        if self:is_whitespace(self:cur_char()) then
            self:move()
        elseif self:match_string("<!--") then
            self:move(4)
            while not self:eof() and not self:match_string("-->") do
                self:move()
            end

            if self:match_string("-->") then
                self:move(3)
            end
        elseif self:cur_char() == string.byte("<") and self:peek(1) == string.byte("!") then
            self:move(2)
            while not self:eof() and self:cur_char() ~= string.byte(">") do
                self:move()
            end
            if self:cur_char() == string.byte(">") then
                self:move()
            end

        elseif self:match_string("<?") then
            self:move(2)
            while not self:eof() and not self:match_string("?>") do
                self:move()
            end
            if self:match_string("?>") then
                self:move(2)
            end
        else
            break
        end
    end
end

function TOKENIZER_FUNCS:read_quoted_string()
    local start_idx = self.cur_idx
    local len = 0

    while not self:eof() and self:cur_char() ~= string.byte("\"") do
        len = len + 1
        self:move()
    end

    self:move() -- skip "
    return str_sub(self.data, start_idx, len)
end

function TOKENIZER_FUNCS:read_unquoted_string()
    local start_idx = self.cur_idx - 1 -- first char is move()d
    local len = 1

    while not self:eof() and not self:is_whitespace_or_punctuation(self:cur_char()) do
        len = len + 1
        self:move()
    end

    return str_sub(self.data, start_idx, len)
end

local C_NULL = 0
local C_LT = string.byte("<")
local C_GT = string.byte(">")
local C_SLASH = string.byte("/")
local C_EQ = string.byte("=")
local C_QUOTE = string.byte("\"")

function TOKENIZER_FUNCS:next_token()
    self:skip_whitespace()

    self.prev_row = self.cur_row
    self.prev_col = self.cur_col

    if self:eof() then return nil end

    local c = self:cur_char()
    self:move()

    if c == C_NULL then return nil
    elseif c == C_LT then return { type = "<" }
    elseif c == C_GT then return { type = ">" }
    elseif c == C_SLASH then return { type = "/" }
    elseif c == C_EQ then return { type = "=" }
    elseif c == C_QUOTE then return { type = "string", value = self:read_quoted_string() }
    else return { type = "string", value = self:read_unquoted_string() }
    end
end

local PARSER_FUNCS = {}
local PARSER_MT = {
    __index = PARSER_FUNCS,
    __tostring = function(self) return "natif.nxml.parser" end
}

local function new_parser(tokenizer, error_reporter)
    return setmetatable({
        tok = tokenizer,
        errors = {},
        error_reporter = error_reporter or function(type, msg) print("parser error: [" .. type .. "] " .. msg) end
    }, PARSER_MT)
end

local XML_ELEMENT_FUNCS = {}
local XML_ELEMENT_MT = {
    __index = XML_ELEMENT_FUNCS,
    __tostring = function(self)
        return nxml.tostring(self)
    end,
}

function PARSER_FUNCS:report_error(type, msg)
    self.error_reporter(type, msg)
    table.insert(self.errors, { type = type, msg = msg, row = self.tok.prev_row, col = self.tok.prev_col })
end


function PARSER_FUNCS:parse_attr(attr_table, name)
    local tok = self.tok:next_token()
    if tok.type == "=" then
        tok = self.tok:next_token()

        if tok.type == "string" then
            attr_table[name] = tok.value
        else
            self:report_error("missing_attribute_value", string.format("parsing attribute '%s' - expected a string after =, but did not find one"), name)
        end
    else
        self:report_error("missing_equals_sign", string.format("parsing attribute '%s' - did not find equals sign after attribute name", name))
    end
end

function PARSER_FUNCS:parse_element(skip_opening_tag)
    local tok
    if not skip_opening_tag then
        tok = self.tok:next_token()
        if tok.type ~= "<" then
            self:report_error("missing_tag_open", "couldn't find a '<' to start parsing with")
        end
    end

    tok = self.tok:next_token()
    if tok.type ~= "string" then
        self:report_error("missing_element_name", "expected an element name after '<'")
    end

    local elem_name = tok.value
    local elem = nxml.new_element(elem_name)
    local content_idx = 0

    local self_closing = false

    while true do
        tok = self.tok:next_token()

        if tok == nil then
            return elem
        elseif tok.type == "/" then
            if self.tok:cur_char() == C_GT then
                self.tok:move()
                self_closing = true
            end
            break
        elseif tok.type == ">" then
            break
        elseif tok.type == "string" then
            self:parse_attr(elem.attr, tok.value)
        end
    end

    if self_closing then return elem end

    while true do
        tok = self.tok:next_token()

        if tok == nil then
            return elem
        elseif tok.type == "<" then
            if self.tok:cur_char() == C_SLASH then
                self.tok:move()

                local end_name = self.tok:next_token()
                if end_name.type == "string" and end_name.value == elem_name then
                    local close_greater = self.tok:next_token()

                    if close_greater.type == ">" then
                        return elem
                    else
                        self:report_error("missing_element_close", string.format("no closing '>' found for element '%s'", elem_name))
                    end
                else
                    self:report_error("mismatched_closing_tag", string.format("closing element is in wrong order - expected '</%s>', but instead got '%s'", elem_name, tostring(end_name.value)))
                end
                return elem
            else
                local child = self:parse_element(elem, true)
                table.insert(elem.children, child)
            end
        else
            if not elem.content then
                elem.content = {}
            end
            
            content_idx = content_idx + 1
            elem.content[content_idx] = tok.value or tok.type
        end
    end
end

function PARSER_FUNCS:parse_elements()
    local tok = self.tok:next_token()
    local elems = {}
    local elems_i = 1

    while tok and tok.type == "<" do
        elems[elems_i] = self:parse_element(true)
        elems_i = elems_i + 1

        tok = self.tok:next_token()
    end

    return elems
end

local function is_punctuation(str)
    return str == "/" or str == "<" or str == ">" or str == "="
end

function XML_ELEMENT_FUNCS:text()
    local content_count = #self.content

    if self.content == nil or content_count == 0 then
        return ""
    end

    local text = self.content[1]
    for i = 2, content_count do
        local elem = self.content[i]
        local prev = self.content[i - 1]

        if is_punctuation(elem) or is_punctuation(prev) then
            text = text .. elem
        else
            text = text .. " " .. elem
        end
    end

    return text
end

function XML_ELEMENT_FUNCS:add_child(child)
    self.children[#self.children + 1] = child
end

function XML_ELEMENT_FUNCS:add_children(children)
    local children_i = #self.children + 1
    for i = 1, #children do
        self.children[children_i] = children[i]
        children_i = children_i + 1
    end
end

function XML_ELEMENT_FUNCS:remove_child(child)
    for i = 1, #self.children do
        if self.children[i] == child then
            table.remove(self.children, i)
            break
        end
    end
end

function XML_ELEMENT_FUNCS:remove_child_at(index)
    table.remove(self.children, index)
end

function XML_ELEMENT_FUNCS:clear_children()
    self.children = {}
end

function XML_ELEMENT_FUNCS:clear_attrs()
    self.attr = {}
end

function XML_ELEMENT_FUNCS:first_of(element_name)
    local i = 0
    local n = #self.children

    while i < n do
        i = i + 1
        local c = self.children[i]

        if c.name == element_name then return c end
    end

    return nil
end

function XML_ELEMENT_FUNCS:each_of(element_name)
    local i = 1
    local n = #self.children

    return function()
        while i <= n and self.children[i].name ~= element_name do
            i = i + 1
        end
        i = i + 1
        return self.children[i - 1]
    end
end

function XML_ELEMENT_FUNCS:all_of(element_name)
    local table = {}
    local i = 1
    for elem in self:each_of(element_name) do
        table[i] = elem
        i = i + 1
    end
    return table
end

function XML_ELEMENT_FUNCS:each_child()
    local i = 0
    local n = #self.children

    return function()
        while i <= n do
            i = i + 1
            return self.children[i]
        end
    end
end

function XML_ELEMENT_FUNCS:each_child_i() -- Nathan func
    local i = 0
    local n = #self.children

    return function()
        while i < n do
            i = i + 1
            return i,self.children[i] -- yield but weird
        end
    end
end

function nxml.parse(data)
    local data_len = #data
    local tok = new_tokenizer(str_normalize(data), data_len)
    local parser = new_parser(tok)
    
    local elem = parser:parse_element(false)
    
    if not elem or (elem.errors and #elem.errors > 0) then
        error("parser encountered errors")
    end

    return elem
end

function nxml.parse_many(data)
    local data_len = #data
    local tok = new_tokenizer(str_normalize(data), data_len)
    local parser = new_parser(tok)
    
    local elems = parser:parse_elements(false)
    
    for i = 1, #elems do
        local elem = elems[i]

        if elem.errors and #elem.errors > 0 then
            error("parser encountered errors")
        end
    end

    return elems
end

function nxml.new_element(name, attrs)
    return setmetatable({
        name = name,
        attr = attrs or {},
        children = {},
        content = nil
    }, XML_ELEMENT_MT)
end

local function attr_value_to_str(value)
    local t = type(value)
    if t == "string" then return value end
    if t == "boolean" then return value and "1" or "0" end

    return tostring(value)
end

function nxml.tostring(elem, packed, indent_char, cur_indent)
    indent_char = indent_char or "\t"
    cur_indent = cur_indent or ""
    local s = "<" .. elem.name
    local self_closing = #elem.children == 0 and (not elem.content or #elem.content == 0)

    for k, v in pairs(elem.attr) do
        s = s .. " " .. k .. "=\"" .. attr_value_to_str(v) .. "\""
    end

    if self_closing then
        s = s .. " />"
        return s
    end

    s = s .. ">"

    local deeper_indent = cur_indent .. indent_char

    if elem.content and #elem.content ~= 0 then
        if not packed then s = s .. "\n" .. deeper_indent end
        s = s .. elem:text()
    end

    if not packed then s = s .. "\n" end

    for i, v in ipairs(elem.children) do
        if not packed then s = s .. deeper_indent end
        s = s .. nxml.tostring(v, packed, indent_char, deeper_indent)
        if not packed then s = s .. "\n" end
    end

    s = s .. cur_indent .. "</" .. elem.name .. ">"

    return s
end

return nxml