Module:FParser
Εμφάνιση
![Documentation icon](http://upload.wikimedia.org/wikipedia/commons/thumb/4/43/Test_Template_Info-Icon_-_Version_%282%29.svg/50px-Test_Template_Info-Icon_-_Version_%282%29.svg.png)
Μπορείτε να συμβάλλετε στη δημιουργία σελίδας τεκμηρίωσης για αυτό το Scribunto module. Οι συντάκτες μπορούν να πειραματίζονται στο πρόχειρο (δημιουργία | αντίγραφο) και στις δοκιμαστικές σελίδες (δημιουργία) του module. Παρακαλούμε να προσθέτετε τις κατηγορίες στην υποσελίδα τεκμηρίωσης. Υποσελίδες αυτού του module. |
local tool = require "Module:Tools"
local lexer = {}
local parser = {}
--[[ These parser functions are generic functions to build a parser.
It works with "Lexing" and "Parsing" functions.
The parser is initialized by the "parse" function, who generates a "state" object that must be a parameter of each parsing functions,
and eventually returns the main node of the AST if everything goes well
* Lexing functions have one parameter, the state of the parser, and returns a modified state
if they could find the terminals they are supposed to recognize and or nothing if the parse fails.
* Parsing functions always have a state as unique parameter.
They can be divided into
* Generic one, written in this module that corresponds that helps to build a parser
but don't generate nodes of the AST themselves, like "alternative", "chain", "star" or "plus"
* "chain" corresponds to the concatenation operation in a grammar. for example a function that parses the EBNF rule
twelve = "1", "2" ;
will be generated by chain{lex_char("1"), lex_char("2")}
* "alternative" corresponds to the alternative operation in a grammar, for example a function that parses the EBNF rule
digit_excluding_zero = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
will be written alternative{lex_char("1"), lex_char("2"), lex_char("3"), lex_char("4"), lex_char("5"), lex_char("6"),
lex_char("7"), lex_char("8"), lex_char("9")}
* User written one, that are functions that must generate a node of the AST as a "node" attribute of the "state" object.
To do that they must use attributes of the state object filled by the generic one like
* "parsed", the string parsed by the last lexing function,
* "acc", the list of nodes that are generated by the function that are passed to function that iterates their call namely "star" and "plus"
They return nothing if the parse fails, or the state of the last state returned by a lexer function called if they don't.
Other functions are
* maybe : a function to avoid checking if the state is nil every time : if the state is in a fail state,
it does not apply the parsing or lexing function. Maybe allows to chain the application of lexing and parsing function easily.
attribute of the "newstate" object to be able to access the "acc" or "node" of the previous object in a chain, for example.
* idop : a function that takes a function as parameter that is used to compute an action on the current state in a chain of parsing function,
mainly modifying variables in the closure. Functions passed to idop returns nothing. Usable for example for logging.
* statemodop : a function that takes a function as parameter ; who computes a node in the AST from the state and the variables of the closure and typically adding
it to the state.
Those functions have the same signatures than parsing functions, but typically they do not parse anything.
--]]
--------------------------------------------------------------------------------
-- lexer
--------------------------------------------------------------------------------
local function lex_regex(state, regex)
res = string.match(state.newstate.str, "^" .. regex)
if res then
local newstate = {}
local len = string.len(res)
newstate.str = state.newstate.str:sub(len + 1)
newstate.pos = state.newstate.pos + len
return {['lexed'] = res, ["newstate"] = newstate}
else
return nil
end
end
lexer.lex_regex = lex_regex
local function lex_epsilon(state)
return lex_regex(state, "")
end
--[[
Tests: p.parse("a", p.chain{p.lexer.lex_epsilon, p.lex_char("a"),p.lexer.lex_epsilon})
--]]
lexer.lex_epsilon = lex_epsilon
local lex_char = function(char)
return function(state)
return lex_regex(state, char)
end
end
-- tested with "p.parse("/", p.lex_char('/'))"
lexer.lex_char = lex_char
function lexer.open_parenthesis(state)
return lex_regex(state, "[(]")
end
function lexer.close_parenthesis(state)
return lex_regex(state, "[)]")
end
function lexer.lex_integer(state)
return lex_regex(state, "[0-9]+")
end
function lexer.eat_blanks(state)
return lex_regex(state,' *')
end
parser.lexer = lexer
----------------------------------------------------------
-- generic parser property
----------------------------------------------------------
local function maybe(state, func)
if state then
return func(state)
end
end
parser.maybe = maybe
local function idop(func)
return function(state)
if state then
func(state)
return state
end
end
end
parser.idop = idop
-- this allows avoiding to pass the state beetween each functions if they were called by hand
local function chain(funcs)
return function(state)
local i = 1
local res = funcs[1](state)
while i < #funcs and res do
i = i+1
if funcs[i] == nil then
error("a nil func in chain")
end
res = funcs[i](res)
end
return res
end
end
--[[
Tests :
p.parse("aba", p.chain{p.lex_char('a'), p.lex_char('b'), p.lex_char('a')}) => yes
p.parse("aba", p.chain{p.lex_char('a'), p.lex_char('b')}) => nope
p.parse("aba", p.chain{p.lex_char('a'), p.lex_char('b'), p.lex_char('a'), p.lex_char('a')}) => nope
--]]
parser.chain = chain
-- higher order function that can parse an alternative between several non terminals.
-- returns the state of the first match
local function alternative(funcs)
local i = 1
return function(state)
while i <= #funcs do
res = maybe(state, funcs[i])
if res then
return res
end
i=i+1
end
end
end
--[[
Tests :
p.parse("a", p.alternative{p.lex_char('a'), p.lex_char('b')}) => yes
p.parse("b", p.alternative{p.lex_char('a'), p.lex_char('b')}) => yes
p.parse("c", p.alternative{p.lex_char('a'), p.lex_char('b')}) => nope
--]]
parser.alternative = alternative
function star(parsing_func)
local function star_rec(state, acc, i)
res = chain{
parsing_func,
idop(
function (stat)
table.insert(acc, stat.node)
end
),
}(state)
if res then
return star_rec(res, acc, i + 1)
else
return state, acc
end
end
return function(state)
if state then
local acc = {}
result, acc2 = star_rec(state, acc, 1)
result.acc = acc2
return result
end
end
end
--[[
Tests: p.parse("aaab", p.chain{p.star(p.lex_char("a")), p.lex_char("b")}) => yes
p.parse("b", p.chain{p.star(p.lex_char("a")), p.lex_char("b")}) => yes
--]]
parser.star = star
function plus(parsing_func)
return function(state)
local firstnode
local acc
return chain{
parsing_func,
idop(
function(state)
firstnode = state.node
end
),
star(parsing_func),
function(state)
acc = state.acc
table.insert(acc, 1, firstnode)
state.acc = acc
return state
end
}(state)
end
end
--[[
res = p.parse("aaab",
p.chain{
p.plus(
p.chain{
p.lex_char("a"),
p.statemodop(function(res) res.node="a" ; return res; end)
}
),
p.lex_char("b")
}
)
--]]
parser.plus = plus
--[[
Tests :
p.parse("a", p.chain{p.lex_char('a'), p.idop(function (state) end )}) => yes
p.parse("ab", p.chain{p.lex_char('a'), p.idop(function (state) end), p.lex_char('b') }) => nope
p.parse("ab", p.chain{p.lex_char('a'), p.idop(function (state) end), p.lex_char('a') }) => nope
--]]
function questionmark(parsing_func)
return function(state)
local node = nil
local res=alternative{
chain{
parsing_func,
idop(function (stat) node = stat.node end)
},
lex_epsilon
}(state)
res.node = node
return res
end
end
parser.questionmark = questionmark
--------------------------------------------------------------------------------
-- main function that launches the parsing
--------------------------------------------------------------------------------
parser.parse = function (string, base_parsing_function)
local state = {}
state.newstate = {}
state.newstate.str = string
state.newstate.pos = 1
local res_node = nil
local res = chain{
base_parsing_function,
idop(function(stat) res_node = stat.node end),
lexer.eat_blanks
}(state)
tool.dump_to_console(res)
if res and res.newstate.str=="" then
mw.log('yes')
return res_node
end
mw.log('nope')
return nil
end
return parser