Μετάβαση στο περιεχόμενο

Module:FParser

Από τη Βικιπαίδεια, την ελεύθερη εγκυκλοπαίδεια
Documentation icon Τεκμηρίωση module[δημιουργία]
local tool = require "Module:Tools"

local lexer = {}
local parser = {}

--[[ These parser functions are generic functions to build a parser.
It works with "Lexing" and "Parsing" functions. 
The parser is initialized by the "parse" function, who generates a "state" object that must be a parameter of each parsing functions,
                                         and eventually returns the main node of the AST if everything goes well
* Lexing functions have one parameter, the state of the parser, and returns a modified state 
  if they could find the terminals they are supposed to recognize and  or nothing if the parse fails. 
* Parsing functions always have a state as unique parameter. 
  They can be divided into
  * Generic one, written in this module that corresponds that helps to build a parser 
    but don't generate nodes of the AST themselves, like "alternative", "chain", "star" or "plus"  
    * "chain" corresponds to the concatenation operation in a grammar. for example a function that parses the EBNF rule
            twelve  = "1", "2" ;
      will be generated by chain{lex_char("1"), lex_char("2")}
    * "alternative" corresponds to the alternative operation in a grammar, for example a function that parses the EBNF rule 
            digit_excluding_zero = "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ;
       will be written alternative{lex_char("1"), lex_char("2"), lex_char("3"), lex_char("4"), lex_char("5"), lex_char("6"), 
                                   lex_char("7"), lex_char("8"), lex_char("9")}
  * User written one, that are functions that must generate a node of the AST as a "node" attribute of the "state" object.
    To do that they must use attributes of the state object filled by the generic one like 
    * "parsed", the string parsed by the last lexing function,
    * "acc", the list of nodes that are generated by the function that are passed to function that iterates their call namely "star" and "plus" 
    They return nothing if the parse fails, or the state of the last state returned by a lexer function called if they don't.

Other functions are
* maybe : a function to avoid checking if the state is nil every time : if the state is in a fail state, 
  it does not apply the parsing or lexing function. Maybe allows to chain the application of lexing and parsing function easily.
  attribute of the "newstate" object to be able to access the "acc" or "node" of the previous object in a chain, for example.
* idop : a function that takes a function as parameter that is used to compute an action on the current state in a chain of parsing function, 
  mainly modifying variables in the closure. Functions passed to idop returns nothing. Usable for example for logging.
* statemodop : a function that takes a function as parameter ; who computes a node in the AST from the state and the variables of the closure and typically adding 
               it to the state.
               Those functions have the same signatures than parsing functions, but typically they do not parse anything.
--]]

--------------------------------------------------------------------------------
-- lexer
--------------------------------------------------------------------------------

local function lex_regex(state, regex)

	res = string.match(state.newstate.str, "^" .. regex)
	if res then
		local newstate = {}
		local len = string.len(res)
		newstate.str = state.newstate.str:sub(len + 1)
		newstate.pos = state.newstate.pos + len
		return {['lexed'] = res, ["newstate"] = newstate}
	else
		return nil
	end
end

lexer.lex_regex = lex_regex

local function lex_epsilon(state)
	return lex_regex(state, "")
end

--[[
Tests: p.parse("a", p.chain{p.lexer.lex_epsilon, p.lex_char("a"),p.lexer.lex_epsilon})
--]]

lexer.lex_epsilon = lex_epsilon



local lex_char = function(char)
	return function(state)
		return lex_regex(state, char)
	end
end

-- tested with "p.parse("/", p.lex_char('/'))"
lexer.lex_char = lex_char


function lexer.open_parenthesis(state)
	return lex_regex(state, "[(]")
end

function lexer.close_parenthesis(state)
	return lex_regex(state, "[)]")
end

function lexer.lex_integer(state)
	return lex_regex(state, "[0-9]+")
end


function lexer.eat_blanks(state)
	return lex_regex(state,' *')
end

parser.lexer = lexer

----------------------------------------------------------
-- generic parser property
----------------------------------------------------------


local function maybe(state, func)
	if state then
		return func(state)
	end
end

parser.maybe = maybe


local function idop(func)
	return function(state)
		if state then
			func(state)
			return state
		end
	end
end

parser.idop = idop


-- this allows avoiding to pass the state beetween each functions if they were called by hand
local function chain(funcs)
	return function(state)
		local i = 1
		local res = funcs[1](state)
		
		while i < #funcs and res do
			i = i+1
			if funcs[i] == nil then
				error("a nil func in chain")
			end
			res = funcs[i](res)
		end
		return res
	end
end


--[[
Tests : 
p.parse("aba", p.chain{p.lex_char('a'), p.lex_char('b'), p.lex_char('a')}) => yes
p.parse("aba", p.chain{p.lex_char('a'), p.lex_char('b')}) => nope
p.parse("aba", p.chain{p.lex_char('a'), p.lex_char('b'), p.lex_char('a'), p.lex_char('a')}) => nope
--]]

parser.chain = chain

-- higher order function that can parse an alternative between several non terminals.
-- returns the state of the first match
local function alternative(funcs)
	local i = 1

	return function(state)
		while i <= #funcs do
			res = maybe(state, funcs[i])
			if res then 
				return res 
			end
			i=i+1
		end
	end
end

--[[
Tests : 
p.parse("a", p.alternative{p.lex_char('a'), p.lex_char('b')}) => yes
p.parse("b", p.alternative{p.lex_char('a'), p.lex_char('b')}) => yes
p.parse("c", p.alternative{p.lex_char('a'), p.lex_char('b')}) => nope

--]]

parser.alternative = alternative


function star(parsing_func)
	local function star_rec(state, acc, i)

		res = chain{
			parsing_func,
			idop(
				function (stat)
					table.insert(acc, stat.node)
				end
			),
		}(state)
	
		if res then
			return star_rec(res, acc, i + 1)
		else
			return state, acc
		end
	end

	return function(state)
		if state then
			local acc = {}
			result, acc2 = star_rec(state, acc, 1)
			result.acc = acc2
			return result
		end
	end
end

--[[
Tests: p.parse("aaab", p.chain{p.star(p.lex_char("a")), p.lex_char("b")}) => yes
p.parse("b", p.chain{p.star(p.lex_char("a")), p.lex_char("b")}) => yes
--]]

parser.star = star


function plus(parsing_func)
	return function(state)
		local firstnode
		local acc
		
		return chain{
			parsing_func,
			idop(
				function(state)
					firstnode = state.node
				end
			),
			star(parsing_func),
			function(state)
				acc = state.acc
				table.insert(acc, 1, firstnode)
				state.acc = acc
				return state
			end
		}(state)
	end
end


--[[
res = p.parse("aaab", 
              p.chain{
                    p.plus(
                        p.chain{ 
                              p.lex_char("a"), 
                              p.statemodop(function(res) res.node="a" ;  return res; end)
                        }
                    ), 
                    p.lex_char("b")
              }
             )
--]]

parser.plus = plus


--[[
Tests : 
p.parse("a", p.chain{p.lex_char('a'), p.idop(function (state) end )}) => yes
p.parse("ab", p.chain{p.lex_char('a'), p.idop(function (state) end), p.lex_char('b') }) => nope
p.parse("ab", p.chain{p.lex_char('a'), p.idop(function (state) end), p.lex_char('a') }) => nope
--]]


function questionmark(parsing_func)
	return function(state)
		local node = nil
		local res=alternative{
			chain{
				parsing_func,
				idop(function (stat) node = stat.node end)
			},
			lex_epsilon
		}(state)
		res.node = node
		return res
	end
end

parser.questionmark = questionmark

--------------------------------------------------------------------------------
-- main function that launches the parsing
--------------------------------------------------------------------------------

parser.parse = function (string, base_parsing_function)
	local state = {}
	
	state.newstate = {}
	state.newstate.str = string
	state.newstate.pos = 1
	
	local res_node = nil
		
	local res = chain{
		base_parsing_function,
		idop(function(stat) res_node = stat.node end),
		lexer.eat_blanks
	}(state)

	tool.dump_to_console(res)
	
	if res and res.newstate.str=="" then
		mw.log('yes')
		return res_node
	end
	mw.log('nope')
	return nil
end

return parser