Parser(作りかけ)

amachangyaccパーザを書き始めたので、書き途中のjsParserをとりあえず晒しておく
俺のコードなんて晒し首だ!
晒されて乾いてしまえ!

以下、長いコードが載っているだけなので興味ない人は無視すれ

非終端記号をこんな風に定義して

S : NP VP
NP : N
NP : DET N
VP : V
VP : V NP
VP : VP PP
PP : PREP NP

終端記号をこんな風に定義

N : I, girl, telescope
V : saw, had
DET : a
PREP : with

でもって、

I saw a girl with a telescope

こういう文章を渡す。

そしたら、パージングして構文木を作る(予定)

岩波講座 ソフトウェア科学〈〔知識〕15〉自然言語処理

岩波講座 ソフトウェア科学〈〔知識〕15〉自然言語処理


↑を参考にLR(1)を実装中なんです

gotoグラフと、LRテーブルはできてるんだけどな・・・

/************************************************************
 *
 * @name	:	jsParser.js
 * @description	:
 * @version	:	0.0
 * @author	:	muddydixon <muddydixon@gmail.com>
 * @date	:	
 *
 ************************************************************/
    
/************************************************************
 *	alert function for debuging
 */
var debug = 1;
var _alert = alert;
var alert = function(msg){if(debug) _alert(msg);};

/************************************************************
 *	cons function
 */
var cons = function(x, xs){
    var a = new Array();
    if(x.length && typeof x != 'string' && x.length> 0)
	while(x.length) a.push(x.shift());
    else if(x && x.length == 0){}
    else if(x) a.push(x);
    for(var i in xs) a.push(xs[i]);
    return a;
};

/************************************************************
 *	utility functions
 */
var $ = function(elm){return document.getElementById(elm)};
var nullFilter = function(line){if(!line.match(/^[\s\r\n\b]*$/)) return line};

// @name	:	dontHasA
// @description	:	if as has array a, then return true, else return false
var dontHasA = function(as, a){
    var flg = true;
    for(var i in as){
	var _flg = false;
	if(as[i].length == a.length){
	    for(var j in as[i]) if(as[i][j] != a[j]) _flg = true;
	}else{
	    _flg = true;
	}
	flg = flg && _flg;
    }
    return flg;
};

// @name	:	hasState
// @description	:	if states has state, then return state, else false
var hasState = function(states, state){
    for(var i in states){
	if(sameState(states[i], state)){
	    return states[i];
	}
    }
    return false;
};

// @name	:	sameState
// @description	:	if state x equals y, then return true, else return false
var sameState = function(x, y){
    if(x.eqs.length != y.eqs.length) return false;
    for(var i in x.eqs){
	if(x.eqs[i].length != y.eqs[i].length) return false;
	for(var j in x.eqs[i]){
	    if(x.eqs[i][j] != y.eqs[i][j]) return false;
	}
    }
    return true;
};

/************************************************************
 *	states
 *	...that's too bad...
 */
var states = new Array();

/************************************************************
 *	State object
 */
var State = function(){this.initialize.apply(this, arguments)};
State.prototype = {
    initialize : function(id, state, rules, specialSymbol){
	this.id = id;
	this.eqs = state;
	this.num = 0;
	this.nextSym = new Array();
	this.next = new Array();
	this.action = {};
	this.goto = {};

	this.growState(rules, specialSymbol);
	this.getNextOfPt(rules, specialSymbol);
	var s = hasState(states, this)
	if(!s){
	    states.push(this);
	    this.getNextState(rules, specialSymbol);
	}
    },
    
    // @name		:	growState
    growState : function(_rules, specialSymbol){
	var change = true;
	var eqs = this.eqs;
	var pt = specialSymbol.pt, begin = specialSymbol.begin, end = specialSymbol.end;

	while(change){
	    change = false;
	    for(var i in eqs){
		for(var j in eqs[i]){
		    if(eqs[i][j] == pt && eqs[i][parseInt(j) + 1] && eqs[i][parseInt(j) + 1] != end){
			var r = _rules[eqs[i][parseInt(j) + 1]];
			if(r){
			    for(var k in r){
				var c = cons(eqs[i][parseInt(j) + 1], cons(pt, r[k]));
				if(dontHasA(eqs, c)){
				    eqs.push(c);
				    change = true;
				}
			    }
			}
		    }else{
		    }
		}
	    }
	}
    },
    getNextOfPt : function(_rules, specialSymbol){
	var ns = this.nextSym;
	var eqs = this.eqs;
	var pt = specialSymbol.pt, begin = specialSymbol.begin, end = specialSymbol.end;
	
	for(var i in eqs){
	    for(var j in eqs[i]){
		if(eqs[i][j] == pt && eqs[i][parseInt(j) + 1] && eqs[i][parseInt(j) + 1] != end){
		    if(!ns[eqs[i][parseInt(j) + 1]]) ns[eqs[i][parseInt(j) + 1]] = new Array();
		    ns[eqs[i][parseInt(j) + 1]].push(eqs[i]);
		}
	    }
	}
    },
    getNextState : function(_rules, specialSymbol){
	var ns = this.nextSym;
	var eqs = this.eqs;
	var pt = specialSymbol.pt, begin = specialSymbol.begin, end = specialSymbol.end;

	for(var i in ns){
	    var nns = new Array();
	    for(var j in ns[i]){
		for(var k in ns[i][j]){
		    if(ns[i][j][k] == pt && ns[i][j][parseInt(k) + 1] && ns[i][j][parseInt(k) + 1] != end){
			nns.push(cons(ns[i][j].slice(0,parseInt(k)), cons([ns[i][j][parseInt(k) + 1], pt], ns[i][j].slice(parseInt(k)+2, ns[i][j].length))));
		    }
		}
	    }
	    var n = new State(states.length, nns, _rules, specialSymbol);
	    var s = hasState(states, n);
	    if(s)  this.next[i] = s;
	}
    }
};


/************************************************************
 *	JSparser object
 */
var JSparser = function(){ this.initialize.apply(this, arguments)};
JSparser.prototype = {
    initialize : function(ruleElement, terminalElement){
	this.pt = '__point__';
	this.begin = '__Sp__';
	this.end = '__$__';
	this.first = null;
	this.follow = null;
	this.specialSymbol = {pt: this.pt, begin: this.begin, end: this.end};
	this.stack = new Array();
	
 	this.termRules = this.getTerminals(terminalElement);
	this.rules = this.compileRuleSet(ruleElement);
	this.gotoGraph = this.createGoToGraph(this.rules);
 	this.getFirstAndFollow(this.rules, this.termRules);
  	this.createLRTable(this.rules, this.termRules, this.gotoGraph);

	return this;
    },
    getTerminals : function(srcTerm){
	var srcTerm = $(srcTerm).value.split(/\s*\n\s*/).filter(nullFilter);
	var termRules = {};
	while(srcTerm.length){
	    var BS = srcTerm.shift().split(/\s*:\s*/).filter(nullFilter);
	    if(!termRules[BS[0]]) termRules[BS[0]] = new Array();
	    termRules[BS[0]].push(cons(termRules[BS[0]], BS[1].split(/\s*,\s*/).filter(nullFilter)));
	}
	return termRules;
    },
    compileRuleSet : function(srcRule){
	var begin = this.begin, end = this.end;
	var srcRule = $(srcRule).value.split(/\s*\n\s*/).filter(nullFilter);
	var rules = {};
	rules[begin] = [['S', end]];
	
	for(var i = 0, l = srcRule.length; i < l; i++){
	    var BS = srcRule[i].split(/\s*:\s*/);
	    if(BS.length != 2) return false;

	    var LHS = BS[0];
	    
	    if(!rules[LHS]) rules[LHS] = new Array();
	    var RHSs = BS[1].split(/,\s*/);
	    while(RHSs.length){
		rules[LHS].push(RHSs.pop().split(/\s+/).filter(nullFilter));
	    }
	}
	return rules;
    },
    
    createLRTable : function(_rules, termRules, gotoGraph){
	for(var i in states){
	    for(var t in termRules){
		if(states[i].next[t]){
		    states[i].action[t] = this._shift(states[i].next[t].id);
		}else{
		    for(var j in states[i].eqs){
			if(states[i].eqs[j][states[i].eqs[j].length - 1] == this.pt){
			    var follow = this.follow[states[i].eqs[j][0]];
			    for(var f in follow){
				states[i].action[t] = this._reduce(states[i].eqs[j]);
			    }
			}
		    }
		}
	    }
	    for(var c in _rules){
		if(states[i].next[c]){
		    states[i].goto[c] = states[i].next[c];
		}
	    }
	}
    },

    createGoToGraph : function(_rules){
	return new State(states.length, [[this.begin, this.pt, 'S', this.end]], _rules, this.specialSymbol);
    },
    
    getFirstAndFollow : function(_rules, _terms){
	var begin = this.begin, end = this.end;
	var first = {};
	var follow = {};

	var _open = function(elm){
	    var a = new Array();
	    if(_rules[elm]){  for(var i in _rules[elm])  if(_terms[_rules[elm][i][0]])  a.push(_rules[elm][i][0]);}
	    else{  if(_terms[elm])  a.push(elm);}
	    return a;
	};
	var _createFirst = function(){
	    var first = {};
	    first[end] = {}; first[end][end] = true;
	    
	    for(var x in _rules){
		var _first = {};
		for(var i in _rules[x]){
		    var c = _open(_rules[x][i][0]);
		    while(c.length)
			_first[c.shift()] = true;
		}
		first[x] = _first;
	    }
	    return first;
	};

	var _createFollow = function(){
	    var follow = {};
	    var change = true;
	    follow[begin] = {}; follow[begin][end] = true;

	    while(change){
		change = false;
		for(var x in _rules){
		    if(!follow[x]) follow[x] = {};

		    for(var lhs in _rules){
			for(var i in _rules[lhs]){
			    for(var j in _rules[lhs][i]){
				if(_rules[lhs][i][j] == x){
				    //FOLLOW(Y) | Y -> aX
				    var c = follow[lhs];
				    if(c)
					for(var _c in c){
					    if(!follow[x][_c]) change = true;
					    follow[x][_c] = true;
					}
				    
				    //FIRST(Y) | Z -> aXYb
				    if(_rules[lhs][i][parseInt(j)+1]){
					var c = first[_rules[lhs][i][parseInt(j)+1]];
					if(c)
					    for(var _c in c){
						if(!follow[x][_c]) change = true;
						follow[x][_c] = true;
					    }
				    }
				}
			    }
			}
		    }
		}
	    }
	    return follow;
	};

	first = _createFirst();
 	follow = _createFollow();

	this.first = first
	this.follow = follow;
    },

    _shift : function(i){
	return function(id){
	    alert('shift_'+i);
	    this.stack.push(id);
	    this.stack.push(states[i]);

	    return false;
	}
    },
    _reduce : function(r){
	return function(c_id){
	    alert('reduce_'+r[0]);
	    var t_st = this.stack.pop();
	    var t_id = this.stack.pop();
	    var id = [r[0], t_id];
	    var gt = this.stackTop().goto[r[0]];
	    this.stack.push(id);
	    this.stack.push(gt);
	    
	    return c_id;
	}
    },
    tokenize : function(str){
	return str.split(/\s+/);
    },
    toPTerm : function(token){
	for(var i in this.termRules){
	    for(var j = 0, l = this.termRules[i].length; j < l; j++){
		for(var k = 0, m = this.termRules[i][j].length; k < m; k++){
		    if(token == this.termRules[i][j][k])
			return i;
		}
	    }
	}
	return false;
    },
    stackTop : function(){
	return this.stack[this.stack.length - 1];
    },
    exec : function(line){
	alert(line);
	var tokens = this.tokenize(line);
	this.stack.push(states[0]);
	while(tokens.length){
	    var token = tokens.shift();
	    var id = {p : this.toPTerm(token), token : token};
	    
	    
	    var c_id = true;
	    alert(id.token + ' in ' + id.p + '\n'+this.stackTop().id + '\n' + this.stackTop().action[id.p]);
	    while(c_id = this.stackTop().action[id.p].apply(this, [id])){
		id = c_id;
		alert(id.token + ' in ' + id.p + '\n'+this.stackTop().id + '\n' + this.stackTop().action[id.p]);
	    }
	}
	var a = 'OK';
	return a;
    }
}