Create nfa from a regular expression

"Felix Dorner" <felix.dorner@gmail.com>
6 Oct 2006 17:14:50 -0400

          From comp.compilers

Related articles
Create nfa from a regular expression felix.dorner@gmail.com (Felix Dorner) (2006-10-06)
Re: Create nfa from a regular expression pjb@informatimago.com (Pascal Bourguignon) (2006-10-07)
| List of all articles for this month |
From: "Felix Dorner" <felix.dorner@gmail.com>
Newsgroups: comp.compilers
Date: 6 Oct 2006 17:14:50 -0400
Organization: Compilers Central
Keywords: lex, question
Posted-Date: 06 Oct 2006 17:14:50 EDT

hi,


I Tried To Write A Simple Compiler That Creates A Nondeterministic
Finite Automaton From A Regular Expression, However I Am Really Not
Sure My Solution Is The Way To Go, Most Because I Cannot Figure Out How
To Introduce Operator Precedence. So the given expression is evaluated
simply from the left to the right. (Operator precedence can then be
forced by putting braces) There are some cases that are still to be
catched, but the main work is done. I would be really pleased to get
some comments on my code.


Thanks,
Felix


public class NFAFactory {


private static Stack<NFA> nfaStack;
private static Stack<Character> braceStack;
private static Tokenizer t;


private static int idCounter;


public static NFA makeNFA(String[] literals, String regex) throws
ParseException {
idCounter = 0;
nfaStack = new Stack<NFA>();
braceStack = new Stack<Character>();
t = new Tokenizer(literals, regex);
makeit();
return nfaStack.pop();


}






private static void makeit() throws ParseException {


char token;
while (true) {
token = t.next();
if (token == 0){
if (!braceStack.empty()){
throw new ParseException("ERROR: ) expected");
}
System.out.println("ENDE");
break;
}
else if (token == Tokenizer.LITERAL){
handleLiteral();
}
else if (token == Tokenizer.CONCAT){
handleConcat();
}
else if (token == Tokenizer.UNION){
handleUnion();
}


else if (token == Tokenizer.STAR){
handleStar();
}


else if (token == Tokenizer.LEFTB){
handleLeftBrace();
}


else if (token == Tokenizer.RIGHTB){
handleRightBrace();
break;
}
}


}






private static void handleLiteral() throws ParseException {
if (t.getLastToken() == Tokenizer.RIGHTB){
throw new ParseException("ERROR: ) cannot be followed by a
Literal.");
}
if (t.getLastToken() == Tokenizer.STAR){
throw new ParseException("ERROR: * cannot be followed by a
Literal");
}


NFA n = new NFA();
State s0 = new State(idCounter++);
State s1 = new State(idCounter++);
n.addState(s0);
n.addState(s1);
s0.addTransition(t.getLastLiteral(), s1);
n.setInitialState(s0);
n.setFinalState(s1);
nfaStack.push(n);
}


private static void handleUnion() throws ParseException{
if (t.getLastToken() == Tokenizer.LEFTB){
throw new ParseException("ERROR: ( cannot be followed by |");
}
if (t.getLastToken() == Tokenizer.UNION){
throw new ParseException("ERROR: | cannot be followed by |");
}


if (t.getLastToken() == Tokenizer.CONCAT){
throw new ParseException("ERROR: ; cannot be followed by |");
}


if (nfaStack.isEmpty()) {
throw new ParseException("ERROR: RegExp cannot start with '|'");
}


makeit();
NFA n2 = nfaStack.pop();


if (nfaStack.isEmpty()) {
throw new ParseException("ERROR: RegExp cannot end with '|'");
}


NFA n1 = nfaStack.pop();
n1.addStates(n2.getStates());


State start = new State(idCounter++);
State end = new State(idCounter++);
n1.addState(start);
n1.addState(end);


start.addTransition(null, n1.getInitialState());
start.addTransition(null, n2.getInitialState());


n1.getFinalState().addTransition(null, end);
n2.getFinalState().addTransition(null, end);


n1.setInitialState(start);
n1.setFinalState(end);


nfaStack.push(n1);




}


private static void handleConcat() throws ParseException {
if (t.getLastToken() == Tokenizer.LEFTB){
throw new ParseException("ERROR: ( cannot be followed by ;");
}


if (nfaStack.isEmpty()) {
throw new ParseException("ERROR: RegExp cannot start with ';'");
}
makeit();
concat();
}


private static void handleStar() throws ParseException {
if (t.getLastToken() != Tokenizer.RIGHTB){
throw new ParseException("* must be preceded by )");
}


NFA n1 = nfaStack.pop();
n1.getFinalState().addTransition(null, n1.getInitialState());
n1.getInitialState().addTransition(null, n1.getFinalState());


nfaStack.push(n1);
}


private static void handleLeftBrace() throws ParseException {
braceStack.push(Tokenizer.LEFTB);
// Handle )(, *( , a( as concatenations
if (t.getLastToken() == Tokenizer.LITERAL ||
t.getLastToken() == Tokenizer.RIGHTB ||
t.getLastToken() == Tokenizer.STAR) {


makeit();
concat();
}


else {
makeit();
}
}


private static void handleRightBrace() throws ParseException{
if (braceStack.isEmpty()){
throw new ParseException("ERROR: Too many )īs");
}


if (t.getLastToken() == Tokenizer.CONCAT){
throw new ParseException("ERROR: ; cannot be followed by )");
}
if (t.getLastToken() == Tokenizer.UNION){
throw new ParseException("ERROR: | cannot be followed by )");
}
braceStack.pop();
}






private static void concat() throws ParseException {
NFA n2 = nfaStack.pop();
if (nfaStack.isEmpty()) {
throw new ParseException("ERROR: RegExp cannot end with ';'");
}
NFA n1 = nfaStack.pop();


for (State s : n2.getStates()){
n1.addState(s);
}


n1.getFinalState().addTransition(null, n2.getInitialState());
n1.setFinalState(n2.getFinalState());


nfaStack.push(n1);
}




public static void main(String[] args) throws ParseException {


String[] lit = {"north", "east", "west", "south"};
String exp = "(south)*";


NFA f = NFAFactory.makeNFA(lit, exp);
f.print();
}


}


Post a followup to this message

Return to the comp.compilers page.
Search the comp.compilers archives again.