Menu

Recursive Descent Simple Calculator Explained

Here is the code for a recursive descent parser for a simple calculator ... it only evaluates one expression, though it could be easily modified to do repeated evaluations. The only real trick here is to notice how semantic values are passed between the scanner and the parser (nothing new there), and how we deal with the "tail" productions that we're forced to use for top-down parsing.

/*** This is the file: rd.cpp ***/

/****************************************
Recursive descent parser for a simple
language.

Grammar for simple arithmetic.  Notice
the odd form of exp and term rules: We
had to write them without "left-recursion"
so that we could parse top-down:

stmt -> exp STOP
exp -> term etail
etail -> OPA term etail | e
term -> factor ttail
ttail -> OPM factor ttail | e
factor -> LP exp RP | NUM
*****************************************/
#include "rd.h"

//-- Globals
int next = -1, nextval = -1, yylval;

//-- Helper functions
void perror() { 
  cerr << "Parse error!" << endl; exit(1); }
int peek() { 
  if (next == -1) { next = yylex(); nextval = yylval; } 
  return next; }
// NOTE: match returns value of token if sucessful
int match(int tok) { 
  if (tok == peek()) { next = -1; return nextval; }
  else  perror(); }

//-- Prototypes
int stmt(); int exp(); int term();  int factor();
// For both etail & ttail, x is the value of the left-
// hand side from which the tail is called.  Inside 
// the tail functions we apply the operator to x and
// the right-hand side y that we read in.  The return
// value should be the total sum/product.
int etail(int x); 
int ttail(int x); 

//-- Grammar rule functions
int stmt() { int x = exp(); match(STOP); return x; }
int exp() {  int x = term(); return etail(x); }
int etail(int x) {
  switch(peek()) {
  case OPA: char c = match(OPA); int y = term(); 
    return etail(c == '+' ? x + y : x - y); break;
  case RP: case STOP: return x; break;
  default: perror(); break;
 }
}
int term() { int x = factor(); ttail(x); }
int ttail(int x) {
  switch(peek()) {
  case OPM: char c = match(OPM); int y = factor(); 
    return ttail(c == '*' ? x*y : x/y); break;
  case RP: case STOP: case OPA: return x; break;
  default: perror(); break;    
  }
}
int factor() {
  switch(peek()) {
  case LP: match(LP); int x = exp(); match(RP); return x; break;
  case NUM: int y = match(NUM); return y; break;
  default: perror(); break;
  }
}

int main() { cout << stmt() << endl; }
/*** This is the file: rd.h ***/
#include <iostream>
using namespace std;

const int NUM = 0, OPA = 1, OPM = 2, LP = 3, RP = 4, STOP = 5;

extern int yylval; // All semantic values of tokens are int's

int yylex();
/*** This is the file: rd.l ***/
%{
#include "rd.h"
%}

%option noyywrap

%%

[0-9]+   { yylval = atoi(yytext); return NUM; }
[\+\-]   { yylval = yytext[0]; return OPA; }
[\*/]    { yylval = yytext[0]; return OPM; }
"("      { return LP; }
")"      { return RP; }
";"      { return STOP; }
<<EOF>>  { return 0; }
[ \t\n]+ { }
.        { cerr << "Unrecognized token!" << endl; exit(1); }
%%
def:
        flex rd.l
        g++ -o mycalc lex.yy.c rd.cpp

Christopher W Brown
Last modified: Wed Sep 23 14:35:43 EDT 2009