Help with Boost Grammar
- by Decmanc04
I have been using the following win32 console code to try to parse a B Machine Grammar embedded within C++ using Boost Spirit grammar template. I am a relatively new Boost user. The code compiles, but when I run the .exe file produced by VC++2008, the program partially parses the input file. I believe the problem is with my grammar definition or the functions attached as semantic atctions.
The code is given below:
// BIFAnalyser.cpp : Defines the entry point for the console application.
//
//
/*=============================================================================
Copyright (c) Temitope Jos Onunkun 2010
http://www.dcs.kcl.ac.uk/pg/onun/
Use, modification and distribution is subject to the Boost Software
License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
http://www.boost.org/LICENSE_1_0.txt)
=============================================================================*/
////////////////////////////////////////////////////////////////////////////
// //
// B Machine parser using the Boost "Grammar" and "Semantic Actions". //
// //
////////////////////////////////////////////////////////////////////////////
#include <boost/spirit/core.hpp>
#include <boost/tokenizer.hpp>
#include <iostream>
#include <string>
#include <fstream>
#include <vector>
#include <utility>
///////////////////////////////////////////////////////////////////////////////////////////
using namespace std;
using namespace boost::spirit;
///////////////////////////////////////////////////////////////////////////////////////////
//
// Semantic actions
//
////////////////////////////////////////////////////////////////////////////
vector<string> strVect;
namespace
{
//semantic action function on individual lexeme
void do_noint(char const* str, char const* end)
{
string s(str, end);
if(atoi(str))
{
;
}
else
{
strVect.push_back(s);
cout << "PUSH(" << s << ')' << endl;
}
}
//semantic action function on addition of lexemes
void do_add(char const*, char const*)
{
cout << "ADD" << endl;
for(vector<string>::iterator vi = strVect.begin(); vi < strVect.end(); ++vi)
cout << *vi << " ";
}
//semantic action function on subtraction of lexemes
void do_subt(char const*, char const*)
{
cout << "SUBTRACT" << endl;
for(vector<string>::iterator vi = strVect.begin(); vi < strVect.end(); ++vi)
cout << *vi << " ";
}
//semantic action function on multiplication of lexemes
void do_mult(char const*, char const*)
{
cout << "\nMULTIPLY" << endl;
for(vector<string>::iterator vi = strVect.begin(); vi < strVect.end(); ++vi)
cout << *vi << " ";
cout << "\n";
}
//semantic action function on division of lexemes
void do_div(char const*, char const*)
{
cout << "\nDIVIDE" << endl;
for(vector<string>::iterator vi = strVect.begin(); vi < strVect.end(); ++vi)
cout << *vi << " ";
}
//semantic action function on simple substitution
void do_sSubst(char const* str, char const* end)
{
string s(str, end);
//use boost tokenizer to break down tokens
typedef boost::tokenizer<boost::char_separator<char> > Tokenizer;
boost::char_separator<char> sep("-+/*:=()"); // default char separator
Tokenizer tok(s, sep);
Tokenizer::iterator tok_iter = tok.begin();
pair<string, string > dependency; //create a pair object for dependencies
//save first variable token in simple substitution
dependency.first = *tok.begin();
//create a vector object to store all tokens
vector<string> dx;
//
for( ; tok_iter != tok.end(); ++tok_iter) //save all tokens in vector
{
dx.push_back(*tok_iter );
}
vector<string> d_hat; //stores set of dependency pairs
string dep; //pairs variables as string object
for(int unsigned i=1; i < dx.size()-1; i++)
{
dependency.second = dx.at(i);
dep = dependency.first + "|->" + dependency.second + " ";
d_hat.push_back(dep);
}
cout << "PUSH(" << s << ')' << endl;
for(int unsigned i=0; i < d_hat.size(); i++)
cout <<"\n...\n" << d_hat.at(i) << " ";
cout << "\nSIMPLE SUBSTITUTION\n";
}
//semantic action function on multiple substitution
void do_mSubst(char const* str, char const* end)
{
string s(str, end);
//use boost tokenizer to break down tokens
typedef boost::tokenizer<boost::char_separator<char> > Tok;
boost::char_separator<char> sep("-+/*:=()"); // default char separator
Tok tok(s, sep);
Tok::iterator tok_iter = tok.begin();
// string start = *tok.begin();
vector<string> mx;
for( ; tok_iter != tok.end(); ++tok_iter) //save all tokens in vector
{
mx.push_back(*tok_iter );
}
mx.push_back("END\n"); //add a marker "end"
for(unsigned int i=0; i<mx.size(); i++)
{
// if(mx.at(i) == "END" || mx.at(i) == "||" )
// break;
// else if( mx.at(i) == "||")
// do_sSubst(str, end);
// else
// {
// do_sSubst(str, end);
// }
cout << "\nTokens ... " << mx.at(i) << " ";
}
cout << "PUSH(" << s << ')' << endl;
cout << "MULTIPLE SUBSTITUTION\n";
}
}
////////////////////////////////////////////////////////////////////////////
//
// Simple Substitution Grammar
//
////////////////////////////////////////////////////////////////////////////
// Simple substitution grammar parser with integer values removed
struct Substitution : public grammar<Substitution>
{
template <typename ScannerT>
struct definition
{
definition(Substitution const& )
{
multi_subst
= (simple_subst [&do_mSubst]
>> +( str_p("||") >> simple_subst [&do_mSubst])
)
;
simple_subst
= (Identifier >> str_p(":=")
>> expression)[&do_sSubst]
;
Identifier
= alpha_p >> +alnum_p//[do_noint]
;
expression
= term
>> *( ('+' >> term)[&do_add]
| ('-' >> term)[&do_subt]
)
;
term
= factor
>> *( ('*' >> factor)[&do_mult]
| ('/' >> factor)[&do_div]
)
;
factor
= lexeme_d[( (alpha_p >> +alnum_p) | +digit_p)[&do_noint]]
| '(' >> expression >> ')'
| ('+' >> factor)
;
}
rule<ScannerT> expression, term, factor, Identifier, simple_subst,
multi_subst ;
rule<ScannerT> const&
start() const
{
return multi_subst;
}
};
};
////////////////////////////////////////////////////////////////////////////
//
// Main program
//
////////////////////////////////////////////////////////////////////////////
int
main()
{
cout << "************************************************************\n\n";
cout << "\t\t...Machine Parser...\n\n";
cout << "************************************************************\n\n";
// cout << "Type an expression...or [q or Q] to quit\n\n";
//prompt for file name to be input
cout << "Please enter a filename...or [q or Q] to quit:\n\n ";
char strFilename[256]; //file name store as a string object
cin >> strFilename;
ifstream inFile(strFilename); // opens file object for reading
//output file for truncated machine (operations only)
Substitution elementary_subst; // Simple substitution parser object
string str, next;
// inFile.open(strFilename);
while (inFile >> str)
{
getline(cin, next);
str += next;
if (str.empty() || str[0] == 'q' || str[0] == 'Q')
break;
parse_info<> info = parse(str.c_str(), elementary_subst, space_p);
if (info.full)
{
cout << "\n-------------------------\n";
cout << "Parsing succeeded\n";
cout << "\n-------------------------\n";
}
else
{
cout << "\n-------------------------\n";
cout << "Parsing failed\n";
cout << "stopped at: \": " << info.stop << "\"\n";
cout << "\n-------------------------\n";
}
}
cout << "Please enter a filename...or [q or Q] to quit\n";
cin >> strFilename;
return 0;
}
The contents of the file I tried to parse, which I named "mf7.txt" is given below:
debt:=(LoanRequest+outstandingLoan1)*20 || newDebt := loanammount-paidammount
The output when I execute the program is:
************************************************************
...Machine Parser...
************************************************************
Please enter a filename...or [q or Q] to quit:
c:\tplat\mf7.txt
PUSH(LoanRequest)
PUSH(outstandingLoan1)
ADD
LoanRequest outstandingLoan1
MULTIPLY
LoanRequest outstandingLoan1
PUSH(debt:=(LoanRequest+outstandingLoan1)*20)
...
debt|->LoanRequest
...
debt|->outstandingLoan1
SIMPLE SUBSTITUTION
Tokens ... debt
Tokens ... LoanRequest
Tokens ... outstandingLoan1
Tokens ... 20
Tokens ... END
PUSH(debt:=(LoanRequest+outstandingLoan1)*20)
MULTIPLE SUBSTITUTION
-------------------------
Parsing failedstopped at: ": "
-------------------------
My intention is to capture only the variables in the file, which I managed to do up to the "||" string. Clearly, the program is not parsing beyond the "||" string in the input file. I will appreciate assistance to fix the grammar. SOS, please.