Below is my lexeme code. When taking in input, I cant seem to get the correct output. I believe the issue lies in im split function. Can someone help? I'll also include images of the correct output and what output in getting.My output Correct output Also, this is the input for the program Input Can anyone tell me what im doing wrong ?
#include <stdlib.h>
#include <vector>
#include <string>
#include <cstring>
#include <iostream>
#include <algorithm>
using namespace std;
void lexEme(string str);
string getTokenID(string str);
vector <string> tokenize(string str);
string ReplaceAll(string str, string from, string to);
bool is_number(string s);
bool isalphanum(string s);
bool isOperator(char str);
vector<string> split(string str, string token);
vector<string> simpleSplit(string str, string token);
static bool is_decimal(string str);
string merge(vector<string> x);
string toLower(string str);
string toUpper(string str);
bool contain(string str , char token);
vector<string> betweenQuotes(string str);
//Store alphabet we are matching
int main(){
cout<< "Enter string" << endl;
string x;
vector<string> in;
while (getline(cin , x)){
if(x.empty()){
break;
}
in.push_back(x);
}
for(int i = 0;i < in.size(); i++){
lexEme(in[i]);
}
cout << "done" << endl;
return 0;
}
//This handles the lexical analysis
void lexEme(string str){
//We store our broken up string in here
vector<string> tokens;
//We handle some preparsing to avoid potential errors when reading lines with ( and )
str = ReplaceAll(str, "(", " ( ");
str = ReplaceAll(str, ")", " ) ");
str = ReplaceAll(str , "\n" , " ");
bool base = true;
bool hasStrings = false;
vector<string> temp;
//We assign tokens to our tokenized string
// str = ReplaceAll(str , "\"" , "?\"" );
for (int k = 0; k < str.length(); ++k) {
if(str[k] == '"') hasStrings = true;
if(str[k] == ' ') base = false;
}
if(base){
// cout << "this is base" << endl;
if (isalphanum(str) && isdigit(str[0])){
int iterate = 0;
string num;
string t;
while (iterate < str.length()) {
if (isalpha(str[iterate])) {
num += str[iterate];
} else {
t += str[iterate];
}
iterate++;
}
temp.push_back(t);
temp.push_back(num);
temp = split(str , " ");
cout << "lexeme: |" + temp[0] + "| length:" + to_string(temp[0].size()) + " token: " << getTokenID(temp[0]) << endl;
cout << "lexeme: |" + temp[1] + "| length:" + to_string(temp[1].size()) + " token: " << getTokenID(temp[1]) << endl;
return;
}else if(!isalphanum(str)){
// int index = 0;
}
else{
cout << "lexeme: |" + str + "| length:" + to_string(str.size()) + " token: " << getTokenID(str) << endl;
}
}else{
if(hasStrings) {
temp = split(str, "\"");
for (int i = 0; i < temp.size(); i++) {
if (i % 2 == 0) {
vector<string> hold = split(temp[i], " ");
for (int j = 0; j < hold.size(); j++) {
tokens.push_back(hold[j]);
}
} else {
if (temp[i][temp[i].length()] == '?' && temp[i][0] == '?') {
temp[i] = ReplaceAll(temp[i], "?", "\"");
tokens.push_back(temp[i]);
} else {
temp[i] = ReplaceAll(temp[i], "?", "\"");
tokens.push_back(temp[i]);
}
}
}
}else{
tokens = simpleSplit(str , " ");
// cout << "breakpt" << endl;
}
//Here we iterate and print out our results
for (int i = 0; i < tokens.size(); ++i) {
if(tokens[i] != " " && tokens.size() > 0) {
cout << "lexeme: |" + tokens[i] + "| length:" + to_string(tokens[i].size()) + " token: "
<< getTokenID(tokens[i]) << endl;
}
}
}
}
bool contain(string str , char token){
for (char i : str) {
if(i == token){
return true;
}
}
}
vector<string> simpleSplit(string str, string token) {
vector<string> result;
vector<string> finalResults;
while (str.size()) {
int index = str.find(token);
if (index != string::npos) {
result.push_back(str.substr(0, index));
str = str.substr(index + token.size());
if (str.size() == 0)result.push_back(str);
} else {
result.push_back(str);
str = "";
}
}
return result;
}
vector<string> split(string str, string token){
vector<string>result;
vector<string> finalResults;
while(str.size()){
int index = str.find(token);
if(index!=string::npos){
result.push_back(str.substr(0,index));
str = str.substr(index+token.size());
if(str.size()==0)result.push_back(str);
}else{
result.push_back(str);
str = "";
}
}
//clean out
for (int i = 0; i < result.size();i++){
if (!result[i].empty() && result[i] != " " && result[i].length() > 0){
//Weird cases like 123abc
if(isalphanum(result[i]) && isdigit(result[i][0])){
int iterate = 0;
string num;
string t;
while (iterate < result[i].length()) {
if (isalpha(result[i][iterate])) {
num += result[i][iterate];
} else {
t += result[i][iterate];
}
iterate++;
}
finalResults.push_back(t);
finalResults.push_back(num);
}else if(isalphanum(result[i])){
for(int i2 = 0; i2 < result[i].length(); i2++){
string tmp ="";
if (!isalnum(result[i][i2]) && isOperator(result[i][i2]))
if(tmp.length() < 2) {
tmp += result[i][i2];
}
finalResults.push_back(tmp);
tmp = "";
}
}else if(i != result.size() - 1){
if(result[i + 1][result[i + 1].length()] == '"'){
result[i] = result[i] + " " + result[i + 1];
result[i + 1] = " ";
finalResults.push_back(result[i]);
i++;
}
}
finalResults.push_back(result[i]);
}
}
vector<string> reclean;
for(int i = 0; i < finalResults.size(); i++){
if (finalResults[i].length() > 0 && finalResults[i].length() != ' '){
ReplaceAll(finalResults[i] , "?" , "");
reclean.push_back(finalResults[i]);
}
}
return reclean;
}
//This function handles encoding tokens.
string getTokenID(string str){
string id = ""; //our result will be stored here
//Here we create 2 arrays for each section, the 1st represents the value of our identifiers,
// the 2nd represents the respective encoding
vector<string> keywords = {"if","else","for","while","print","return","continue","break","debug","read","let"};
vector<string> keywordsEnc = {"1001","1002","1003","1004","1005","1006","1007","1008","1009","1010","1011"};
vector<string> datatypes = {"int" , "float" , "string"};
vector<string> datatypesEnc = {"1100" , "1101" , "1102"};
vector<string> punctuations = {";" , "(" , ")" , "[" , "]" , "{" , "}" , ","};
vector<string> punctuationsEnc = {"2000" , "2001" , "2002" , "2003" , "2004" , "2005" , "2006" , "2007"};
vector<string> operators = {"+" , "-" , "*" , "/" , ":=" , "==" , "<" , ">" , "<>" , "and" , "or" , "not" , "length"};
vector<string> operatorsEnc = {"3000" , "3001" , "3002" , "3003" , "3004" , "3005" , "3006" , "3007" , "3008" , "3009" , "3010" , "3011" , "3012"};
vector<string> abstractions = {"identifier" , "integer literal" , "floating-point literal" , "End of file" , "Unknown lexeme"};
vector<string> abstractionsEnc = {"4000" , "4001" , "4002" , "4003" , "5000" , "6000"};
//Now we run through and determine where our cases match.
for (int i = 0; i < keywords.size(); ++i) {
if (str == keywords[i]){
id = keywordsEnc[i];
return id;
}
}
for (int i = 0; i < datatypes.size(); ++i) {
if (str == datatypes[i]){
id = datatypesEnc[i];
return id;
}
}
for (int i = 0; i < punctuations.size(); ++i) {
if (str == punctuations[i]){
id = punctuationsEnc[i];
return id;
}
}
for (int i = 0; i < operators.size(); ++i) {
if (str == operators[i]){
id = operatorsEnc[i];
return id;
}
}
for (int i = 0; i < abstractions.size(); ++i) {
if (str == abstractions[i]){
id = abstractionsEnc[i];
return id;
}
}
//Special conditions for strings, decimals and integers are handled below
if(id == "") {
if (str[0] == '"' && str[str.length()] == '"'){
id = "4003";
return id;
} else if (is_number(str)){
id = "4001";
return id;
}else if(is_decimal(str)) {
id = "4002";
return id;
}else if(str == toUpper(str)){
if(str == toLower(str) && str[0] != EOF){
id = "6000";
return id;
}else{
id = "5001";
return id;
}
}else{
id = "4000";
return id;
}
}
return id;
}
string toLower(string str)
{
std::transform(str.begin(), str.end(), str.begin(), ::tolower);
return str;
}
bool isalphanum(string str){
int i = 0;
while(i < str.length()){
return isalnum(str[i]) != 0;
}
}
string toUpper(string str)
{
std::transform(str.begin(), str.end(), str.begin(), ::toupper);
return str;
}
//Checks to see if s is an integer
bool is_number(string s)
{
string::const_iterator it = s.begin();
while (it != s.end() && std::isdigit(*it)) ++it;
return !s.empty() && it == s.end();
}
//Combines all in x and returns as one string
string merge(vector<string> x){
string ans;
for (int i = 0; i < x.size(); ++i) {
ans += x[i];
}
}
//Checks to see if str is a decimal or float
static bool is_decimal(string str){
string::const_iterator it = str.begin();
bool decimalPoint = false;
int minSize = 0;
if(str.size()>0 && (str[0] == '-' || str[0] == '+')){
it++;
minSize++;
}
while(it != str.end()){
if(*it == '.'){
if(!decimalPoint) decimalPoint = true;
else break;
}else if(!isdigit(*it) && ((*it!='f') || it+1 != str.end() || !decimalPoint)){
break;
}
++it;
}
return str.size()>minSize && it == str.end();
}
string ReplaceAll(string str, string from, string to) {
size_t start_pos = 0;
while((start_pos = str.find(from, start_pos)) != std::string::npos) {
str.replace(start_pos, from.length(), to);
start_pos += to.length(); // Handles case where 'to' is a substring of 'from'
}
return str;
}
bool isOperator(char str){
string operators[] = {"+", "-", "/", "*", "%", "^", ">", "<"};
for (int i = 0; i < operators->length(); ++i) {
if (str == operators[i][0]){
return true;
}
}
}
vector<string> operatorExtractor(string str){
vector<string> ans;
string temp;
int index = 0;
while (index < str.length()) {
if (isOperator(str[index])) {
temp += str[index];
if(isOperator(str[index + 1] && str[index + 1] != '<' && str[index + 1] != '>')){
temp += str[index + 1];
ans.push_back(temp);
temp = "";
}
}
}
}
Aucun commentaire:
Enregistrer un commentaire