#include "arff.hpp"

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>

namespace ARFF {

namespace {

// Characters treated as blank space around lines and tokens.
const char* const kBlanks = " \t\r\n";

// Returns `text` without its leading and trailing blank characters.
std::string TrimBlanks(const std::string& text) {
    const std::string::size_type first = text.find_first_not_of(kBlanks);
    if (first == std::string::npos) {
        return std::string();
    }
    const std::string::size_type last = text.find_last_not_of(kBlanks);
    return text.substr(first, last - first + 1);
}

// Returns `text` with every character converted to lower case.
std::string LowerCased(std::string text) {
    std::transform(text.begin(), text.end(), text.begin(), [](unsigned char c) {
        // std::tolower requires a value representable as unsigned char.
        return static_cast<char>(std::tolower(c));
    });
    return text;
}

}  // namespace

// When true, LogInfo() prints its messages; set by ParseArguments().
bool isVerbose = false;

// Scans the given arguments and enables verbose output when "-v" or
// "--verbose" is present.
//   argc - number of entries in argv
//   argv - argument strings; every entry in [0, argc) is inspected
void ParseArguments(int argc, char* argv[]) {
    for (int i = 0; i < argc; ++i) {
        const std::string argument(argv[i]);
        if (argument == "-v" || argument == "--verbose") {
            isVerbose = true;
        }
    }
}

// Prompts on standard output and reads one whitespace-delimited filename
// from standard input. Terminates the process with status 1 when nothing
// could be read.
std::string GetDataFilename(void) {
    std::cout << "Please enter name of the data file:\t";

    std::string filename;
    std::cin >> filename;
    if (filename.empty()) {
        LogError("ARFF/Setup", "No data filename provided, exiting...");
        std::exit(1);
    }

    std::cout << std::endl;
    return filename;
}

// Prints "[location - INFO] message" to standard output, but only in
// verbose mode.
void LogInfo(const std::string location, const std::string message) {
    if (!isVerbose) {
        return;
    }
    std::cout << '[' << location << " - INFO] " << message << std::endl;
}

// Prints "[location - ERROR] message" to standard error.
// Errors are reported regardless of the verbose flag: every caller in this
// file terminates the process right afterwards, and a silent exit would
// leave the user without any explanation.
void LogError(const std::string location, const std::string message) {
    std::cerr << '[' << location << " - ERROR] " << message << std::endl;
}

// Creates an attribute with the given name and no values.
AttributeType::AttributeType(std::string attribute) {
    this->attribute = std::move(attribute);
}

// Appends one permitted value to this attribute.
void AttributeType::AddValue(std::string value) {
    values.emplace_back(std::move(value));
}

// Creates an instance with the given id and `size` value slots.
Instance::Instance(const int id, const int size) {
    this->id = id;
    this->values.resize(size);
}

// Reads the whole data file line by line and dispatches every line:
//   - blank lines and lines starting with '%' (comments) are skipped,
//   - lines starting with '@' go to AddAttribute(),
//   - everything else goes to AddData().
// Runs TestIntegrity() once the file has been consumed.
// Terminates the process with status 1 when the file cannot be opened.
void Arff::Read(std::string filename) {
    std::ifstream dataFile(filename);
    if (!dataFile.is_open()) {
        LogError("ARFF/Read",
                 "Unable to open file with name `" + filename + "`, exiting...");
        std::exit(1);
    }

    std::string line;
    while (std::getline(dataFile, line)) {
        // Dispatch on the first non-blank character so that empty lines,
        // lines holding only a carriage return, and indented lines are all
        // handled without indexing into an empty string.
        const std::string::size_type first = line.find_first_not_of(kBlanks);
        if (first == std::string::npos) {
            continue;
        }

        switch (line[first]) {
            case '%':
                // Comment line in data
                break;
            case '@':
                AddAttribute(line);
                break;
            default:
                AddData(line);
                break;
        }
    }

    TestIntegrity();
}

// Parses one '@' directive line.
//   @data      - marks the beginning of the data section; nothing is stored
//   @relation  - stores the following token as the relation name
//   otherwise  - the following token becomes a new attribute name and the
//                rest of the line is read as its list of values
// Directive keywords are matched case-insensitively.
void Arff::AddAttribute(std::string line) {
    std::istringstream parser(line);

    std::string keyword;
    parser >> keyword;
    keyword = LowerCased(keyword);

    // Signifies beginning of data
    // Might add a boolean later to mark this
    if (keyword == "@data") {
        return;
    }

    if (keyword == "@relation") {
        std::string name;
        parser >> name;
        relation = name;
        LogInfo("ARFF/Attribute", "Relation set: " + relation);
        return;
    }

    std::string name;
    if (!(parser >> name)) {
        // Without a name there is nothing meaningful to register.
        LogError("ARFF/Attribute", "Directive without a name ignored: " + keyword);
        return;
    }
    attributeList.emplace_back(name);
    LogInfo("ARFF/Attribute", "Added attribute: " + name);

    // Turn the list punctuation into blanks so that "{a,b}", "{a, b}" and
    // "{ a, b }" all yield the tokens "a" and "b" and never an empty token.
    std::string valueSpec;
    std::getline(parser, valueSpec);
    std::replace_if(valueSpec.begin(), valueSpec.end(), [](char c) {
        return c == '{' || c == '}' || c == ',' || c == '\r' || c == '\n';
    }, ' ');

    std::istringstream valueParser(valueSpec);
    std::string value;
    while (valueParser >> value) {
        attributeList.back().AddValue(value);
        LogInfo("ARFF/Attribute", "Added value: " + value);
    }
}

// Parses one comma-separated data line into a new instance appended to the
// database. The instance gets one value slot per known attribute; its id is
// one more than the id of the previous instance (0 for the first one).
void Arff::AddData(std::string line) {
    const int id = database.empty() ? 0 : database.back().id + 1;
    const std::size_t fieldCount = attributeList.size();

    database.emplace_back(id, static_cast<int>(fieldCount));
    Instance& instance = database.back();
    LogInfo("ARFF/Data", "Added id: " + std::to_string(instance.id));

    std::istringstream parser(line);
    std::string token;
    for (std::size_t i = 0; i < fieldCount; ++i) {
        if (!std::getline(parser, token, ',')) {
            // A failed read leaves `token` untouched; clear it so a short
            // row produces empty fields instead of repeating the last one.
            token.clear();
        }
        token.erase(std::remove(token.begin(), token.end(), '\r'), token.end());
        token.erase(std::remove(token.begin(), token.end(), '\n'), token.end());
        token = TrimBlanks(token);

        instance.values.at(i) = token;
        LogInfo("ARFF/Data", "Added instance value: " + token);
    }
}

// Verifies that every value of every instance is one of the values listed
// for the attribute in the same position. Terminates the process with
// status 1 on the first instance that fails the check.
void Arff::TestIntegrity(void) {
    const std::size_t attributeCount = attributeList.size();

    for (const Instance& instance : database) {
        std::size_t successCheck = 0;

        for (std::size_t i = 0; i < attributeCount; ++i) {
            if (i >= instance.values.size()) {
                // Fewer stored values than attributes: the remaining
                // attributes cannot match, which the count below reports.
                break;
            }

            const std::string& tested = instance.values.at(i);
            LogInfo("ARFF/Integrity", "Instance value tested: '" + tested + "'");

            for (const std::string& value : attributeList.at(i).values) {
                LogInfo("ARFF/Integrity", "attributeList value: '" + value + "'");
                if (tested == value) {
                    LogInfo("ARFF/Integrity", "Value found: " + value);
                    ++successCheck;
                    break;
                }
            }
        }

        if (successCheck != attributeCount) {
            LogError("ARFF/Integrity",
                     "Value size mismatch: " + std::to_string(successCheck) +
                         " out of " + std::to_string(attributeCount));
            std::exit(1);
        }
    }

    LogInfo("ARFF/Integrity", "All values exist, continuing...");
}

}  // namespace ARFF