arff-mining/src/arff/arff.cpp

183 lines
6.3 KiB
C++
Raw Normal View History

#include "arff.hpp"

#include <algorithm>
#include <cctype>
#include <cstddef>
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
namespace ARFF {
// Toggles verbose logging. Off by default; ParseArguments switches it on
// when "-v" or "--verbose" appears on the command line.
bool isVerbose = false;
void ParseArguments(int argc, char* argv[]) {
std::string argument_string;
for (int i = 0; i < argc; ++i) {
argument_string.assign(argv[i]);
if (argument_string == "-v" || argument_string == "--verbose") {
isVerbose = true;
}
}
}
// Prompt on stdout for the data file name and read it from stdin.
//
// Returns the first whitespace-delimited token read from std::cin. When
// nothing could be read, the problem is reported through LogError and the
// process exits with status 1.
std::string GetDataFilename(void) {
    std::cout << "Please enter name of the data file:\t";

    std::string dataPath;
    std::cin >> dataPath;

    if (!dataPath.empty()) {
        // Blank line (and flush) separating the prompt from later output.
        std::cout << std::endl;
        return dataPath;
    }

    LogError("ARFF/Setup", "No data filename provided, exiting...");
    exit(1);
}
// Write an informational message to stdout as "[<location> - INFO] <message>".
// Nothing is printed unless verbose mode (isVerbose) is enabled.
void LogInfo(const std::string location, const std::string message) {
    if (isVerbose) {
        std::cout << '[' << location << " - INFO] " << message << std::endl;
    }
}
// Write an error message to stderr as "[<location> - ERROR] <message>".
//
// Errors are reported regardless of the verbose flag: callers such as
// Arff::Read terminate the process right after logging, so hiding the
// message behind the flag would make the program exit without any
// explanation.
void LogError(const std::string location, const std::string message) {
    std::cerr << '[' << location << " - ERROR] " << message << std::endl;
}
// Create an attribute named `attribute`. Its values are added afterwards
// through AddValue.
// The name is taken by value and moved into the member, so it is stored
// without an additional copy.
AttributeType::AttributeType(std::string attribute)
    : attribute(std::move(attribute)) {}
// Append `value` to this attribute's list of values.
// The argument is a by-value sink, so it is moved into the container
// instead of being copied a second time.
void AttributeType::AddValue(std::string value) {
    values.emplace_back(std::move(value));
}
// Create the instance with identifier `id` and resize its value list to
// `size` entries (Arff::AddData passes the attribute count here).
Instance::Instance(const int id, const int size) : id(id) {
    values.resize(size);
}
// Read the data file `filename` line by line and parse it.
//
// Each non-blank line is dispatched on its first non-blank character:
//   '%'  comment, skipped
//   '@'  header line, forwarded to AddAttribute
//   else data row, forwarded to AddData
// After the last line TestIntegrity() checks the parsed rows.
//
// If the file cannot be opened the error is logged and the process exits
// with status 1.
void Arff::Read(std::string filename) {
    std::ifstream dataFile(filename);
    if (!dataFile.is_open()) {
        LogError("ARFF/Read", "Unable to open file with name `"
                 + filename + "`, exiting...");
        std::exit(1);
    }

    std::string line;
    while (std::getline(dataFile, line)) {
        // Files with CRLF line endings leave a trailing '\r' on every line.
        if (!line.empty() && line.back() == '\r') {
            line.pop_back();
        }

        // Skip empty and whitespace-only lines. Indexing an empty line
        // would otherwise be out of range.
        const std::string::size_type first = line.find_first_not_of(" \t");
        if (first == std::string::npos) {
            continue;
        }

        const char marker = line[first];
        if (marker == '%') {
            // Comment line in data
            continue;
        }
        if (marker == '@') {
            AddAttribute(line);
        } else {
            AddData(line);
        }
    }
    TestIntegrity();
}
// Dump the parsed data set to stdout:
//   1. the number of attributes and of examples,
//   2. one line per attribute: "<name> (<value count>): <values...>",
//   3. the relation name followed by one tab-separated line per instance.
// Elements are visited by const reference so that no attribute, instance or
// value string is copied just to be printed.
void Arff::Print(void) {
    std::cout << attributeList.size() << " attributes\n"
              << database.size() << " examples\n"
              << '\n';

    std::cout << "Attribute (#): values\n";
    for (const auto& type : attributeList) {
        std::cout << type.attribute << " (" << type.values.size() << "):";
        for (const std::string& value : type.values) {
            std::cout << " " << value;
        }
        std::cout << '\n';
    }
    std::cout << '\n';

    std::cout << relation << '\n';
    for (const auto& instance : database) {
        for (const std::string& value : instance.values) {
            std::cout << '\t' << value;
        }
        std::cout << '\n';
    }
    // Trailing blank line, then flush once for the whole report.
    std::cout << std::endl;
}
// Add the attribute to the list
void Arff::AddAttribute(std::string line) {
std::stringstream parser(line);
std::string token;
parser >> token;
// Signifies beginning of data
// Might add a boolean later to mark this
if (token == "@data") {
return;
}
if (token == "@relation") {
parser >> token;
relation = token;
LogInfo("ARFF/Attribute", "Relation set: " + relation);
return;
}
parser >> token;
attributeList.emplace_back(token);
LogInfo("ARFF/Attribute", "Added attribute: " + token);
while (parser >> token) {
// Clean token from outside pieces
token.erase(std::remove(token.begin(), token.end(), '{'), token.end());
token.erase(std::remove(token.begin(), token.end(), '}'), token.end());
token.erase(std::remove(token.begin(), token.end(), ','), token.end());
token.erase(std::remove(token.begin(), token.end(), '\r'), token.end());
token.erase(std::remove(token.begin(), token.end(), '\n'), token.end());
attributeList.back().AddValue(token);
LogInfo("ARFF/Attribute", "Added value: " + token);
}
}
// Add data to runtime database
void Arff::AddData(std::string line) {
std::istringstream parser(line);
std::string token;
int id = 0;
if (!database.empty()) { id = database.back().id + 1; }
database.emplace_back(id, attributeList.size());
LogInfo("ARFF/Data", "Added id: " + std::to_string(database.back().id));
for (int i = 0; i < attributeList.size(); ++i) {
std::getline(parser, token, ',');
token.erase(std::remove(token.begin(), token.end(), '\r'), token.end());
token.erase(std::remove(token.begin(), token.end(), '\n'), token.end());
database.back().values.at(i) = token;
LogInfo("ARFF/Data", "Added instance value: " + token);
}
}
void Arff::TestIntegrity(void) {
for (Instance instance : database) {
int successCheck = 0;
for (int i = 0; i < attributeList.size(); ++i) {
LogInfo("ARFF/Integrity", "Instance value tested: '"
+ instance.values.at(i) + "'");
for (std::string value : attributeList.at(i).values) {
LogInfo("ARFF/Integrity", "attributeList value: '"
+ value + "'");
if (instance.values.at(i) == value) {
LogInfo("ARFF/Integrity", "Value found: " + value);
++successCheck;
break;
}
}
}
if (successCheck != attributeList.size()) {
LogError("ARFF/Integrity", "Value size mismatch: "
+ std::to_string(successCheck) + " out of "
+ std::to_string(attributeList.size()));
exit(1);
}
}
LogInfo("ARFF/Integrity", "All values exist, continuing...");
}
}