From d30428dc4f82ddc60daa42217856cb73daebe8fb Mon Sep 17 00:00:00 2001
From: Trianta <56975502+Trimutex@users.noreply.github.com>
Date: Mon, 18 Mar 2024 19:19:47 -0500
Subject: [PATCH] Added reading in data and a verbose mode

---
 src/arff/CMakeLists.txt |   1 +
 src/arff/arff.cpp       | 195 ++++++++++++++++++++++++++++++++++++++++
 src/arff/arff.hpp       |  51 +++++++++++
 src/arff/main.cpp       |  13 ++-
 4 files changed, 257 insertions(+), 3 deletions(-)
 create mode 100644 src/arff/arff.cpp
 create mode 100644 src/arff/arff.hpp

diff --git a/src/arff/CMakeLists.txt b/src/arff/CMakeLists.txt
index 2b5d56b..524fe0c 100644
--- a/src/arff/CMakeLists.txt
+++ b/src/arff/CMakeLists.txt
@@ -1,4 +1,5 @@
 add_executable(arff
     ./main.cpp
+    ./arff.cpp
 )
 
diff --git a/src/arff/arff.cpp b/src/arff/arff.cpp
new file mode 100644
index 0000000..a247e78
--- /dev/null
+++ b/src/arff/arff.cpp
@@ -0,0 +1,195 @@
+#include "arff.hpp"
+#include <algorithm>
+#include <cstddef>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <sstream>
+#include <utility>
+
+namespace ARFF {
+    bool isVerbose = false;
+
+    namespace {
+        // Strip CR/LF characters from `token` and trim surrounding spaces and
+        // tabs, so values compare equal regardless of spacing or line endings.
+        std::string TrimToken(std::string token) {
+            token.erase(std::remove_if(token.begin(), token.end(),
+                        [](const char c) { return c == '\r' || c == '\n'; }),
+                    token.end());
+            const std::size_t first = token.find_first_not_of(" \t");
+            if (first == std::string::npos) { return std::string(); }
+            const std::size_t last = token.find_last_not_of(" \t");
+            return token.substr(first, last - first + 1);
+        }
+    }
+
+    // Scan the command line for "-v" / "--verbose" and enable verbose logging.
+    void ParseArguments(int argc, char* argv[]) {
+        // argv[0] is the program name, so option scanning starts at index 1.
+        for (int i = 1; i < argc; ++i) {
+            const std::string argument_string(argv[i]);
+            if (argument_string == "-v" || argument_string == "--verbose") {
+                isVerbose = true;
+            }
+        }
+    }
+
+    // Prompt for the data filename on stdout and read one token from stdin.
+    // Exits with status 1 when nothing could be read.
+    std::string GetDataFilename(void) {
+        std::string filename;
+        std::cout << "Please enter name of the data file:\t";
+        std::cin >> filename;
+        if (filename.empty()) {
+            LogError("ARFF/Setup", "No data filename provided, exiting...");
+            std::exit(1);
+        }
+        std::cout << std::endl;
+        return filename;
+    }
+
+    // Print "[location - INFO] message" to stdout, only in verbose mode.
+    void LogInfo(const std::string& location, const std::string& message) {
+        if (!isVerbose) { return; }
+        std::cout << '[' << location << " - INFO] ";
+        std::cout << message << std::endl;
+    }
+
+    // Print "[location - ERROR] message" to stderr. Errors are always shown:
+    // every caller exits right after logging, so hiding them outside verbose
+    // mode would end the program without any explanation.
+    void LogError(const std::string& location, const std::string& message) {
+        std::cerr << '[' << location << " - ERROR] ";
+        std::cerr << message << std::endl;
+    }
+
+    AttributeType::AttributeType(std::string attribute)
+        : attribute(std::move(attribute)) {}
+
+    void AttributeType::AddValue(std::string value) {
+        values.emplace_back(std::move(value));
+    }
+
+    Instance::Instance(const int id, const int size)
+        : id(static_cast<unsigned int>(id)),
+          values(static_cast<std::size_t>(size)) {}
+
+    // Read entire data file and parse it.
+    // Lines starting with '%' are comments, lines starting with '@' are header
+    // declarations and everything else is a data row. Exits with status 1
+    // when the file cannot be opened.
+    void Arff::Read(std::string filename) {
+        std::ifstream dataFile(filename);
+        if (!dataFile.is_open()) {
+            LogError("ARFF/Read", "Unable to open file with name `" +
+                    filename + "`, exiting...");
+            std::exit(1);
+        }
+        std::string line;
+        while (std::getline(dataFile, line)) {
+            // Skip blank lines (including a lone '\r' from CRLF files) so the
+            // dispatch below never looks at the first character of nothing.
+            if (line.find_first_not_of(" \t\r") == std::string::npos) {
+                continue;
+            }
+            switch (line.front()) {
+                case '%':
+                    // Comment line in data
+                    break;
+                case '@':
+                    AddAttribute(line);
+                    break;
+                default:
+                    AddData(line);
+                    break;
+            }
+        }
+        TestIntegrity();
+    }
+
+    // Add the attribute to the list.
+    // Handles "@relation <name>", "@data" and "@attribute <name> <values>",
+    // where <values> is a comma-separated list optionally wrapped in braces.
+    void Arff::AddAttribute(std::string line) {
+        std::stringstream parser(line);
+        std::string token;
+        parser >> token;
+        // Signifies beginning of data
+        // Might add a boolean later to mark this
+        if (token == "@data") {
+            return;
+        }
+        if (token == "@relation") {
+            parser >> token;
+            relation = token;
+            LogInfo("ARFF/Attribute", "Relation set: " + relation);
+            return;
+        }
+        parser >> token;
+        attributeList.emplace_back(token);
+        LogInfo("ARFF/Attribute", "Added attribute: " + token);
+        // Split on commas instead of whitespace so that "{a,b,c}" and
+        // "{a, b, c}" both yield three separate values.
+        while (std::getline(parser, token, ',')) {
+            // Clean token from outside pieces
+            token.erase(std::remove(token.begin(), token.end(), '{'), token.end());
+            token.erase(std::remove(token.begin(), token.end(), '}'), token.end());
+            token = TrimToken(token);
+            if (token.empty()) { continue; }
+            attributeList.back().AddValue(token);
+            LogInfo("ARFF/Attribute", "Added value: " + token);
+        }
+    }
+
+    // Add data to runtime database.
+    // Splits `line` on commas and stores one value per declared attribute;
+    // fields missing from a short row are stored as empty strings.
+    void Arff::AddData(std::string line) {
+        std::istringstream parser(line);
+        std::string token;
+        int id = 0;
+        if (!database.empty()) {
+            id = static_cast<int>(database.back().id) + 1;
+        }
+        database.emplace_back(id, static_cast<int>(attributeList.size()));
+        LogInfo("ARFF/Data", "Added id: " + std::to_string(database.back().id));
+        for (std::size_t i = 0; i < attributeList.size(); ++i) {
+            // A failed read leaves `token` untouched, so clear it to avoid
+            // repeating the previous field when the row is too short.
+            if (!std::getline(parser, token, ',')) { token.clear(); }
+            token = TrimToken(token);
+            database.back().values.at(i) = token;
+            LogInfo("ARFF/Data", "Added instance value: " + token);
+        }
+    }
+
+    // Check that every value of every instance is one of the values declared
+    // for the matching attribute. Exits with status 1 at the first instance
+    // holding an undeclared value.
+    void Arff::TestIntegrity(void) {
+        for (const Instance& instance : database) {
+            std::size_t successCheck = 0;
+            for (std::size_t i = 0; i < attributeList.size(); ++i) {
+                LogInfo("ARFF/Integrity", "Instance value tested: '" +
+                        instance.values.at(i) + "'");
+                for (const std::string& value : attributeList.at(i).values) {
+                    LogInfo("ARFF/Integrity", "attributeList value: '" +
+                            value + "'");
+                    if (instance.values.at(i) == value) {
+                        LogInfo("ARFF/Integrity", "Value found: " + value);
+                        ++successCheck;
+                        break;
+                    }
+                }
+            }
+            if (successCheck != attributeList.size()) {
+                LogError("ARFF/Integrity", "Value size mismatch: " +
+                        std::to_string(successCheck) + " out of " +
+                        std::to_string(attributeList.size()));
+                std::exit(1);
+            }
+        }
+        LogInfo("ARFF/Integrity", "All values exist, continuing...");
+    }
+}
diff --git a/src/arff/arff.hpp b/src/arff/arff.hpp
new file mode 100644
index 0000000..7334c85
--- /dev/null
+++ b/src/arff/arff.hpp
@@ -0,0 +1,51 @@
+#ifndef ARFF_HPP
+#define ARFF_HPP
+
+#include <string>
+#include <vector>
+
+namespace ARFF {
+    // Turn on verbose logging when "-v" or "--verbose" is on the command line.
+    void ParseArguments(int argc, char* argv[]);
+    // Prompt on stdout and read the data filename from stdin.
+    std::string GetDataFilename(void);
+    // Print an informational message to stdout (verbose mode only).
+    void LogInfo(const std::string& location, const std::string& message);
+    // Print an error message to stderr (always shown).
+    void LogError(const std::string& location, const std::string& message);
+
+    // A named attribute together with the values declared for it.
+    struct AttributeType {
+    public:
+        std::string attribute;
+        std::vector<std::string> values;
+        AttributeType(std::string attribute);
+        void AddValue(std::string value);
+    };
+
+    // One data row: an id plus one value slot per attribute.
+    struct Instance {
+    public:
+        Instance(const int id, const int size);
+        unsigned int id;
+        std::vector<std::string> values;
+    };
+
+    // Relation name, attribute declarations and data rows read from a file.
+    class Arff {
+    public:
+        Arff() = default;
+        // Parse `filename`, then check every data value against its attribute.
+        void Read(std::string filename);
+    private:
+        std::string relation;
+        std::vector<AttributeType> attributeList;
+        std::vector<Instance> database;
+        void AddAttribute(std::string line);
+        void AddData(std::string line);
+        void TestIntegrity(void);
+    };
+
+}
+
+#endif
diff --git a/src/arff/main.cpp b/src/arff/main.cpp
index df55c5a..11caf3d 100644
--- a/src/arff/main.cpp
+++ b/src/arff/main.cpp
@@ -1,5 +1,12 @@
-#include <iostream>
+/*
+ * Author: Gregory Crawford
+ * Date: 2024-03-18
+ * Description: Read and store ARFF data from a file
+ */
+#include "arff.hpp"
 
-int main(void) {
-    std::cout << "Hello world" << std::endl;
+int main(int argc, char* argv[]) {
+    ARFF::ParseArguments(argc, argv);
+    ARFF::Arff data;
+    data.Read(ARFF::GetDataFilename());
 }