feat: read data from arff files #1
@ -1,4 +1,5 @@
|
||||
# Build the `arff` executable from the program entry point and the ARFF reader.
add_executable(arff
|
||||
./main.cpp
|
||||
./arff.cpp
|
||||
)
|
||||
|
||||
|
157
src/arff/arff.cpp
Normal file
157
src/arff/arff.cpp
Normal file
@ -0,0 +1,157 @@
|
||||
#include "arff.hpp"
|
||||
#include <iostream>
|
||||
#include <fstream>
|
||||
#include <sstream>
|
||||
#include <algorithm>
|
||||
|
||||
namespace ARFF {
|
||||
// Global verbosity flag. Set by ParseArguments when -v/--verbose is passed;
// gates diagnostic output.
bool isVerbose = false;
|
||||
|
||||
void ParseArguments(int argc, char* argv[]) {
|
||||
std::string argument_string;
|
||||
for (int i = 0; i < argc; ++i) {
|
||||
argument_string.assign(argv[i]);
|
||||
if (argument_string == "-v" || argument_string == "--verbose") {
|
||||
isVerbose = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::string GetDataFilename(void) {
|
||||
std::string filename;
|
||||
std::cout << "Please enter name of the data file:\t";
|
||||
std::cin >> filename;
|
||||
if (filename.empty()) {
|
||||
LogError("ARFF/Setup", "No data filename provided, exiting...");
|
||||
exit(1);
|
||||
}
|
||||
std::cout << std::endl;
|
||||
return filename;
|
||||
}
|
||||
|
||||
void LogInfo(const std::string location, const std::string message) {
|
||||
if (!isVerbose) { return; }
|
||||
std::cout << '[' << location << " - INFO] ";
|
||||
std::cout << message << std::endl;
|
||||
}
|
||||
|
||||
// Print an error message to standard error in the form
// "[<location> - ERROR] <message>".
//
// Errors are always reported, regardless of the verbose flag: callers
// typically log an error immediately before terminating the process, and
// hiding the message would make the program exit without any explanation.
void LogError(const std::string location, const std::string message) {
    std::cerr << '[' << location << " - ERROR] " << message << std::endl;
}
|
||||
|
||||
// Create an attribute named `attribute`; its list of values starts out empty.
AttributeType::AttributeType(std::string attribute)
    : attribute(attribute) {}
|
||||
|
||||
void AttributeType::AddValue(std::string value) {
|
||||
values.emplace_back(value);
|
||||
}
|
||||
|
||||
// Create a data row with identifier `id` holding `size` empty values.
Instance::Instance(const int id, const int size)
    : id(id), values(size) {}
|
||||
|
||||
// Read entire data file and parse it
|
||||
void Arff::Read(std::string filename) {
|
||||
std::ifstream dataFile(filename);
|
||||
if (!dataFile.is_open()) {
|
||||
LogError("ARFF/Read", "Unable to open file with name `"
|
||||
+ filename + ", exiting...");
|
||||
exit(1);
|
||||
}
|
||||
std::string line;
|
||||
while (std::getline(dataFile, line)) {
|
||||
if (line.size() == 1) { continue; }
|
||||
switch (line.at(0)) {
|
||||
case '%':
|
||||
// Comment line in data
|
||||
continue;
|
||||
break;
|
||||
case '@':
|
||||
AddAttribute(line);
|
||||
break;
|
||||
default:
|
||||
AddData(line);
|
||||
break;
|
||||
}
|
||||
}
|
||||
TestIntegrity();
|
||||
}
|
||||
|
||||
// Add the attribute to the list
|
||||
void Arff::AddAttribute(std::string line) {
|
||||
std::stringstream parser(line);
|
||||
std::string token;
|
||||
parser >> token;
|
||||
// Signifies beginning of data
|
||||
// Might add a boolean later to mark this
|
||||
if (token == "@data") {
|
||||
return;
|
||||
}
|
||||
if (token == "@relation") {
|
||||
parser >> token;
|
||||
relation = token;
|
||||
LogInfo("ARFF/Attribute", "Relation set: " + relation);
|
||||
return;
|
||||
}
|
||||
parser >> token;
|
||||
attributeList.emplace_back(token);
|
||||
LogInfo("ARFF/Attribute", "Added attribute: " + token);
|
||||
while (parser >> token) {
|
||||
// Clean token from outside pieces
|
||||
token.erase(std::remove(token.begin(), token.end(), '{'), token.end());
|
||||
token.erase(std::remove(token.begin(), token.end(), '}'), token.end());
|
||||
token.erase(std::remove(token.begin(), token.end(), ','), token.end());
|
||||
token.erase(std::remove(token.begin(), token.end(), '\r'), token.end());
|
||||
token.erase(std::remove(token.begin(), token.end(), '\n'), token.end());
|
||||
attributeList.back().AddValue(token);
|
||||
LogInfo("ARFF/Attribute", "Added value: " + token);
|
||||
}
|
||||
}
|
||||
|
||||
// Add data to runtime database
|
||||
void Arff::AddData(std::string line) {
|
||||
std::istringstream parser(line);
|
||||
std::string token;
|
||||
int id = 0;
|
||||
if (!database.empty()) { id = database.back().id + 1; }
|
||||
database.emplace_back(id, attributeList.size());
|
||||
LogInfo("ARFF/Data", "Added id: " + std::to_string(database.back().id));
|
||||
for (int i = 0; i < attributeList.size(); ++i) {
|
||||
std::getline(parser, token, ',');
|
||||
token.erase(std::remove(token.begin(), token.end(), '\r'), token.end());
|
||||
token.erase(std::remove(token.begin(), token.end(), '\n'), token.end());
|
||||
database.back().values.at(i) = token;
|
||||
LogInfo("ARFF/Data", "Added instance value: " + token);
|
||||
}
|
||||
}
|
||||
|
||||
void Arff::TestIntegrity(void) {
|
||||
for (Instance instance : database) {
|
||||
int successCheck = 0;
|
||||
for (int i = 0; i < attributeList.size(); ++i) {
|
||||
LogInfo("ARFF/Integrity", "Instance value tested: '"
|
||||
+ instance.values.at(i) + "'");
|
||||
for (std::string value : attributeList.at(i).values) {
|
||||
LogInfo("ARFF/Integrity", "attributeList value: '"
|
||||
+ value + "'");
|
||||
if (instance.values.at(i) == value) {
|
||||
LogInfo("ARFF/Integrity", "Value found: " + value);
|
||||
++successCheck;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
if (successCheck != attributeList.size()) {
|
||||
LogError("ARFF/Integrity", "Value size mismatch: "
|
||||
+ std::to_string(successCheck) + " out of "
|
||||
+ std::to_string(attributeList.size()));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
LogInfo("ARFF/Integrity", "All values exist, continuing...");
|
||||
}
|
||||
}
|
43
src/arff/arff.hpp
Normal file
43
src/arff/arff.hpp
Normal file
@ -0,0 +1,43 @@
|
||||
#ifndef ARFF_HPP
|
||||
#define ARFF_HPP
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
namespace ARFF {

/// Scans the command-line arguments for the verbose switch.
void ParseArguments(int argc, char* argv[]);

/// Prompts for the data file's name and returns what the user entered.
std::string GetDataFilename();

/// Writes an informational message tagged with `location`; only shown when
/// verbose mode is enabled.
void LogInfo(const std::string location, const std::string message);

/// Reports an error message tagged with `location` on standard error.
void LogError(const std::string location, const std::string message);

/// One declared attribute: its name and the values recorded for it.
struct AttributeType {
    std::string attribute;            ///< Attribute name.
    std::vector<std::string> values;  ///< Values added through AddValue().

    AttributeType(std::string attribute);

    /// Appends `value` to `values`.
    void AddValue(std::string value);
};

/// One data row: an identifier plus one string value per column.
struct Instance {
    /// Creates a row with the given id and `size` empty value slots.
    Instance(const int id, const int size);

    unsigned int id;                  ///< Row identifier.
    std::vector<std::string> values;  ///< Column values, stored as text.
};

/// Reader that loads a data file into an in-memory attribute list and
/// row database.
class Arff {
public:
    Arff() = default;

    /// Reads and parses the file called `filename`.
    void Read(std::string filename);

private:
    std::string relation;                      ///< Relation name from the header.
    std::vector<AttributeType> attributeList;  ///< Declared attributes, in file order.
    std::vector<Instance> database;            ///< Parsed data rows, in file order.

    /// Handles one header line (a line starting with '@').
    void AddAttribute(std::string line);

    /// Handles one data line.
    void AddData(std::string line);

    /// Verifies the stored rows against the declared attributes.
    void TestIntegrity();
};

}
|
||||
|
||||
#endif
|
@ -1,5 +1,12 @@
|
||||
#include <iostream>
|
||||
/*
|
||||
* Author: Gregory Crawford
|
||||
* Date: 2024-03-18
|
||||
* Description: Read and store ARFF data from a file
|
||||
*/
|
||||
#include "arff.hpp"
|
||||
|
||||
int main(void) {
|
||||
std::cout << "Hello world" << std::endl;
|
||||
int main(int argc, char* argv[]) {
|
||||
ARFF::ParseArguments(argc, argv);
|
||||
ARFF::Arff data;
|
||||
data.Read(ARFF::GetDataFilename());
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user