From db4ef4740c747e6bccb1dddb3571ac61b2193fbf Mon Sep 17 00:00:00 2001 From: Trianta <56975502+Trimutex@users.noreply.github.com> Date: Mon, 8 Apr 2024 17:43:07 -0500 Subject: [PATCH] arff: fixed missing value case on read, still diagnosing soybean issue --- src/arff/arff.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/arff/arff.cpp b/src/arff/arff.cpp index 73c17c1..b6cd3ea 100644 --- a/src/arff/arff.cpp +++ b/src/arff/arff.cpp @@ -90,6 +90,7 @@ namespace ARFF { for (AttributeType type : attributeList) { std::cout << type.attribute << " (" << type.values.size() << "):"; for (std::string value : type.values) { + if (value == "?") { continue; } std::cout << " " << value; } std::cout << '\n'; @@ -125,8 +126,10 @@ namespace ARFF { parser >> token; attributeList.emplace_back(token); LogInfo("ARFF/Attribute", "Added attribute: " + token); - while (parser >> token) { + while (std::getline(parser, token, ',')) { // Clean token from outside pieces + token.erase(std::remove(token.begin(), token.end(), ' '), token.end()); + token.erase(std::remove(token.begin(), token.end(), '\t'), token.end()); token.erase(std::remove(token.begin(), token.end(), '{'), token.end()); token.erase(std::remove(token.begin(), token.end(), '}'), token.end()); token.erase(std::remove(token.begin(), token.end(), ','), token.end()); @@ -135,6 +138,8 @@ namespace ARFF { attributeList.back().AddValue(token); LogInfo("ARFF/Attribute", "Added value: " + token); } + // Additional missing value case + attributeList.back().AddValue("?"); } // Add data to runtime database @@ -147,6 +152,7 @@ namespace ARFF { LogInfo("ARFF/Data", "Added id: " + std::to_string(database.back().id)); for (int i = 0; i < attributeList.size(); ++i) { std::getline(parser, token, ','); + token.erase(std::remove(token.begin(), token.end(), ' '), token.end()); token.erase(std::remove(token.begin(), token.end(), '\r'), token.end()); token.erase(std::remove(token.begin(), token.end(), '\n'), token.end()); database.back().values.at(i) = token;