arff: fixed missing value case on read; still diagnosing soybean issue

This commit is contained in:
Trianta 2024-04-08 17:43:07 -05:00
parent c2bba75e5a
commit db4ef4740c

View File

@@ -90,6 +90,7 @@ namespace ARFF {
for (AttributeType type : attributeList) { for (AttributeType type : attributeList) {
std::cout << type.attribute << " (" << type.values.size() << "):"; std::cout << type.attribute << " (" << type.values.size() << "):";
for (std::string value : type.values) { for (std::string value : type.values) {
if (value == "?") { continue; }
std::cout << " " << value; std::cout << " " << value;
} }
std::cout << '\n'; std::cout << '\n';
@@ -125,8 +126,10 @@ namespace ARFF {
parser >> token; parser >> token;
attributeList.emplace_back(token); attributeList.emplace_back(token);
LogInfo("ARFF/Attribute", "Added attribute: " + token); LogInfo("ARFF/Attribute", "Added attribute: " + token);
while (parser >> token) { while (std::getline(parser, token, ',')) {
// Clean token from outside pieces // Clean token from outside pieces
token.erase(std::remove(token.begin(), token.end(), ' '), token.end());
token.erase(std::remove(token.begin(), token.end(), '\t'), token.end());
token.erase(std::remove(token.begin(), token.end(), '{'), token.end()); token.erase(std::remove(token.begin(), token.end(), '{'), token.end());
token.erase(std::remove(token.begin(), token.end(), '}'), token.end()); token.erase(std::remove(token.begin(), token.end(), '}'), token.end());
token.erase(std::remove(token.begin(), token.end(), ','), token.end()); token.erase(std::remove(token.begin(), token.end(), ','), token.end());
@@ -135,6 +138,8 @@ namespace ARFF {
attributeList.back().AddValue(token); attributeList.back().AddValue(token);
LogInfo("ARFF/Attribute", "Added value: " + token); LogInfo("ARFF/Attribute", "Added value: " + token);
} }
// Additional missing value case
attributeList.back().AddValue("?");
} }
// Add data to runtime database // Add data to runtime database
@@ -147,6 +152,7 @@ namespace ARFF {
LogInfo("ARFF/Data", "Added id: " + std::to_string(database.back().id)); LogInfo("ARFF/Data", "Added id: " + std::to_string(database.back().id));
for (int i = 0; i < attributeList.size(); ++i) { for (int i = 0; i < attributeList.size(); ++i) {
std::getline(parser, token, ','); std::getline(parser, token, ',');
token.erase(std::remove(token.begin(), token.end(), ' '), token.end());
token.erase(std::remove(token.begin(), token.end(), '\r'), token.end()); token.erase(std::remove(token.begin(), token.end(), '\r'), token.end());
token.erase(std::remove(token.begin(), token.end(), '\n'), token.end()); token.erase(std::remove(token.begin(), token.end(), '\n'), token.end());
database.back().values.at(i) = token; database.back().values.at(i) = token;