generated from Trianta/cpp-unity-template
Update for completed project #1
@ -42,13 +42,13 @@ void SMSMessageFilter::GenerateProbability(std::string file_name) {
|
||||
while (std::getline(full_message, token, ' ')) {
|
||||
token = SanitizeToken(token);
|
||||
if (token.empty()) { continue; }
|
||||
if (probability_dictionary[token] == 0) {
|
||||
probability_dictionary[token] = 0.5;
|
||||
}
|
||||
if (is_ham_temp) {
|
||||
probability_dictionary[token] += probability_dictionary[token] * 0.1;
|
||||
probability_dictionary[token].value += probability_dictionary[token].value * 0.0000000001;
|
||||
if (probability_dictionary[token].value > 1.) {
|
||||
probability_dictionary[token].value = 1.;
|
||||
}
|
||||
} else {
|
||||
probability_dictionary[token] -= probability_dictionary[token] * 0.1;
|
||||
probability_dictionary[token].value -= probability_dictionary[token].value * 0.0000000001;
|
||||
}
|
||||
}
|
||||
temp_message.clear();
|
||||
@ -82,7 +82,6 @@ void SMSMessageFilter::Prepare(std::string file_name) {
|
||||
full_message.clear();
|
||||
continue; // Probably a bad line cut
|
||||
}
|
||||
full_message.ignore('\t');
|
||||
filtered_messages.emplace_back(is_ham_temp, full_message.str());
|
||||
temp_message.clear();
|
||||
full_message.clear();
|
||||
@ -97,15 +96,18 @@ void SMSMessageFilter::Filter(void) {
|
||||
full_message.str(filtered_messages[i].message);
|
||||
while (std::getline(full_message, token, ' ')) {
|
||||
token = SanitizeToken(token);
|
||||
if (probability_dictionary[token] == 0) {
|
||||
probability_dictionary[token] = 0.5;
|
||||
}
|
||||
type_probability = probability_dictionary[token] * type_probability;
|
||||
type_probability = probability_dictionary[token].value * type_probability;
|
||||
}
|
||||
double final_probability;
|
||||
final_probability = (1. - sentence_probability_ham) * type_probability;
|
||||
final_probability = final_probability / (final_probability + ((1. - type_probability) * sentence_probability_ham));
|
||||
if (type_probability <= sentence_probability_ham) {
|
||||
filtered_messages[i].is_ham_filter = true;
|
||||
} else { filtered_messages[i].is_ham_filter = false; }
|
||||
std::cout << "[SMSMessageFilter - Info] Final probability of "
|
||||
<< i << ": " << final_probability << std::endl;
|
||||
type_probability = 0.5;
|
||||
full_message.clear();
|
||||
}
|
||||
}
|
||||
|
||||
@ -114,10 +116,10 @@ void SMSMessageFilter::Report(void) {
|
||||
}
|
||||
|
||||
ReportData SMSMessageFilter::GenerateReport(void) {
|
||||
double true_ham = 0;
|
||||
double true_spam = 0;
|
||||
double false_ham = 0;
|
||||
double false_spam = 0;
|
||||
double true_ham = 0.;
|
||||
double true_spam = 0.;
|
||||
double false_ham = 0.;
|
||||
double false_spam = 0.;
|
||||
for (SMSMessage message : filtered_messages) {
|
||||
// Get total count
|
||||
if (!(message.is_ham ^ message.is_ham_filter)) {
|
||||
@ -129,6 +131,7 @@ ReportData SMSMessageFilter::GenerateReport(void) {
|
||||
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
std::cout << "[SMSMessageFilter - Info] Ham barrier: ";
|
||||
std::cout << sentence_probability_ham << std::endl;
|
||||
std::cout << "[SMSMessageFilter - Info] True ham: ";
|
||||
@ -139,6 +142,7 @@ ReportData SMSMessageFilter::GenerateReport(void) {
|
||||
std::cout << false_ham << std::endl;
|
||||
std::cout << "[SMSMessageFilter - Info] False spam: ";
|
||||
std::cout << false_spam << std::endl;
|
||||
std::cout << std::endl;
|
||||
|
||||
// Calculate report data
|
||||
ReportData new_report;
|
||||
@ -153,6 +157,8 @@ ReportData SMSMessageFilter::GenerateReport(void) {
|
||||
}
|
||||
|
||||
void SMSMessageFilter::PrintReport(ReportData report) {
|
||||
std::cout << std::endl;
|
||||
std::cout << "============ [SMSMessageFilter - Report - Start] ============" << std::endl;
|
||||
// Spam precision: (true positives) / (true positives + false positives)
|
||||
std::cout << "[SMSMessageFilter - Report] Spam precision: ";
|
||||
std::cout << report.spam_precision << std::endl;
|
||||
@ -180,6 +186,7 @@ void SMSMessageFilter::PrintReport(ReportData report) {
|
||||
// Accuracy: (spam recall + ham recall) / 2
|
||||
std::cout << "[SMSMessageFilter - Report] Accuracy: ";
|
||||
std::cout << report.accuracy << std::endl;
|
||||
std::cout << "============ [SMSMessageFilter - Report - End] ============" << std::endl;
|
||||
}
|
||||
|
||||
std::string SanitizeToken(std::string token) {
|
||||
|
@ -5,6 +5,11 @@
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
// C++ maps don't allow specifying default value
|
||||
struct DoubleDefaultedToHalf {
|
||||
double value = 0.5;
|
||||
};
|
||||
|
||||
struct ReportData {
|
||||
double spam_precision;
|
||||
double spam_recall;
|
||||
@ -33,7 +38,7 @@ public:
|
||||
|
||||
private:
|
||||
double sentence_probability_ham = 0.5; // Spam is 1 - sentence_probability_ham
|
||||
std::map<std::string, double> probability_dictionary;
|
||||
std::map<std::string, DoubleDefaultedToHalf> probability_dictionary;
|
||||
std::vector<SMSMessage> filtered_messages;
|
||||
ReportData GenerateReport(void);
|
||||
void PrintReport(ReportData report);
|
||||
|
Loading…
Reference in New Issue
Block a user