// bayesian-spam-filter/src/filter.hpp
//
// Declarations for the SMS spam filter: the message and report data types,
// the SMSMessageFilter class, and two free helper functions.
#ifndef FILTER_HPP
#define FILTER_HPP
#include <map>
#include <string>
#include <vector>
// Thin wrapper around a double whose default-constructed value is 0.5.
//
// std::map::operator[] value-initializes the mapped object for a key that is
// not present yet; with a plain double that would be 0.0. Using this wrapper
// as the mapped type makes a first-time lookup yield 0.5 instead.
struct DoubleDefaultedToHalf {
    double value{0.5};
};
// Bundle of evaluation metrics; this is the type returned by
// SMSMessageFilter::GenerateReport().
//
// Every field carries a 0.0 default so that a default-constructed ReportData
// never exposes indeterminate values. The type remains an aggregate
// (C++14 and later), so brace-initialization in declaration order still works.
struct ReportData {
    // Metrics for the "spam" class.
    double spam_precision = 0.0;
    double spam_recall = 0.0;
    // Metrics for the "ham" class.
    double ham_precision = 0.0;
    double ham_recall = 0.0;
    // F-scores for each class.
    double spam_f_score = 0.0;
    double ham_f_score = 0.0;
    // Overall accuracy.
    double accuracy = 0.0;
};
// A single SMS message together with two ham/spam flags.
struct SMSMessage {
    // Defined out of line.
    // NOTE(review): given_type presumably initializes is_ham and
    // given_message initializes message -- confirm against the definition.
    SMSMessage(bool given_type, std::string given_message);

    // The bool flags carry default member initializers so they are never
    // indeterminate, even if the constructor leaves one of them unassigned.

    // NOTE(review): presumably the ham/spam label supplied with the message
    // -- confirm.
    bool is_ham = false;
    // Text of the message.
    std::string message;
    // NOTE(review): presumably the classification assigned by the filter
    // -- confirm.
    bool is_ham_filter = false;
};
// Driver class for the SMS spam filter. It owns a per-token probability
// dictionary, the two file paths it works with, and the list of messages that
// have been run through the filter. All member functions are defined out of
// line; the notes below marked NOTE(review) are assumptions to confirm
// against those definitions.
class SMSMessageFilter {
public:
    SMSMessageFilter() = default;
    ~SMSMessageFilter() = default;

    // Both flags start out false.
    // NOTE(review): presumably set once the corresponding file path has been
    // supplied (e.g. by ReadArguments) -- confirm.
    bool is_generator_defined = false;
    bool is_input_defined = false;

    // NOTE(review): presumably builds probability_dictionary from the file
    // at generation_file_path -- confirm.
    void GenerateProbability();
    void Prepare();
    // NOTE(review): presumably classifies the messages from filter_file_path
    // into filtered_messages -- confirm.
    void Filter();
    // NOTE(review): presumably combines GenerateReport() and PrintReport()
    // -- confirm.
    void Report();
    // Takes the program's command-line arguments (same shape as main()).
    void ReadArguments(int argc, char* argv[]);

private:
    double sentence_probability_ham = 0.2;  // Sentence is spam if < this value
    // Token -> probability; tokens not yet present read as 0.5 because of the
    // DoubleDefaultedToHalf mapped type.
    std::map<std::string, DoubleDefaultedToHalf> probability_dictionary;
    std::string generation_file_path;
    std::string filter_file_path;
    std::vector<SMSMessage> filtered_messages;

    // Produces the metrics bundle for the current state of the filter.
    ReportData GenerateReport();
    // Outputs a previously generated report.
    void PrintReport(ReportData report);
};
// Takes a token by value and returns a string.
// NOTE(review): presumably returns a normalized copy of the token; the exact
// rules live in the out-of-line definition -- confirm.
std::string SanitizeToken(std::string token);

// NOTE(review): presumably prints command-line usage information -- confirm.
void PrintHelp();
#endif // !FILTER_HPP