2023-12-07 21:36:51 -06:00
|
|
|
#include "generator.hpp"
|
2023-12-08 18:19:14 -06:00
|
|
|
#include <cstdlib>
|
2023-12-08 18:03:58 -06:00
|
|
|
#include <fstream>
|
2023-12-07 21:36:51 -06:00
|
|
|
#include <iostream>
|
2023-12-08 18:19:14 -06:00
|
|
|
#include <random>
|
|
|
|
|
|
|
|
// Pseudo-random engine shared by the helper functions below.
std::default_random_engine generator;

// Seeds the shared engine from std::random_device.
// Call once before requesting random numbers; until then the engine
// runs from its default seed.
void InitializeGenerator(void)
{
    generator.seed(std::random_device{}());
}

// Returns a newly generated number, uniformly distributed over
// [0, generationLimit - 1].
//
// generationLimit: exclusive upper bound of the range.
//
// A non-positive limit describes an empty range, which
// std::uniform_int_distribution does not allow (it requires a <= b);
// 0 is returned in that case instead of invoking undefined behaviour.
int GenerateRandomNumber(int generationLimit)
{
    if (generationLimit <= 0) { return 0; }

    std::uniform_int_distribution<> distribution(0, generationLimit - 1);
    // Draw from the engine defined in this file.
    return distribution(generator);
}
|
2023-12-07 21:36:51 -06:00
|
|
|
|
2023-12-08 18:03:58 -06:00
|
|
|
|
|
|
|
void Trie::insert(const std::deque<char>& currentKGram) {
|
|
|
|
TrieNode* current = root;
|
|
|
|
|
|
|
|
for (char ch : currentKGram) {
|
|
|
|
if (current->children.find(ch) == current->children.end()) {
|
|
|
|
current->children[ch] = new TrieNode();
|
|
|
|
} else { ++current->occurances; }
|
|
|
|
|
|
|
|
current = current->children[ch];
|
|
|
|
}
|
|
|
|
|
|
|
|
current->isEndOfWord = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Trie::search(const std::deque<char>& currentKGram) const {
|
|
|
|
TrieNode* current = root;
|
|
|
|
|
|
|
|
for (char ch : currentKGram) {
|
|
|
|
if (current->children.find(ch) == current->children.end()) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
current = current->children[ch];
|
|
|
|
}
|
|
|
|
|
|
|
|
return current->isEndOfWord;
|
|
|
|
}
|
|
|
|
|
2023-12-07 21:36:51 -06:00
|
|
|
void Generator::SetArguments(int argc, char* argv[]) {
|
|
|
|
std::string tempStr;
|
2023-12-07 23:58:54 -06:00
|
|
|
for (int i = 1; i < argc; i += 2) {
|
2023-12-07 21:36:51 -06:00
|
|
|
tempStr.assign(argv[i]);
|
|
|
|
if (tempStr == "-i") {
|
|
|
|
setup.isFileSet = true;
|
2023-12-07 23:58:54 -06:00
|
|
|
setup.filename.assign(argv[i+1]);
|
2023-12-07 21:36:51 -06:00
|
|
|
}
|
|
|
|
if (tempStr == "-k") {
|
|
|
|
setup.isPrefixSet = true;
|
2023-12-07 23:58:54 -06:00
|
|
|
setup.prefixLength = std::stoi(argv[i+1]);
|
2023-12-07 21:36:51 -06:00
|
|
|
}
|
|
|
|
if (tempStr == "-n") {
|
|
|
|
setup.isOutputSet = true;
|
2023-12-07 23:58:54 -06:00
|
|
|
setup.outputLength = std::stoi(argv[i+1]);
|
2023-12-07 21:36:51 -06:00
|
|
|
}
|
|
|
|
if (tempStr == "-h") {
|
|
|
|
PrintUsage();
|
|
|
|
exit(0);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (!setup.isFileSet) { std::cerr << "[Setup - Error] Filename not specified" << std::endl; }
|
|
|
|
if (!setup.prefixLength) { std::cerr << "[Setup - Error] Prefix length not specified" << std::endl; }
|
|
|
|
if (!setup.outputLength) { std::cerr << "[Setup - Error] Output length not specified" << std::endl; }
|
|
|
|
if (!setup.isFileSet || !setup.isPrefixSet || !setup.isOutputSet) { PrintUsage(); }
|
|
|
|
}
|
|
|
|
|
2023-12-08 18:03:58 -06:00
|
|
|
void Generator::ReadFile(void) {
|
|
|
|
std::ifstream inputFile(setup.filename);
|
|
|
|
if (!inputFile.is_open()) {
|
|
|
|
std::cerr << "[ReadFile - Error] Could not open file: " << setup.filename << std::endl;
|
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
std::deque<char> currentKGram;
|
|
|
|
char tempChar;
|
|
|
|
// Read in first k-gram
|
|
|
|
{
|
|
|
|
std::string initializeKGram;
|
|
|
|
inputFile.get(&initializeKGram[0], setup.prefixLength);
|
|
|
|
for (char ch : initializeKGram) { currentKGram.emplace_back(ch); }
|
|
|
|
trie.insert(currentKGram);
|
|
|
|
}
|
|
|
|
// Read rest of file
|
|
|
|
while (inputFile.get(tempChar)) {
|
|
|
|
currentKGram.emplace_back(tempChar);
|
|
|
|
if (currentKGram.size() > setup.prefixLength) { currentKGram.pop_front(); }
|
|
|
|
trie.insert(currentKGram);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Placeholder for output generation; the body is intentionally empty and
// the function currently has no effect.
void Generator::GenerateOutput(void) {}
|
|
|
|
|
2023-12-08 18:19:14 -06:00
|
|
|
char Generator::GenerateCharacter(void) {
|
|
|
|
double roll = ((double) GenerateRandomNumber(RAND_MAX)) / ((double) RAND_MAX);
|
2023-12-08 18:03:58 -06:00
|
|
|
return 'z';
|
|
|
|
}
|
|
|
|
|
2023-12-07 21:36:51 -06:00
|
|
|
// Writes the command-line help text to standard output, one line per
// option, flushing after every line.
void PrintUsage(void) {
    static const char* const usageLines[] = {
        "Usage: markov -i input_file -k prefix_length -n output_length",
        " -i: Direct path to input file for basis",
        " -k: Prefix length for Markov chain",
        " -n: Length of output to be generated (words)",
        " -h: Prints this usage text",
    };

    for (const char* line : usageLines) {
        std::cout << line << std::endl;
    }
}
|