From d3f007b7f63ce3ac724e0336e7435c9a9fc27eac Mon Sep 17 00:00:00 2001
From: Trianta <56975502+Trimutex@users.noreply.github.com>
Date: Fri, 8 Dec 2023 18:03:58 -0600
Subject: [PATCH] Added reading in text file and trie structure

---
Review revisions folded into this patch (the blob ids on the "index" lines
still describe the original submission):
 - ReadFile no longer reads into the buffer of an empty std::string; it
   slides one window over the file and inserts only full k-grams.
 - Trie::insert counts a repeat visit on the revisited child node instead
   of on its parent.
 - GenerateCharacter is defined as the Generator member the header declares.
 - Trie nodes are owned through std::unique_ptr, so they are freed with the
   trie instead of leaking.

 src/generator.cpp | 73 +++++++++++++++++++++++++++++++++++++++++++++++
 src/generator.hpp | 32 +++++++++++++++++++++
 src/main.cpp      |  2 ++
 3 files changed, 107 insertions(+)

diff --git a/src/generator.cpp b/src/generator.cpp
index d9edafa..ab8f2d1 100644
--- a/src/generator.cpp
+++ b/src/generator.cpp
@@ -1,6 +1,48 @@
 #include "generator.hpp"
+#include <cstdlib>
+#include <fstream>
 #include <iostream>
+
+// Records one k-gram in the trie: walks (creating as needed) one node per
+// character and marks the final node as the end of a stored k-gram.
+void Trie::insert(const std::deque<char>& currentKGram) {
+    TrieNode* current = root.get();
+
+    for (char ch : currentKGram) {
+        // operator[] default-inserts an empty slot the first time ch is seen.
+        std::unique_ptr<TrieNode>& child = current->children[ch];
+        if (!child) {
+            // A fresh node already starts with occurances == 1.
+            child.reset(new TrieNode());
+        } else {
+            // Count the repeat on the revisited child, not on its parent.
+            ++child->occurances;
+        }
+
+        current = child.get();
+    }
+
+    current->isEndOfWord = true;
+}
+
+// Returns true only if exactly this k-gram was inserted before; a strict
+// prefix of a stored k-gram does not count as a match.
+bool Trie::search(const std::deque<char>& currentKGram) const {
+    const TrieNode* current = root.get();
+
+    for (char ch : currentKGram) {
+        const auto it = current->children.find(ch);
+        if (it == current->children.end()) {
+            return false;
+        }
+
+        current = it->second.get();
+    }
+
+    return current->isEndOfWord;
+}
+
 
 void Generator::SetArguments(int argc, char* argv[]) {
     std::string tempStr;
     for (int i = 1; i < argc; i += 2) {
@@ -28,6 +70,37 @@ void Generator::SetArguments(int argc, char* argv[]) {
     if (!setup.isFileSet || !setup.isPrefixSet || !setup.isOutputSet) { PrintUsage(); }
 }
 
+// Reads the input file one character at a time and inserts every full
+// window of prefixLength consecutive characters (k-gram) into the trie.
+// Prints an error and exits with status 1 if the file cannot be opened.
+void Generator::ReadFile(void) {
+    std::ifstream inputFile(setup.filename);
+    if (!inputFile.is_open()) {
+        std::cerr << "[ReadFile - Error] Could not open file: " << setup.filename << std::endl;
+        std::exit(1);
+    }
+    // Normalise once so the size comparisons below are not signed/unsigned.
+    const std::size_t windowSize = static_cast<std::size_t>(setup.prefixLength);
+    std::deque<char> currentKGram;
+    char tempChar;
+    // Slide a window over the file; the first insert happens as soon as the
+    // window holds its first full k-gram, then once per additional character.
+    while (inputFile.get(tempChar)) {
+        currentKGram.emplace_back(tempChar);
+        if (currentKGram.size() > windowSize) { currentKGram.pop_front(); }
+        if (currentKGram.size() == windowSize) { trie.insert(currentKGram); }
+    }
+}
+
+// Placeholder: output generation is not implemented yet.
+void Generator::GenerateOutput(void) {
+}
+
+// Placeholder: always returns 'z' until real generation is implemented.
+char Generator::GenerateCharacter(void) {
+    return 'z';
+}
+
 void PrintUsage(void) {
     std::cout << "Usage: markov -i input_file -k prefix_length -n output_length" << std::endl;
     std::cout << " -i: Direct path to input file for basis" << std::endl;
diff --git a/src/generator.hpp b/src/generator.hpp
index a50313b..5c5e76f 100644
--- a/src/generator.hpp
+++ b/src/generator.hpp
@@ -1,6 +1,9 @@
 #ifndef GENERATOR_HPP
 #define GENERATOR_HPP
 
+#include <deque>
+#include <memory>
+#include <unordered_map>
 #include <string>
 
 void PrintUsage(void);
@@ -14,13 +17,42 @@ struct ArgumentList {
     bool isOutputSet = false;
 };
 
+// One trie node: a single character position within the stored k-grams.
+class TrieNode {
+public:
+    // Child nodes keyed by the next character; each node owns its subtree,
+    // so the whole trie is released automatically when the root is destroyed.
+    std::unordered_map<char, std::unique_ptr<TrieNode>> children;
+    // Times this node has been reached by insert(); creation counts as one.
+    int occurances = 1;
+    // True when an inserted k-gram ends exactly at this node.
+    bool isEndOfWord;
+
+    TrieNode() : isEndOfWord(false) {}
+};
+
+// Character trie holding the k-grams read from the input text.
+class Trie {
+public:
+    Trie() : root(new TrieNode()) {}
+    void insert(const std::deque<char>& currentKGram);
+    bool search(const std::deque<char>& currentKGram) const;
+private:
+    std::unique_ptr<TrieNode> root;
+};
+
+
 struct Generator {
 public:
     Generator(void) = default;
     ~Generator(void) = default;
     void SetArguments(int argc, char* argv[]);
+    void ReadFile(void);
+    void GenerateOutput(void);
 private:
+    char GenerateCharacter(void);
     ArgumentList setup;
+    Trie trie;
 };
 
 #endif // !GENERATOR_HPP
diff --git a/src/main.cpp b/src/main.cpp
index 6997419..56497de 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -7,5 +7,7 @@ int main(int argc, char* argv[]) {
     }
     Generator markovChain;
     markovChain.SetArguments(argc, argv);
+    markovChain.ReadFile();
+    markovChain.GenerateOutput();
     return 0;
 }