I am trying to create an NLP program in C++ for my school assignment. How can I make the program add more tags on its own, without me having to add each extra tag by hand?
Below is the code:
#include <cctype>
#include <iostream>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
/**
 * @brief Splits a piece of text into whitespace-separated words.
 *
 * Words are extracted with stream extraction, so any run of whitespace acts
 * as a single separator and leading/trailing whitespace is ignored.
 * Punctuation is not treated specially: it stays attached to the word it
 * touches.
 *
 * @param text The text to split.
 * @return The words in the order they appear; empty if @p text holds no
 *         non-whitespace characters.
 */
std::vector<std::string> tokenize(const std::string& text) {
    std::istringstream stream(text);
    std::vector<std::string> words;
    // Extraction fails once the stream is exhausted, which ends the loop.
    for (std::string word; stream >> word;) {
        words.push_back(word);
    }
    return words;
}
/// One row of the built-in lexicon: a word and the part-of-speech tag it maps to.
struct PosLexiconEntry {
    const char* word;
    const char* tag;
};

/**
 * @brief Looks a word up in the built-in lexicon (exact, case-sensitive match).
 *
 * The lexicon is plain data: to teach the tagger a new word, add a row to the
 * table instead of writing another branch.
 *
 * @param word The word to look up.
 * @return The tag for @p word, or nullptr when the word is not listed.
 */
static const char* lookupPosTag(const std::string& word) {
    static constexpr PosLexiconEntry kLexicon[] = {
        {"is", "VERB"},        {"are", "VERB"},       {"was", "VERB"},
        {"were", "VERB"},      {"a", "ARTICLE"},      {"an", "ARTICLE"},
        {"the", "ARTICLE"},    {"of", "PREPOSITION"}, {"in", "PREPOSITION"},
        {"on", "PREPOSITION"}, {"by", "PREPOSITION"}, {"NLP", "NOUN"},
        {"AI", "NOUN"},
    };
    // The table is tiny, so a linear scan is simpler than a map.
    for (const PosLexiconEntry& entry : kLexicon) {
        if (word == entry.word) {
            return entry.tag;
        }
    }
    return nullptr;
}

/// Returns a copy of `token` without its leading and trailing punctuation
/// characters (as classified by std::ispunct). Interior punctuation is kept.
static std::string stripSurroundingPunctuation(const std::string& token) {
    std::string::size_type first = 0;
    std::string::size_type last = token.size();
    // The unsigned char casts avoid undefined behaviour for negative char values.
    while (first < last && std::ispunct(static_cast<unsigned char>(token[first]))) {
        ++first;
    }
    while (last > first && std::ispunct(static_cast<unsigned char>(token[last - 1]))) {
        --last;
    }
    return token.substr(first, last - first);
}

/// Returns a lower-cased copy of `text` (per-character std::tolower).
static std::string toLowerCopy(std::string text) {
    for (char& ch : text) {
        ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch)));
    }
    return text;
}

/**
 * @brief Performs basic, lexicon-based part-of-speech tagging.
 *
 * Each token is tagged as follows:
 *  1. Leading/trailing punctuation is ignored for the lookup, so "AI." is
 *     treated like "AI".
 *  2. The word is looked up exactly as written, which keeps case-sensitive
 *     entries such as "NLP" working.
 *  3. If that fails, the lower-cased word is looked up, so a sentence-initial
 *     "The" is treated like "the".
 *  4. Anything still unmatched is tagged "UNKNOWN".
 *
 * The token text in the result is always the original, unmodified token.
 *
 * @param tokens The tokens to tag.
 * @return One (token, tag) pair per input token, in input order.
 */
std::vector<std::pair<std::string, std::string>> posTag(const std::vector<std::string>& tokens) {
    std::vector<std::pair<std::string, std::string>> posTags;
    posTags.reserve(tokens.size());
    for (const std::string& token : tokens) {
        const std::string word = stripSurroundingPunctuation(token);
        const char* tag = lookupPosTag(word);
        if (tag == nullptr) {
            tag = lookupPosTag(toLowerCopy(word));
        }
        posTags.emplace_back(token, tag != nullptr ? tag : "UNKNOWN");
    }
    return posTags;
}
int main() {
std::string inputText = "Natural language processing is a subfield of artificial intelligence.";
// Tokenize the input text
std::vector<std::string> tokens = tokenize(inputText);
// Perform basic part-of-speech tagging
std::vector<std::pair<std::string, std::string>> posTags = posTag(tokens);
// Print the results
for (const auto& pair : posTags) {
std::cout << "Token: " << pair.first << " | POS Tag: " << pair.second << std::endl;
}
return 0;
}