我想统计每个单词在一批文本中的出现次数。 我已经能够统计每个单词在单个文本中的出现次数(我把文本中的每个单词插入一棵二叉树,当同一个单词在该文本中再次出现时,就把它的计数加一),但我不知道如何统计每个单词在所有文本中的出现情况(即它出现在多少个文本中)。
愚蠢的例子:
课文1:你好,亚当,我叫亚当,我想要快乐。 课文2:你好,山姆,他是哪里人?
树1(每个单词在一篇课文中的出现次数):亚当 2,你好 1,我 1,名字 1,是 1,我 1,想要 1,to 1,be 1,happy 1。 树2(每个单词出现在多少篇课文中):你好 2,亚当 1,山姆 1,where 1,是 2,……
有人能给我解释一下我能做什么吗? 或者帮我做一个这样做的算法?
我的文件:
node.hpp
/// One node of the word-count binary search tree.
///
/// A node stores a word (the ordering key) together with an integer counter,
/// plus raw links to its two children. The node does not delete its children;
/// BST::clear releases them.
class Node{
private:
    Node *left = nullptr;   // left child, null when absent
    Node *right = nullptr;  // right child, null when absent
    std::string num;        // the stored word (the name is a leftover from a numeric tree)
public:
    int data = 0;           // occurrence counter; zero until setData is called

    /// Creates a node with no children.
    Node();

    /// Stores the word `num` and its counter value `data` in this node.
    void setData(std::string num, int data);

    /// Returns a copy of the stored word.
    std::string getData();

    /// Returns a mutable reference to the occurrence counter.
    int &getOcc();

    /// Replaces the left child link with `l`.
    void setLeft(Node *l);

    /// Returns a reference to the left child link, so callers can re-point it.
    Node* &getLeft();

    /// Replaces the right child link with `r`.
    void setRight(Node *r);

    /// Returns a reference to the right child link, so callers can re-point it.
    Node* &getRight();
};
node.cpp
/// Creates a detached node: no children and an occurrence counter of zero.
///
/// The counter is initialised here so that getOcc() on a node that has not
/// yet received setData() does not read an indeterminate value.
Node::Node()
    : left(nullptr), right(nullptr), data(0) {
}
/// Overwrites both the stored word and its occurrence counter.
void Node::setData(string num, int data){
    // The parameters shadow the members of the same name, so the
    // assignment targets are qualified with the class name.
    Node::data = data;
    Node::num = num;
}
string Node::getData(){
return this->num;
}
int &Node::getOcc(){
return this->data;
}
/// Points the left child link at `l` (which may be null).
void Node::setLeft(Node *l){
    left = l;
}
/// Returns the left child link itself (by reference), not a copy of it.
Node* &Node::getLeft(){
    return left;
}
/// Points the right child link at `r` (which may be null).
void Node::setRight(Node *r){
    right = r;
}
/// Returns the right child link itself (by reference), not a copy of it.
Node* &Node::getRight(){
    return right;
}
bst.hpp
/// Binary search tree keyed by word, storing an integer counter per word.
///
/// The tree owns its nodes through raw pointers and frees them in the
/// destructor.
/// NOTE(review): no copy constructor or copy assignment is declared, so a
/// copied BST would share nodes with the original and both destructors would
/// delete them. Pass BST objects by reference.
class BST{
private:
    Node * root;            // root node pointer, null for an empty tree
public:
    BST();                  // creates an empty tree
    ~BST();                 // deletes every node
    void Insert(string num, int data);     // inserts a word, or increments its counter if present
    void InsertIDF(string num, int data);  // inserts a word only if it is not present yet
    bool find(string num);                 // finds whether a key is present in the tree (definition not shown here)
    void min();                            // prints the smallest key in the tree
    void max();                            // prints the largest key in the tree
    void save_file(string filename);       // saves the tree to a file (definition not shown here)
    void Delete(string num);               // deletes a key from the tree
    void LoadFromFile(string filename);    // loads keys from a file (definition not shown here)
    void Print();                          // prints "word count" pairs in key order to stdout
//private functions used as helper functions in the public operations
private:
    void printHelper(Node *root);
    bool findHelper(Node *root,string num);
    // `current` is a reference to a child link (or to the root), so the
    // helpers can attach a new node by assigning to it.
    void InsertHelper(Node *&current, string num, int data);
    void InsertHelperIDF(Node *&current, string num, int data);
    void findMinHelper(Node* current);
    void findMaxHelper(Node * current);
    void saveHelper(ofstream &fout, Node* current);
    Node* DeleteHelper(Node *current, string num);
    Node * findMaximum(Node * n);
    void clear(Node *currnt);
};
bst.cpp
/// Creates an empty tree: there is no root node yet.
BST::BST() : root(nullptr) {
}
/// Releases every node still held by the tree.
BST::~BST(){
    clear(this->root);
}
/// Deletes the subtree rooted at `current`; a null subtree is a no-op.
void BST::clear(Node* current){
    if (current != nullptr) {
        // Post-order: both subtrees are released before their parent,
        // because the parent holds the only links to them.
        clear(current->getLeft());
        clear(current->getRight());
        delete current;
    }
}
void BST::Insert(string num, int data){
InsertHelper(root,num,data); //call helper to insert
}
void BST::InsertHelper( Node * ¤t, string num, int data ){
if ( current == nullptr ){
// create new node to be inserted
current = new Node();
current->setData( num, data );
current->setLeft( nullptr );
current->setRight( nullptr );
} else if ( num < current->getData() ){
InsertHelper( current->getLeft(), num, data );
} else if ( current->getData() < num ){
InsertHelper( current->getRight(), num, data );
} else {
int h = current->getOcc();
h++;
current->setData(num, h);
}
}
void BST::InsertIDF(string num, int data){
InsertHelperIDF(root,num,data); //call helper to insert
}
void BST::InsertHelperIDF( Node * ¤t, string num, int data){
if ( current == nullptr ){
// create new node to be inserted
current = new Node();
current->setData( num, data );
current->setLeft( nullptr );
current->setRight( nullptr );
} else if ( num < current->getData() ){
InsertHelperIDF( current->getLeft(), num, data );
} else if ( current->getData() < num ){
InsertHelperIDF( current->getRight(), num, data );
}
}
void BST::min(){
findMinHelper(root);
}
/// Writes the smallest key in the subtree at `current` to stdout.
/// A null subtree produces no output.
void BST::findMinHelper(Node* current){
    if (current == nullptr) {
        return;
    }
    // The minimum is the last node on the chain of left children.
    while (current->getLeft() != nullptr) {
        current = current->getLeft();
    }
    cout << current->getData();
}
void BST::max(){
findMaxHelper(root);
}
/// Writes the largest key in the subtree at `current` to stdout.
/// A null subtree produces no output.
void BST::findMaxHelper(Node * current){
    if (current == nullptr) {
        return;
    }
    // The maximum is the last node on the chain of right children.
    while (current->getRight() != nullptr) {
        current = current->getRight();
    }
    cout << current->getData();
}
void BST::Print(){
printHelper(root);
}
/// In-order walk of the subtree at `current`, printing "word count " for
/// each node. A null subtree prints nothing.
void BST::printHelper(Node *current){
    if (current != nullptr) {
        printHelper(current->getLeft());      // smaller keys first
        const string word = current->getData();
        const int occurrences = current->getOcc();
        cout << word << " " << occurrences << " ";
        printHelper(current->getRight());     // then the larger keys
    }
}
void BST::Delete(string num){
root = DeleteHelper(root,num);
}
/// Removes the node holding `num` from the subtree at `current`.
///
/// @param current root of the subtree to search; may be null
/// @param num     key to remove
/// @return the root of the subtree after removal; the caller must store it
///         back into the link it followed. Null if the subtree became empty.
Node* BST::DeleteHelper(Node *current, string num){
    if(current == NULL)
        return NULL;
    Node *tobeReturned;
    if (current->getData() == num) {            // key is found
        if (current->getLeft() == NULL) {       // no left child
            tobeReturned = current->getRight();
            delete current;
            return tobeReturned;                // right subtree (possibly null) replaces this node
        } else if (current->getRight() == NULL) {   // only a left child
            tobeReturned = current->getLeft();
            delete current;
            return tobeReturned;
        } else {
            // Two children: take over the in-order predecessor (the largest
            // key of the left subtree), then remove that predecessor node.
            Node * maxnode = findMaximum(current->getLeft());
            // Copy the key first: maxnode is deleted by the recursive call.
            const string predecessorKey = maxnode->getData();
            current->setData(predecessorKey, maxnode->getOcc());
            // The predecessor has no right child, so this recursion ends in
            // one of the simple cases above.
            current->setLeft(DeleteHelper(current->getLeft(), predecessorKey));
        }
        // As in the original, the message is only written on the
        // two-children path; the simple cases return earlier.
        cout<<"Deleted!!!";
    } else {                                    // keep searching
        if (num < current->getData()) {
            current->setLeft(DeleteHelper(current->getLeft(),num));
        } else {
            current->setRight(DeleteHelper(current->getRight(), num));
        }
    }
    return current;
}
/// Returns the node with the largest key in the subtree at `n`.
/// `n` must not be null: it is dereferenced without a check.
Node* BST::findMaximum(Node * n){
    // The maximum is the last node on the chain of right children.
    while (n->getRight() != nullptr) {
        n = n->getRight();
    }
    return n;
}
这是我的main.cpp
// Excerpt from main.cpp: feeds every word of every story into `tree`.
// NOTE(review): `histoires`, `tree`, `Histoire` and `Phrase` are declared
// outside this excerpt; their exact types are not visible here.
int x = 0; // NOTE(review): not used anywhere in this excerpt
// go through each story
for( Histoire * histoire : * histoires ) {
// go through each sentence
for( Phrase p : * histoire ) {
// go through each word
for ( Phrase w : p ){
// Render the word as text through operator<< so it can serve as the tree key.
std::stringstream sstream;
sstream << w;
std::string s = sstream.str();
// Insert the word with a count of 1; BST::Insert increments the stored
// counter instead when the word is already in the tree.
// NOTE(review): the same `tree` receives the words of every story in this
// excerpt, so its counts are not reset per story here.
tree.Insert(s , 1);
}
// Disabled attempt to fill the second tree.
// NOTE(review): `t` is not defined in this excerpt.
// treeIDF.Insert(t,1);
};
// NOTE(review): the closing brace of the outer story loop is not part of this excerpt.
嗯,这看起来确实像是一个家庭作业:)
你确定二叉树是适合这类问题的数据结构吗? 正如评论中所建议的,最好是简单地使用std::map。
#include <fstream>
#include <iostream>
#include <map>
#include <string>
#include <vector>
/// Adds the word counts of one file to a shared tally.
///
/// Words are the whitespace-separated tokens produced by `operator>>`; no
/// case folding or punctuation stripping is applied.
///
/// @param filename    path of the text file to read
/// @param words_count tally to update; existing counts are kept and increased
///
/// A file that cannot be opened leaves `words_count` unchanged and is
/// reported on stderr instead of being skipped silently.
void processFile(const std::string &filename,
                 std::map<std::string, int> &words_count) {
  std::ifstream load_file(filename.c_str());
  if (!load_file) {
    std::cerr << "processFile: cannot open " << filename << '\n';
    return;
  }
  std::string word;
  while (load_file >> word) {
    // operator[] value-initialises a missing entry to 0, so one lookup
    // covers both the first and the repeated occurrence.
    ++words_count[word];
  }
}
/// Tallies the words of every listed file into one map, then prints one
/// "<word> <count>" line per distinct word in the map's key order.
int main() {
  std::map<std::string, int> tally{};
  const std::vector<std::string> inputs{"text1.txt", "text2.txt"};

  // One shared tally: counts accumulate across all input files.
  for (std::vector<std::string>::const_iterator file = inputs.begin();
       file != inputs.end(); ++file) {
    processFile(*file, tally);
  }

  for (std::map<std::string, int>::const_iterator entry = tally.begin();
       entry != tally.end(); ++entry) {
    std::cout << entry->first << " " << entry->second << std::endl;
  }
}
不过,这需要改进--比如去掉特殊字符,将其打包到类中等等。如果你不能使用STL(无论出于何种原因),只需自己创建一个键值结构并实现一些搜索。