Wednesday, 21 March 2012

Set Theory

I had a code problem that most programmers would be very familiar with. I wanted to create a set of names. The definition of set is basically a number of items where each item must be a unique element. So for example, if I add the name "Dave" twice it only stores one copy of the name. The important requirement was that the set returned the index (number) of the name in the array .. for example

0: Craig
1: Jen
2: Dave
3: Gary
4: Peter

when inserting the name "Dave" I want the number 2 to return from the function.

The easy way to perform this is to say "for each element, if name equals the new Name, return the number".

This method becomes quite slow for very large lists of names. So I wrote this class ...


class ArraySet
{
public:

string& Get(int p)
{
if( p < elems.size() )
{
return elems[p];
}
else return string("");
}

void Append(string &s)
{
elems.push_back(s);
}


vector<string> elems;
};

class TreeNode
{
public:

TreeNode()
{
parent = NULL;
left = right = NULL;
data = -1;
searchVisited = false;
}

TreeNode(TreeNode* pParent)
{
parent = pParent;
left = right = NULL;
data = -1;
searchVisited = false;
}

int data;
bool searchVisited ;

int Insert(string &s2, ArraySet &set)
{
string s1 = "";
s1 = set.Get(data);

if( s1 == s2 )
{
return data;
}
else
{
if( s1 < s2 )
{
if( right )
{
return right->Insert(s2, set);
}
else
{
right = new TreeNode(this);
set.Append(s2);
right->data = set.elems.size()-1;
return right->data;
}
}
else if( s1 > s2 )
{
if( left )
{
return left->Insert(s2, set);
}
else
{
left = new TreeNode(this);
set.Append(s2);
left->data = set.elems.size()-1;
return left->data;
}
}
}
}

void Visit(vector<int> &rmp)
{
if( left )
{
left->Visit(rmp);
}

rmp.push_back(data);

if( right )
{
right->Visit(rmp);
}
}

TreeNode* parent;
TreeNode* left;
TreeNode *right;
};

class TreeIndexedSet
{
public:

TreeIndexedSet()
{
root = NULL;
}

int InsertString(string &str)
{
if( root )
{
return root->Insert(str, m_set);
}
else
{
root = new TreeNode;
m_set.Append(str);
root->data = m_set.elems.size()-1;

return root->data;
}
}

string& LookupString(int p)
{
return m_set.Get(p);
}

void SortRemap()
{
if( m_set.elems.size() > 1000000 )
{
SortRemapIterate();
}
else
{
root->Visit(sortedRemapping);
}
}

void SortRemapIterate()
{
        TreeNode *currNode = root;

        while (true)
        {

            if (currNode == NULL)
                break;

if (currNode->left != NULL && currNode->left->searchVisited != true)
{
currNode = currNode->left;
}
            else if (currNode->right != NULL && currNode->right->searchVisited != true)
            {
currNode = currNode->right;
}
            else if (currNode->searchVisited == false)
            {
sortedRemapping.push_back(currNode->data);
                currNode->searchVisited = true;
            }
else
            {
currNode = currNode->parent;
}

        }
}

vector< int > sortedRemapping;
TreeNode* root;
ArraySet m_set;
};

No comments:

Post a Comment