The Algorithms logo
The Algorithms
AboutDonate

Words Alphabetical

/**
 * @file
 * @brief Printing the [words contained in a
 * file](http://www.dailyfreecode.com/Code/word-list-reads-text-file-makes-2050.aspx)
 * named `file.txt` in alphabetical order and also their frequencies in to
 * another file "wordcount.txt"
 * @details
 * Given a file (`file.txt`) containing words (like a publication or a novel),
 * where words are separated by a space, newline, or underscore.
 * This program prints (writes or outputs) to another file (`wordcount.txt`),
 * the individual words contained in 'file.txt' with their frequencies (number
 * of occurrences) each on a newline and in alphabetical order. This program uses
 * the binary tree data structure to accomplish this task.
 * @author [Randy Kwalar](https://github.com/RandyKdev)
 */

#include <assert.h>    /// for assert
#include <ctype.h>     /// for type checks
#include <inttypes.h>  /// for uint64_t based types, int64_t based types
#include <stdbool.h>   /// for boolean data type
#include <stdio.h>     /// for IO operations
#include <stdlib.h>    /// for memory allocation
#include <string.h>    /// for string operations

/**
 * @brief structure defining a node in the binary tree
 */
struct Node
{
    char *word;          ///< the word (value) of the node
    uint64_t frequency;  ///< number of occurrences of the word
    struct Node *left;   ///< pointer to the left child node
    struct Node *right;  ///< pointer to the right child node
};

/**
 * @brief Ends program due to an error
 * @param errorMessage the error message to be printed
 * @returns void
 */
void endProgramAbruptly(char *errorMessage)
{
    fprintf(stderr, "%s\n", errorMessage);
    exit(EXIT_FAILURE);
}

/**
 * @brief Frees memory when program is terminating
 * @param node pointer to current node
 * @returns void
 */
void freeTreeMemory(struct Node *node)
{
    if (node != NULL)
    {
        freeTreeMemory(node->left);
        freeTreeMemory(node->right);
        free(node->word);  // freeing node->word because memory was allocated
                           // using malloc
        free(node);  // freeing node because memory was allocated using malloc
    }
}

/**
 * @brief Stores word in memory
 * @param word word to be stored in memory
 * @returns a pointer to the newly allocated word if the word IS stored successfully
 * @returns `NULL` if the word is NOT stored
 */
char *getPointerToWord(char *word)
{
    char *string =
        (char *)malloc((strlen(word) + 1) * sizeof(char));  ///< pointer to string
    // + 1 is for the '\0' character
    if (string != NULL)
    {
        strcpy(string, word);
        return string;
    }
    endProgramAbruptly(
        "\nA problem occurred while reserving memory for the word\n");
    return NULL;
}

/**
 * @brief Closes the file after reading or writing
 * @param file pointer to the file to be closed
 * @returns void
 */
void closeFile(FILE *file)
{
    if (fclose(file)) {
        endProgramAbruptly("\nA Problem Occurred while closing a file\n");
     }
}

/**
 * @brief Reserves memory for new node
 * @returns a pointer to the newly allocated node if memory IS successfully reserved
 * @returns `NULL` if memory is NOT reserved
 */
struct Node *allocateMemoryForNode()
{
    struct Node *node =
        (struct Node *)malloc(sizeof(struct Node));  ///< pointer to the node
    if (node != NULL)
    {
        return node;
    }
    endProgramAbruptly(
        "\nA problem occurred while reserving memory for the structure\n");
    return NULL;
}

/**
 * @brief Writes contents of tree to another file alphabetically
 * @param node pointer to current node
 * @param file pointer to file
 * @returns void
 */
void writeContentOfTreeToFile(struct Node *node, FILE *file)
{
    static uint64_t i = 1;  ///< for word numbering in the write file
    if (node != NULL)       // checks if the node is valid
    {
        writeContentOfTreeToFile(
            node->left,
            file);  // calls `writeContentOfTreeToFile` for left sub tree
        fprintf(file, "%-5lu \t %-9lu \t %s \n", i++, node->frequency,
                node->word);  // prints the word number, word frequency and word
                              // in tabular format to the file
        writeContentOfTreeToFile(
            node->right,
            file);  // calls `writeContentOfTreeToFile` for right sub tree
    }
}

/**
 * @brief Adds word (node) to the correct position in tree
 * @param word word to be inserted in to the tree
 * @param currentNode node which is being compared
 * @returns a pointer to the root node
 */
struct Node *addWordToTree(char *word, struct Node *currentNode)
{
    if (currentNode == NULL)  // checks if `currentNode` is `NULL`
    {
        struct Node *currentNode =
            allocateMemoryForNode();  // allocates memory for new node
        currentNode->word = getPointerToWord(word);  // stores `word` in memory
        currentNode->frequency = 1;  // initializes the word frequency to 1
        currentNode->left = NULL;    // sets left node to `NULL`
        currentNode->right = NULL;   // sets right node to `NULL`
        return currentNode;          // returns pointer to newly created node
    }

    int64_t compared = strcmp(word, currentNode->word);  ///< holds compare state

    if (compared > 0) {
        currentNode->right = addWordToTree(word,
            currentNode->right);  // adds `word` to right sub tree if `word` is
                                  // alphabetically greater than `currentNode->word`
    }
    else if (compared < 0) {
        currentNode->left = addWordToTree(word,
            currentNode->left);  // adds `word` to left sub tree if `word` is
                                 // alphabetically less than `currentNode->word`
    }
    else {
        currentNode->frequency++; // increments `currentNode` frequency if `word` is the same as `currentNode->word`
    }

    return currentNode; // returns pointer to current node
}

/**
 * @brief Reads words from file to tree
 * @param file file to be read from
 * @param root root node of tree
 * @returns a pointer to the root node
 */
struct Node *readWordsInFileToTree(FILE *file, struct Node *root)
{
    // longest english word = 45 chars
    // +1 for '\0' = 46 chars
    char *inputString =
        (char *)malloc(46 * sizeof(char));  ///< pointer to the input string

    char inputChar;                ///< temp storage of characters
    bool isPrevCharAlpha = false;  ///< bool to mark the end of a word
    uint8_t pos = 0;  ///< position in inputString to place the inputChar

    while ((inputChar = fgetc(file)) != EOF)
    {
        if (pos > 0)
            isPrevCharAlpha = isalpha(inputString[pos - 1]);

        // checks if character is letter
        if (isalpha(inputChar))
        {
            inputString[pos++] = tolower(inputChar);
            continue;
        }

        // checks if character is ' or - and if it is preceded by a letter eg
        // yours-not, persons' (valid)
        if ((inputChar == '\'' || inputChar == '-') && isPrevCharAlpha)
        {
            inputString[pos++] = inputChar;
            continue;
        }

        // makes sure that there is something valid in inputString
        if (pos == 0)
            continue;

        // if last character is not letter and is not ' then replace by \0
        if (!isPrevCharAlpha && inputString[pos - 1] != '\'')
            pos--;
        inputString[pos] = '\0';
        pos = 0;
        isPrevCharAlpha = false;
        root = addWordToTree(inputString, root);
    }

    // this is to catch the case for the EOF being immediately after the last
    // letter or '
    if (pos > 0)
    {
        if (!isPrevCharAlpha && inputString[pos - 1] != '\'')
            pos--;
        inputString[pos] = '\0';
        root = addWordToTree(inputString, root);
    }

    free(inputString);
    return root;
}

/**
 * @brief Self-test implementations
 * @returns void
 */
static void test()
{
    struct Node *root = NULL;  ///< pointer to the root node
    FILE *file = NULL;         ///< pointer to the file

    file = fopen("file.txt", "w");  // creates test file in write mode

    fprintf(file,
            "hey_this, is a. test input \n to a_file");  // writes test data to
                                                         // test file

    closeFile(file);                // closes test file
    file = fopen("file.txt", "r");  // reopens test file in read mode

    root = readWordsInFileToTree(file,
                                 root);  // reads words from test file to tree

    // Tests to check if words were added to correct position in tree and also
    // if their frequencies were added correctly
    assert(strcmp(root->word, "hey") == 0);
    assert(root->frequency == 1);
    assert(strcmp(root->left->word, "a") == 0);
    assert(root->left->frequency == 2);
    assert(strcmp(root->right->word, "this") == 0);
    assert(strcmp(root->left->right->word, "file") == 0);
    assert(strcmp(root->right->left->word, "is") == 0);

    closeFile(file);     // closes test file
    remove("file.txt");  // deletes test file from storage

    file = fopen("wordcount.txt", "a");  // creates write file
    fprintf(file, "%-5s \t %9s \t %s \n", "S/N", "FREQUENCY",
            "WORD");  // prints the heading to `wordcount.txt`
    writeContentOfTreeToFile(
        root, file);  // writes content of tree to file (`wordcount.txt`)

    // Here is how the output to `wordcount.txt` should look like
    char *correctString =
        "S/N   	 FREQUENCY 	 WORD \n"
        "1     	 2         	 a \n"
        "2     	 1         	 file \n"
        "3     	 1         	 hey \n"
        "4     	 1         	 input \n"
        "5     	 1         	 is \n"
        "6     	 1         	 n \n"
        "7     	 1         	 test \n"
        "8     	 1         	 this \n"
        "9     	 1         	 to \n";

    int16_t inputChar;  // holds the current character in `wordcount.txt`
    uint64_t i = 0;     // holds the current index in `correctString`

    // Checks if the content in `wordcount.txt` is as expected (the same as in
    // `correctString`)
    while ((inputChar = fgetc(file)) != EOF) {
        assert(inputChar == correctString[i++]);
    }

    closeFile(file);          // closes `wordcount.txt`
    remove("wordcount.txt");  // deletes `wordcount.txt`

    freeTreeMemory(root);  // frees memory taken up by the tree
}

/**
 * @brief Main function
 * @returns 0 on exit
 */
int main()
{
    test();  // run self-test implementations
    return 0;
}