Graphs : Word ladder or word golf

Tags: , ,

Word ladder or word golf

Let me explain problem in detail first. We are given two words A and B and a dictionary of words. Task is to transform first word into second word by changing one character at a time. Character should be changed in such a way that the new word should be in dictionary. Find minimum number of such hops from one word to reach the second word.

For example, lets say given word are  CAT and DOG

The sequence of steps will be CAT –> COT–> COG–> DOG, hence three substitutions and we are done.
To convert COLD to WARM (from Wikipedia)
Sequence of steps will be COLD–>CORD–>CARD–>WARD–>WARM.

There are two strings/words given as input. And hoping can be done only if there is one condition : the destination of hop should differ at most one character with source. So if two words differ only in one character, we have a relationship between them.
So if take those two words as two nodes and connect them only if they differ by one character, we get a graph with two nodes and one edge.
Apply same principle to all words in dictionary and create a graph, nodes representing words and edges the relationship.
Once we have created graph as per above conditions, start breadth first traversing from the node which has values as first word and go on scanning till we get a node with value as second word. That’s all. Simple analysis! Implementation is bit tricky. Let’s look at implementation. For each word, if we go an scan every word in dictionary to figure out if they differ by one character in order to create edge will be too inefficient with complexity of O(n2). If we create buckets of words with one character of that replaced with _ and put all similar words into that bucket, we will segregate words with one character different. Let’s take an example.

Dictionary given is  {“CAT”, “BAT”, “COT”, “COG”, “COW”, “RAT”, “BUT”,”CUT”, “DOG”, “WEB”}. Now for word CAT, there are three buckets possible.

Now take word BAT, if we replace B with _ it becomes _AT, for which we already have a bucket, so we can put CAT and BAT in same bucket. Similarly when we replace second character of COT, it becomes C_T, for which we already have a bucket, hence CAT and COT are placed in once bucket. With above given words buckets will be like below.

Now connect all words in same buckets with each other. Final graph will be like below:

#include<stdio.h>
#include<stdlib.h>
#include<string.h>

#define NUM_WORDS 10
#define NUM_CHAR 4 
#define true 1
#define false 0

typedef struct node_s{
        char value[NUM_CHAR];
        int wt;
        struct node_s *next;
}Node_S;

Node_S *graph_s[NUM_WORDS + 1];

Node_S * create_node_s(char *temp){

    Node_S * new_node = (Node_S *)malloc(sizeof(Node_S));
    if(new_node){
        strcpy(new_node->value, temp);
        new_node->next = NULL;
    }
    else{
        printf("\n Node cannot be allocated");
    }
    return new_node;
}
void add_edge_3(int i, char *str){

    Node_S * temp = graph_s[i];
    if(temp == NULL){
        graph_s[i] = create_node_s(str);
    }
    else{
        while(temp->next){
            temp = temp->next;
        }
        temp->next = create_node_s(str);
    }
}
/* Check if there is already exisitng bucket */
int search_buckets(char buckets[NUM_WORDS * NUM_CHAR][NUM_CHAR], int count, char *temp){
    int i;
    for(i=0; i<count; i++){
        if(!strcmp(buckets[i], temp))
            return i;
    }
    return -1;
}

/* This adds string to a bucket */
void  add_string(Node_S *pre_graph[], int index, char * str){

    Node_S * temp = pre_graph[index];

    if(!temp){
        pre_graph[index] = create_node_s(str);
    }
    else{
        while(temp->next){
            temp  = temp->next;
        }
        temp->next = create_node_s(str);
    }
}

/* This function finds the index of the word in dictionary
Inefficient, can be done using binary search as words are in sorted 
order in dictionary */

int find_index(char dict[NUM_WORDS][NUM_CHAR], char * temp){
    int i;
    for(i=0; i<NUM_WORDS; i++){
        if(!strcmp(dict[i], temp))
            return i;
        }
    return -1;
}

/* This function links all words in buckets to create edges of graph */

void create_graph(Node_S *pre_graph[NUM_WORDS * NUM_CHAR],
                        char dict[NUM_WORDS][NUM_CHAR], int count){
    int i;
    int index, index_1, index_2;
    for(i=0; i<count; i++){
        Node_S * current =  pre_graph[i];
        while(current){
            index  = find_index(dict, current->value);
                if(!graph_s[index]){
                    add_edge_3(index, current->value);
                }

                next = current->next;
                while(next){
                    index_1  = find_index(dict, next->value);
                    if(!graph_s[index_1])
                        add_edge_3(index_1, next->value);
                    add_edge_3(index_1, current->value);
                    add_edge_3(index, next->value);
                    next = next->next;
                }
                current = current->next;
        }
    }
}

/*This function creates buckets and using those buckets also
creates graph where all neighbour nodes differ by at most one
character */
void create_buckets(char buckets[NUM_WORDS * NUM_CHAR][NUM_CHAR],
                        char dict[NUM_WORDS][NUM_CHAR]){

    int i,j;
    int count = 0;
    Node_S * pre_graph[NUM_WORDS * NUM_CHAR];

    int key[NUM_WORDS * NUM_CHAR];

    for(i=0; i<NUM_WORDS * NUM_CHAR; i++){
        pre_graph[i] = NULL;
    }
    for(i=0; i<NUM_WORDS; i++){
        for(j = 0; j<NUM_CHAR-1; j++ ){
            char temp[NUM_CHAR];
            strcpy(temp, dict[i]);
            temp[j] = '_';

            int index = search_buckets(buckets, count, temp);
            if(index != -1){
                add_string(pre_graph, index, dict[i]);
            }
            else{
                strcpy(buckets[count], temp);
                add_string(pre_graph, count,dict[i]);
                count++;
            }
        }
    }
    create_graph(pre_graph, dict, count);
    // Adjancancy matrix based representation       
    for(i=0; i<NUM_WORDS; i++){
        printf("Nodes adjacent to %d : ", i);

        Node_S * current  = graph_s[i];
        while(current){
            printf("%s ", current->value);
            current = current->next;
        }
        printf("\n");
    }
}


//Driver program
int main(){
    char buckets[NUM_WORDS * NUM_CHAR][NUM_CHAR];
    char dict[NUM_WORDS][NUM_CHAR] = {"CAT", "BAT", "COT", "COG", "COW", 
                                        "RAT", "BUT","CUT", "DOG", "WEB"};
    
    create_buckets(buckets, dict);

    bfs(graph_s[0]);
    return 0;
}

This above code needs optimization, please provide suggestions to improve. Breadth First Search implementation is as follows

typedef struct queue{
        Node_S *front;
        Node_S *rear;
}Queue;

int is_empty(Queue *q){
        if(q->front == NULL && q->rear == NULL) return true;
        return false;
}

void enqueue(Queue *q, char value[]){
        Node_S * temp = (Node_S *)malloc(sizeof(Node_S));
        strcpy(temp->value, value);
        if(is_empty(q)){
                q->front = temp;
                q->rear = temp;
        }
        else{
                if(q->rear){
                        q->rear->next = temp;
                        q->rear = temp;
                }
                else{
                        printf("\n Error ");
                }
        }
}

void dequeue(Queue *q, char str[]){
        if(!is_empty(q)){
                strcpy(str, q->front->value);
                Node_S * curr = q->front;
                if(q->front == q->rear){
                        q->rear = NULL;
                        q->front = NULL;
                }
                else{
                        q->front = curr->next;
                }
                free(curr);
        }
}
void init_queue(Queue **q){
        *q = (Queue *)malloc(sizeof(Queue));
        (*q)->front = NULL;
        (*q)->rear = NULL;
}

void bfs_1(Node_S * current, char *second, char dict[NUM_WORDS][NUM_CHAR]){

        Queue *q = NULL;

        init_queue(&q);
        char str[NUM_CHAR];
        int index ;
        if(!current) return;

        enqueue(q, current->value);
        while(!is_empty(q)){
                dequeue(q, str);
                printf("\n%s ", str);
                index = find_index(dict, str);
                visited[index] = true;
                if(!strcmp(str, second)){
                        return;
                }
                Node_S *temp = graph_s[index]->next;
                while(temp){

                        index = find_index(dict, temp->value);
                        if(visited[index] == false){
                                visited[index] = true;
                                enqueue(q, temp->value);
                        }
                        temp = temp->next;
                }
        }

}