Markov Model - Random word/gibberish generator

654 views Asked by At

My code works fine until the random word generating. Sometimes it creates words/gibberish and sometimes it doesn't (probably going through an infinite loop). However, whenever it does create words/gibberish it doesn't seem so "random". The words would either repeat themselves or most of the words will be generating near the same character length.

The problem lies in the def genRandomWord:

import random

def getTransitions(astring):            
    d = {}
    for i in range(len(astring)):
        if astring[i:i+2] in d:
            d[astring[i:i+2]] += 1
        else:
            d[astring[i:i+2]] = 1
    #h = tuple(d.items())    #gets the indexes of the dictionary
    #print(h[2][1])
    if ' ' in d:
        del d[' ']
    return d

def getFirstLetters(astring):
    d = []
    for i in astring:
        if i not in d:
            d.append(i)
    d.remove(' ')
    return d


def letterCount(astring):
    d = {}
    for i in astring:
        if i not in d.keys():
            d[i] = 1
        else:
            d[i] +=1
    d[' ']-= 1
    return d

def getProb(astring):
    d = {}
    h = tuple(getTransitions(astring).items())
    j = tuple(letterCount(astring).items())
    #print("h", h)
    #print()
    #print()
    #print("j", j)
    for i in h:
        for n in j:
            if i[0][0] == n[0]:
                d[i[0]] = i[1]/n[1]
    return d

def genFletter(astring):
    d = {}
    r = random.random()
    fl ='*'
    #print("r",r)
    a = getProb(astring)
    suma = 0
    count = -1
    for i in a:
        if i[0][0] == ' ':
            d[i[1]] = a[i]
    d = sorted(tuple(d.items()))
    #print(d)
    while suma < r:
        count += 1
        suma += d[count][1]
        fl = d[count][0]
        #print(suma)
    return fl

def genRandomWord(astring):
    h = getProb(The_List)
    htrans = tuple(getProb(The_List).keys())
    hprob = tuple(getProb(The_List).values())
    #print(hprob)
    z = genFletter(The_List)
    word = z
    #print(word)
    fletterprob = h[' '+z]
    r = random.random()
    while word[-1]!= ' ':
        index = 0
        suma = 0
        for i in range(len(htrans)):
            if htrans[i][0] == word[-1]:
                index = i
        suma += hprob[index]
        for j in range(len(hprob)):
            if suma >= r:
                word += htrans[index][1]
                break
            else:
                suma += hprob[index]
    return word









The_List = ' steam teams meets teems eat ate state tease test mast mates '

trans = getTransitions(The_List)
lcount = letterCount(The_List)
fletter = getFirstLetters(The_List)
transProb = getProb(The_List)


#Sorting
#print('LETTER TRANSITIONS'+'\n'+str(sorted(trans.items()))+'\n')
#print('LETTER COUNT'+'\n'+str(sorted(lcount.items()))+'\n')
#print('FIRST LETTERS'+'\n'+str(sorted(fletter))+'\n')
#print('TRANSITION PROBABILITIES'+'\n'+str(sorted(transProb.items()))+'\n')

print('LETTER TRANSITIONS'+'\n'+str(trans)+'\n')
print('LETTER COUNT'+'\n'+str(lcount)+'\n')
print('FIRST LETTERS'+'\n'+str(fletter)+'\n')
print('TRANSITION PROBABILITIES'+'\n'+str(transProb)+'\n')


#print(genFletter(The_List))
for i in range(10):
    print("'"+genRandomWord(The_List)+"'")
0

There are 0 answers