ML: Word2Vec from a dict

Hi everyone,
I am trying to load a Word2Vec object from a dictionary, but I get an error message (UnpicklingError: could not find MARK) when I try to read the model from a .BIN file.
I’ve read the dictionary from a .txt file downloaded from nlp.stanford.edu/projects/glove

Can anyone help me?

This is my script:

import gensim
import numpy as np
from tqdm import tqdm

def save_word2vec_format(fname, vocab, vector_size, binary=True):
    """Write a ``{word: vector}`` mapping to ``fname`` in word2vec format.

    The file starts with a ``"<count> <vector_size>\\n"`` header line. In
    binary mode each entry is the UTF-8 word, one space, then the vector as
    raw float32 bytes. In text mode each entry is one line holding the word
    followed by the space-separated vector components.

    Args:
        fname: Path of the file to create (opened with ``gensim.utils.open``).
        vocab: Mapping of word (str) to its vector (array-like of numbers).
        vector_size: Number of components every vector must have.
        binary: Write the binary layout when true, the text layout otherwise.

    Raises:
        ValueError: If a vector is not one-dimensional with ``vector_size``
            components; such a row would contradict the header and make the
            file unreadable.
    """
    total_vec = len(vocab)
    expected_shape = (int(vector_size),)
    with gensim.utils.open(fname, 'wb') as fout:
        fout.write(gensim.utils.to_utf8("%s %s\n" % (total_vec, vector_size)))
        for word, row in tqdm(vocab.items()):
            # Accept plain lists/tuples as well as ndarrays.
            row = np.asarray(row)
            if row.shape != expected_shape:
                raise ValueError(
                    "vector for %r has shape %s, expected %s"
                    % (word, row.shape, expected_shape)
                )
            if binary:
                payload = row.astype(np.float32).tobytes()
                fout.write(gensim.utils.to_utf8(word) + b" " + payload)
            else:
                # str() rather than repr(): under NumPy >= 2 the repr of a
                # scalar is e.g. "np.float32(0.1)", which is not a number a
                # reader of the text format can parse.
                values = ' '.join(str(val) for val in row)
                fout.write(gensim.utils.to_utf8("%s %s\n" % (word, values)))

# `w` is the {word: vector} dictionary read from the GloVe .txt file
# (its construction is not shown in this script).
save_word2vec_format(binary=True, fname='ppl6B50d.bin', vocab=w, vector_size=50)

# load model
# The file written above is in the word2vec binary format, not a gensim
# pickle, so Word2Vec.load() fails with "UnpicklingError: could not find MARK".
# KeyedVectors.load_word2vec_format() is the reader for this format.
new_model = gensim.models.KeyedVectors.load_word2vec_format('./ppl6B50d.bin', binary=True)
print(new_model)
1 Like