"""Write an LDA model to a gzipped MALLET-style topic-state file and read one back.

The state file is plain text with three header lines followed by one line
per token, with the whitespace-separated columns::

    #doc source pos typeindex type topic

This module targets Python 3 (gzip streams are opened in text mode).
"""

# Import order is kept as in the original so that ``gzip`` below always
# refers to the stdlib module, whatever the star import brings in.
# ``c`` and ``lda_m`` are presumably supplied by this star import -- confirm.
from finalcorpus import *
import gzip

# Number of header lines at the top of a topic-state file
# (column names, "#alpha : ...", "#beta : ...").
_HEADER_LINES = 3

start_idx = 0
m = lda_m[20]
metadata = c.view_metadata(m.context_type)


def _format_values(value):
    """Return *value* (a scalar or a possibly nested iterable) as space-separated text."""
    if isinstance(value, str):
        # Iterating a str yields str again; stop here to avoid endless recursion.
        return value
    try:
        items = iter(value)
    except TypeError:
        return str(value)
    return " ".join(_format_values(item) for item in items)


def export_model(path='model_to_mallet.gz'):
    """Write the module-level model ``m`` to *path* as a gzipped topic-state file.

    Three header lines are written (column names, alpha, beta) so that the
    result has the layout ``import_model`` expects, followed by one line per
    token: document label, source (always ``"/"``), token position, type
    index, the word itself and its topic assignment.

    Args:
        path: Destination file name. Defaults to the name the original
            script used.
    """
    # Work on a local copy: assigning to ``start_idx`` inside this function
    # would make it a local name and raise UnboundLocalError on first use.
    doc_start = start_idx
    source = "/"
    with gzip.open(path, 'wt', encoding='utf-8') as f:
        f.write("#doc source pos typeindex type topic\n")
        f.write("#alpha : {}\n".format(_format_values(m.alpha)))
        # NOTE(review): MALLET writes a single scalar beta; confirm the shape
        # of ``m.beta`` if the file must be consumed by MALLET itself.
        f.write("#beta : {}\n".format(_format_values(m.beta)))
        # Each metadata record pairs the end offset of a document (exclusive)
        # with the document's label; documents are laid out back to back.
        # NOTE(review): a label containing whitespace would break the
        # six-column layout -- confirm labels are whitespace-free.
        for end_idx, doc in metadata:
            for pos in range(doc_start, end_idx):
                type_index = c.corpus[pos]
                f.write("{} {} {} {} {} {}\n".format(
                    doc, source, pos, type_index, c.words[type_index], m.Z[pos]))
            doc_start = end_idx


def import_model(path='topic-state.gz'):
    """Parse a gzipped topic-state file.

    The first three lines (header information) are skipped; blank lines are
    ignored. Every remaining line must have exactly six whitespace-separated
    columns: ``doc source pos typeindex type topic``.

    Args:
        path: File to read. Defaults to the name the original script used.

    Returns:
        A tuple ``(start_positions, corpus, z, words)`` where
        ``start_positions`` lists the token index at which each document
        begins, ``corpus`` is the list of type indices (ints) in file order,
        ``z`` is the parallel list of topic assignments (ints), and ``words``
        maps each type index to its word string.

    Raises:
        ValueError: If a data line does not have six columns or its type
            index / topic column is not an integer.
    """
    start_positions = []
    corpus = []
    z = []
    words = {}
    prev_doc = None  # no document seen yet, so the first token always opens one
    with gzip.open(path, 'rt', encoding='utf-8') as f:
        for line_no, line in enumerate(f):
            if line_no < _HEADER_LINES:
                continue
            fields = line.split()
            if not fields:
                continue
            doc, _source, _pos, type_index, word, topic = fields
            type_index = int(type_index)
            if doc != prev_doc:
                # Position counts tokens read so far, i.e. this token's index.
                start_positions.append(len(corpus))
                prev_doc = doc
            corpus.append(type_index)
            z.append(int(topic))
            words[type_index] = word
    return start_positions, corpus, z, words