baseconstituyenteestudiantesgeneralplan_patriasala
Last change
on this file since 7095598 was
0ff122b,
checked in by rudmanmrrod <rudman22@…>, 7 años ago
|
Agregado módulo de gestión de perfiles de procesamiento, incorporado el módulo de visualización de modelado de tópicos
|
-
Propiedad mode establecida a
100644
|
File size:
1.3 KB
|
Línea | |
---|
1 | from finalcorpus import * |
---|
2 | import gzip |
---|
3 | |
---|
# Corpus position at which the first document starts; export_model uses it
# as the lower bound of the first document's token range.
start_idx = 0
# Model whose state is exported.
# NOTE(review): lda_m is not defined in the visible code -- presumably it is
# provided by the star import from finalcorpus, with 20 selecting one of the
# trained models; confirm.
m = lda_m[20]
# Per-document metadata for the model's context type; export_model unpacks
# each entry as an (end_idx, doc) pair.
metadata = c.view_metadata(m.context_type)
---|
7 | |
---|
def _flatten_values(value):
    """Return the scalar entries of *value* as a flat list.

    A scalar (or a string) comes back as a one-element list; nested
    sequences are flattened depth-first.
    """
    # Strings are iterable but must be treated as single values.
    if isinstance(value, (bytes, type(u""))):
        return [value]
    try:
        items = list(value)
    except TypeError:
        # Not iterable (plain number, 0-d array): a single value.
        return [value]
    flat = []
    for item in items:
        flat.extend(_flatten_values(item))
    return flat


def _header_line(name, value):
    """Format a ``#name : v1 v2 ...`` header line, newline-terminated."""
    joined = " ".join(str(entry) for entry in _flatten_values(value))
    return "#{} : {}\n".format(name, joined)


def _as_bytes(text):
    """Encode *text* as UTF-8 unless it is already a byte string."""
    return text if isinstance(text, bytes) else text.encode("utf-8")


def export_model(path='model_to_mallet.gz'):
    """Write the module-level model ``m`` to a gzip-compressed state file.

    The file starts with three header lines (the column names, then the
    ``alpha`` and ``beta`` hyperparameters), which is exactly what
    ``import_model`` skips when reading.  After the header comes one line
    per corpus token with the columns
    ``doc source pos typeindex type topic``.

    Args:
        path: Destination file name.  Defaults to ``'model_to_mallet.gz'``,
            the name that was previously hard-coded.

    Reads the module-level names ``m``, ``c``, ``metadata`` and
    ``start_idx``; none of them is modified.
    """
    # Track the document boundary in a local.  Assigning to ``start_idx``
    # here would make that name local to the function and raise
    # UnboundLocalError on its first read.
    start = start_idx

    with gzip.open(path, 'wb') as f:
        # The stream is binary, so every line is encoded before writing.
        f.write(_as_bytes("#doc source pos typeindex type topic\n"))
        f.write(_as_bytes(_header_line("alpha", m.alpha)))
        # NOTE(review): every entry of m.beta is written on one line; if the
        # consumer expects a single scalar beta, confirm m.beta is scalar.
        f.write(_as_bytes(_header_line("beta", m.beta)))

        for end_idx, doc in metadata:
            # Tokens of this document occupy corpus positions [start, end_idx).
            for i in range(start, end_idx):
                type_index = c.corpus[i]
                # NOTE(review): ``pos`` is the corpus-wide token index, not
                # the offset inside the document -- confirm that is intended.
                line = "{} {} {} {} {} {}\n".format(
                    doc, "/", i, type_index, c.words[type_index], m.Z[i]
                )
                f.write(_as_bytes(line))
            start = end_idx
---|
26 | |
---|
27 | |
---|
def import_model(path='topic-state.gz'):
    """Parse a gzip-compressed topic-state file.

    The first three lines of the file are header information and are
    skipped.  Every following non-blank line must hold six
    whitespace-separated columns: ``doc source pos typeindex type topic``.

    Args:
        path: File to read.  Defaults to ``'topic-state.gz'``, the name
            that was previously hard-coded.

    Returns:
        A tuple ``(start_pos, corpus, z, words)`` where

        * ``start_pos`` lists the token index at which each document begins,
        * ``corpus`` lists the type index of every token, as ``int``,
        * ``z`` lists the topic assigned to every token, as ``int``,
        * ``words`` maps each type index to its word string.

    Raises:
        ValueError: If a data line does not have exactly six columns or a
            numeric column is not an integer.
    """
    header_lines = 3
    start_pos = []
    corpus = []
    z = []
    words = {}
    # None can never equal a parsed document id, so the first data line
    # always opens a document.
    prev_doc = None

    with gzip.open(path, 'rb') as f:
        for line_no, raw in enumerate(f):
            if line_no < header_lines:
                continue
            fields = raw.decode('utf-8').split()
            if not fields:
                # Tolerate blank lines, e.g. a trailing newline at EOF.
                continue
            doc, _, _, type_index, word, topic = fields
            if doc != prev_doc:
                # len(corpus) is the index the current token is about to get.
                start_pos.append(len(corpus))
                prev_doc = doc
            type_index = int(type_index)
            corpus.append(type_index)
            z.append(int(topic))
            words[type_index] = word

    return start_pos, corpus, z, words
---|
47 | |
---|
Nota: Vea
TracBrowser
para ayuda de uso del navegador del repositorio.