Conjunto 1a2167d en modelado_topicos
- Fecha y hora: 26/01/2016 16:50:38 (hace 8 años)
- Branches: master, preprocesamiento, v1.0
- Children: 485135c
- Parents: 8ebf4a3
- Ficheros: 4 editados
Leyenda
- No modificado
- Añadido
- Eliminado
-
django_topic_explorer/settings.py
r80f1533 r1a2167d 105 105 #URL_COMUN='http://192.168.12.126:8000/' 106 106 ## TOPIC EXPLORER SETTINGS 107 #TOPIC_EXPLORER_PATH = '/home/cenditel/Interpretacion/' 107 108 TOPIC_EXPLORER_PATH = '/home/jredondo/Proyectos/Analisis_del_Discurso/src/topic-explorer/' 108 #TOPIC_EXPLORER_PATH = '/home/cenditel/Interpretacion/'109 109 FILES_PATH = TOPIC_EXPLORER_PATH +'demo-data/corpus_propuestas/noaccent' 110 110 MODELS_PATH = TOPIC_EXPLORER_PATH + 'demo-data/corpus_propuestas/models/' 111 #FILES_PATH = TOPIC_EXPLORER_PATH +'demo-data/ap/' 112 #MODELS_PATH = TOPIC_EXPLORER_PATH + 'demo-data/corpus_propuestas/lda2vsm_models/' 111 113 CORPUS_FILE = MODELS_PATH + 'pp-nltk-en-freq5.npz' 114 #MODEL_PATTERN = MODELS_PATH + 'model.npz' 115 112 116 #CORPUS_FILE = MODELS_PATH + 'ap-nltk-en-freq5.npz' 113 MODEL_PATTERN = MODELS_PATH + 'pp-nltk-en-freq5-LDA-K{0}-document-200.npz'117 #MODEL_PATTERN = MODELS_PATH + 'pp-nltk-en-freq5-LDA-K{0}-document-200.npz' 114 118 #MODEL_PATTERN = MODELS_PATH + 'ap-nltk-en-freq5-LDA-K{0}-document-20.npz' 115 119 CONTEXT_TYPE = 'document' 116 TOPICS = '10, 20, 30, 40, 50, 60, 70'120 #TOPICS = '10, 20, 30, 40, 50, 60, 70' 117 121 #TOPICS = '10, 20, 30, 40, 50, 60, 70, 80, 90, 100' 118 #TOPICS = '10, 20, 30, 40, 50, 60'122 TOPICS = '10, 20, 30, 40, 50, 60' 119 123 CORPUS_NAME = 'Deafult' 120 124 ICONS = 'link' … … 125 129 DOC_URL_FORMAT = None 126 130 127 -
templates/topic_explorer/index.html
rbd6e395 r1a2167d 129 129 130 130 $.getJSON('/topic_explorer/docs.json', function(data) { 131 console.log(data); 131 132 $(".typeahead").typeahead({items: 12, 132 133 source: function(query, process) { … … 284 285 var tops; 285 286 d3.json(url, function(error, data) { 286 console.log( data);287 console.log("DATA",data); 287 288 $('#status .bar').css('width', '50%').text('Loading topics...'); 288 289 if (error) { -
topic_explorer/views.py
r431bd02 r1a2167d 1 # coding: utf-8 2 1 3 from django.shortcuts import render 2 4 … … 8 10 9 11 from utils import colorlib 12 from ldac2vsm import * 10 13 import itertools 11 14 from vsm.corpus import Corpus … … 27 30 #path = settings.PATH 28 31 corpus_file = settings.CORPUS_FILE 29 context_type = settings.CONTEXT_TYPE 30 model_pattern = settings.MODEL_PATTERN 32 #context_type = settings.CONTEXT_TYPE 33 context_type = 'propesta' 34 #model_pattern = settings.MODEL_PATTERN 31 35 topics = settings.TOPICS 32 corpus_name = settings.CORPUS_NAME36 #corpus_name = settings.CORPUS_NAME 33 37 icons = settings.ICONS 34 38 … … 40 44 #global lda_m, lda_v 41 45 42 lda_c = Corpus.load(corpus_file) 46 # Integración LDA-c topic_explorer 47 lda_c,lda_m = corpus_model() 48 #lda_c = Corpus.load(corpus_file) 49 #lda_c.save('/home/jredondo/tmp/corpus.npz') 50 lda_v = LDAViewer(lda_c, lda_m) 51 43 52 #lda_m = LCM.load(model_pattern.format(k)) 44 #lda_v = LDAViewer(lda_c, lda_m)45 53 label = lambda x: x 46 54 … … 66 74 67 75 def doc_csv(request, k_param,doc_id,threshold=0.2): 68 lda_m = LCM.load(model_pattern.format(k_param))69 lda_v = LDAViewer(lda_c, lda_m)76 #lda_m = LCM.load(model_pattern.format(k_param)) 77 #lda_v = LDAViewer(lda_c, lda_m) 70 78 data = lda_v.sim_doc_doc(doc_id) 71 79 … … 79 87 def topic_json(request,k_param,topic_no, N=40): 80 88 #global lda_v 81 lda_m = LCM.load(model_pattern.format(k_param))82 lda_v = LDAViewer(lda_c, lda_m)89 #lda_m = LCM.load(model_pattern.format(k_param)) 90 #lda_v = LDAViewer(lda_c, lda_m) 83 91 try: 84 92 N = int(request.query.n) … … 171 179 def index(request): 172 180 global lda_m,lda_v 173 lda_m = LCM.load(model_pattern.format(10))174 lda_v = LDAViewer(lda_c, lda_m)181 #lda_m = LCM.load(model_pattern.format(10)) 182 #lda_v = LDAViewer(lda_c, lda_m) 175 183 template_name = 'topic_explorer/index.html' 176 184 return render(request,template_name, 177 185 {'filename':None, 178 'corpus_name' : corpus_name,186 #'corpus_name' : corpus_name, 179 187 
'corpus_link' : corpus_link, 180 188 'context_type' : context_type, … … 185 193 def visualize(request,k_param,filename=None,topic_no=None): 186 194 global lda_m,lda_v 187 lda_m = LCM.load(model_pattern.format(k_param))188 lda_v = LDAViewer(lda_c, lda_m)195 #lda_m = LCM.load(model_pattern.format(k_param)) 196 #lda_v = LDAViewer(lda_c, lda_m) 189 197 template_name = 'topic_explorer/index.html' 190 198 return render(request,template_name, … … 192 200 'k_param':k_param, 193 201 'topic_no':topic_no, 194 'corpus_name' : corpus_name,202 #'corpus_name' : corpus_name, 195 203 'corpus_link' : corpus_link, 196 204 'context_type' : context_type, … … 221 229 archivo.close() 222 230 except: 223 text='No se encontro el documento' 231 return dump_exception() 232 texto='No se encontro el documento' 224 233 return render(request,self.template_name, 225 234 {'topicos':topicos, -
utils/ldac2vsm.py
r80f1533 r1a2167d 3 3 from vsm.corpus import Corpus 4 4 from vsm.model.ldacgsmulti import LdaCgsMulti 5 from vsm.viewer.ldagibbsviewer import LDAGibbsViewer as LDAViewer 5 6 6 7 from vsm.model.ldafunctions import * … … 8 9 import numpy as np 9 10 10 11 path = '/home/jredondo/Proyectos/Analisis_del_Discurso/src/lda-blei/lda-c-dist/output/' 12 #corpus_file = '/home/jredondo/Proyectos/Analisis_del_Discurso/src/vsm_tmp/corpus.dat' 13 corpus_file = '/home/jredondo/Proyectos/Analisis_del_Discurso/src/lda-blei/ap/ap.dat' 14 vocab_file = '/home/jredondo/Proyectos/Analisis_del_Discurso/src/lda-blei/ap/vocab.txt' 11 path = '/home/jredondo/Proyectos/Analisis_del_Discurso/src/lda-blei/lda-c-dist/test50/' 12 corpus_file = '/home/jredondo/Proyectos/Analisis_del_Discurso/src/vsm2ldac/corpus.dat' 13 vocab_file = '/home/jredondo/Proyectos/Analisis_del_Discurso/src/vsm2ldac/vocab.txt' 14 corpus_dir = '/home/jredondo/Proyectos/Analisis_del_Discurso/src/topic-explorer/demo-data/corpus_propuestas/noaccent' 15 #path = '/home/jredondo/Proyectos/Analisis_del_Discurso/src/lda-blei/lda-c-dist/output/' 16 #corpus_file = '/home/jredondo/Proyectos/Analisis_del_Discurso/src/lda-blei/ap/ap.dat' 17 #vocab_file = '/home/jredondo/Proyectos/Analisis_del_Discurso/src/lda-blei/ap/vocab.txt' 15 18 16 19 def likelihood(path=path): … … 99 102 return compute_log_prob(c, z, wt, td) 100 103 101 if __name__=='__main__': 102 104 def corpus_model(path=path): 103 105 z,indices = word_assigments(path) 106 zeta = [] 107 for item in z: 108 zeta.extend(item) 104 109 b = beta(path) 105 110 v = vocab() 106 111 a = alpha_list(z,path) 107 c = import_corpus(corpus_file,vocab_file) 108 m = LdaCgsMulti(corpus=c,K=20,V=v,alpha=a,beta=b) 112 c = import_corpus(corpusfilename=corpus_file, vocabfilename=vocab_file, path=corpus_dir ,context_type='propesta') 113 alpha = [] 114 115 for i in range(len(b)): 116 alpha.append(a) 117 alpha = (np.array(alpha, dtype=np.float).reshape(len(alpha),len(alpha[0]))) 118 119 b = (np.array(b, 
dtype=np.float).reshape(len(b[0]),len(b))) 120 m = LdaCgsMulti(corpus=c, 121 context_type='propesta', 122 K=50, 123 V=v, 124 #alpha=alpha, 125 #beta=b, 126 Z=np.array(zeta)) 109 127 110 128 129 return c,m 130 131 if __name__=='__main__': 132 print "******************** MAIN **********************" 133 save_path = '/home/jredondo/Proyectos/Analisis_del_Discurso/src/topic-explorer/demo-data/corpus_propuestas/lda2vsm_models/' 134 c,m = corpus_model() 135 #c.save(save_path+'corpus.npz') 136 #save_lda(m,save_path+'model.npz')
Nota: Vea TracChangeset para ayuda en el uso del visor de conjuntos de cambios.