import numpy as np
from scipy.sparse import coo_matrix, csr_matrix, csc_matrix, issparse


def count_matrix(arr, slices, m=None):
    """
    arr : numpy array of non-negative integer labels
    slices : list of slices covering the positions of `arr`; each
        slice becomes one column of the output
    m : integer number of rows; defaults to arr.max() + 1
    """
    if m is None:
        m = arr.max() + 1
    shape = (m, len(slices))

    data = np.ones_like(arr)
    row_indices = arr
    col_indices = np.empty_like(arr)
    for i, s in enumerate(slices):
        col_indices[s] = i

    # Duplicate (row, col) entries are summed when the COO matrix is
    # converted or used, which yields per-slice counts of each label.
    return coo_matrix((data, (row_indices, col_indices)),
                      shape=shape, dtype=np.int32)


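# A quick usage sketch (hypothetical inputs), assuming the default
# m = arr.max() + 1 convention above: each label value indexes a row,
# each slice a column.
#
#   >>> arr = np.array([0, 1, 1, 2, 0, 2])
#   >>> slices = [slice(0, 3), slice(3, 6)]
#   >>> count_matrix(arr, slices).toarray()
#   array([[1, 1],
#          [2, 0],
#          [0, 2]], dtype=int32)

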
def rand_pt_unit_sphere(n, seed=None):
    """
    Return a point drawn uniformly at random from the surface of the
    unit sphere in R^n, by normalizing a standard normal sample.
    """
    np.random.seed(seed)

    pt = np.random.normal(size=n)
    pt = pt / np.dot(pt, pt)**.5

    return pt


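# Sanity check (a sketch): the sampled point has unit Euclidean norm.
#
#   >>> pt = rand_pt_unit_sphere(3, seed=0)
#   >>> np.allclose(np.dot(pt, pt), 1.0)
#   True

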
def naive_cconv(v, w):
    """
    Circular convolution of 1-d arrays v and w, computed naively as a
    sequence of shifted dot products.
    """
    out = np.empty_like(v)
    for i in range(v.shape[0]):
        out[i] = np.dot(v, np.roll(w[::-1], i + 1))

    return out


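# Cross-check (a sketch): circular convolution agrees with the FFT
# identity ifft(fft(v) * fft(w)), up to floating-point error.
#
#   >>> v = np.array([1., 2., 3.])
#   >>> w = np.array([0., 1., .5])
#   >>> np.allclose(naive_cconv(v, w),
#   ...             np.fft.ifft(np.fft.fft(v) * np.fft.fft(w)).real)
#   True

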
def scipy_cdist(**kwargs):
    """
    Returns a wrapper of `scipy.spatial.distance.cdist`. Keyword
    arguments (e.g. `metric`) are passed through to `cdist`.

    P and Q are arrays of either dimension 1 or 2.

    If P is a matrix, then distances are computed wrt the rows of P.
    If Q is a matrix, then distances are computed wrt the columns of
    Q.
    """
    from scipy.spatial.distance import cdist

    def dist_fn(P, Q):

        P = P.astype(float)
        Q = Q.astype(float)

        if P.ndim < 2:
            P = P.reshape((1, P.size))
        if Q.ndim < 2:
            Q = Q.reshape((Q.size, 1))

        out = cdist(P, Q.T, **kwargs)

        out = np.squeeze(out)
        if out.ndim == 0:
            return out[()]
        return out

    return dist_fn


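# Example (a sketch): wrap any cdist metric, e.g. Euclidean distance.
#
#   >>> euclidean = scipy_cdist(metric='euclidean')
#   >>> euclidean(np.array([0., 0.]), np.array([3., 4.]))
#   5.0

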
#
# Functions for computing angular distances between points projected
# onto an n-sphere
#


def angle(P, Q):
    """
    P and Q are arrays of either dimension 1 or 2.

    If P is a matrix, then angles are computed wrt the rows of P. If Q
    is a matrix, then angles are computed wrt the columns of Q.
    """
    P = P.astype(float)
    Q = Q.astype(float)

    if P.ndim < 2:
        P = P.reshape((1, P.size))
    if Q.ndim < 2:
        Q = Q.reshape((Q.size, 1))

    # Normalize P row-wise and Q column-wise
    P /= np.sqrt((P * P).sum(1)[:, np.newaxis])
    Q /= np.sqrt((Q * Q).sum(0)[np.newaxis, :])

    cos_PQ = np.dot(P, Q)

    # Rounding errors may result in values outside of the domain of
    # inverse cosine.
    cos_PQ[cos_PQ > 1] = 1.
    cos_PQ[cos_PQ < -1] = -1.

    out = np.arccos(cos_PQ)

    out = np.squeeze(out)
    if out.ndim == 0:
        return out[()]
    return out


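# Example (a sketch): orthogonal unit vectors are pi/2 apart, and a
# pair of matrices gives all pairwise angles (rows of P vs. columns
# of Q).
#
#   >>> float(angle(np.array([1., 0.]), np.array([0., 1.])))
#   1.5707963267948966
#   >>> np.round(angle(np.eye(2), np.eye(2)), 2)
#   array([[0.  , 1.57],
#          [1.57, 0.  ]])

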
def angle_sparse(P, Q):
    """
    P and Q are sparse matrices from scipy.sparse.

    Angles are computed wrt the rows of P and wrt the columns of Q.
    """
    P = P.tocsc().astype(float)
    Q = Q.tocsr().astype(float)

    # Normalize P row-wise and Q column-wise
    P_inv_norms = 1 / np.sqrt(P.multiply(P).sum(1))
    Q_inv_norms = 1 / np.sqrt(Q.multiply(Q).sum(0))

    # Broadcasting sparse multiply requires scipy version >= 0.13
    P = P.multiply(csc_matrix(P_inv_norms))
    Q = Q.multiply(csr_matrix(Q_inv_norms))

    P = P.tocsr()
    Q = Q.tocsc()
    cos_PQ = P.dot(Q).toarray()

    # Rounding errors may result in values outside of the domain of
    # inverse cosine.
    cos_PQ[cos_PQ > 1] = 1.
    cos_PQ[cos_PQ < -1] = -1.

    out = np.arccos(cos_PQ)

    out = np.squeeze(out)
    if out.ndim == 0:
        return out[()]
    return out


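# Example (a sketch): same pairwise angles as `angle`, computed with
# sparse operations throughout.
#
#   >>> P = csr_matrix(np.eye(2))
#   >>> np.round(angle_sparse(P, P.T), 2)
#   array([[0.  , 1.57],
#          [1.57, 0.  ]])

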
#
# Functions for computing information theoretic distances
#


def H(P):
    """
    P is an array of dimension 1 or 2.

    If P is a matrix, entropy is computed row-wise. (The rows of P
    are assumed to be probability distributions.)
    """
    P = P.astype(float)

    if P.ndim < 2:
        P = P.reshape((1, P.size))

    # Allow negative infinity without complaint
    old_settings = np.seterr(divide='ignore')
    logP = np.log2(P)
    np.seterr(**old_settings)

    # Zero out 0 * log(0) terms, which would otherwise be nan
    logP[P == 0] = 0.
    out = -1 * (P * logP).sum(1)

    out = np.squeeze(out)
    if out.ndim == 0:
        return out[()]
    return out


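# Example (a sketch): entropy in bits, computed row-wise for matrices.
#
#   >>> H(np.array([.5, .5]))
#   1.0
#   >>> H(np.array([[.5, .5], [1., 0.]]))
#   array([1., 0.])

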
def cross_H(P, Q):
    """
    P and Q are arrays of either dimension 1 or 2.

    If P is a matrix, then cross entropy is computed row-wise on P.
    (The rows of P are assumed to be probability distributions.) If Q
    is a matrix, cross entropy is computed column-wise on Q. (The
    columns of Q are assumed to be probability distributions.)
    """
    P = P.astype(float)
    Q = Q.astype(float)

    if P.ndim < 2:
        P = P.reshape((1, P.size))
    if Q.ndim < 2:
        Q = Q.reshape((Q.size, 1))

    P_ = np.tile(P.reshape(P.shape[0], P.shape[1], 1), (1, 1, Q.shape[1]))
    Q_ = np.tile(Q.reshape(1, Q.shape[0], Q.shape[1]), (P.shape[0], 1, 1))

    # Allow negative infinity without complaint
    old_settings = np.seterr(divide='ignore')
    logQ_ = np.tile(np.log2(Q.reshape(1, Q.shape[0], Q.shape[1])),
                    (P.shape[0], 1, 1))
    np.seterr(**old_settings)

    out = P_ * logQ_

    # Zero out 0 * log(0) terms, which would otherwise be nan
    P_zeros = (P_ == 0)
    Q_zeros = (Q_ == 0)
    PQ_zeros = np.bitwise_and(P_zeros, Q_zeros)
    out[PQ_zeros] = 0.

    out = np.squeeze(-1 * out.sum(1))
    if out.ndim == 0:
        return out[()]
    return out


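# Example (a sketch): the cross entropy of a distribution with itself
# reduces to its entropy.
#
#   >>> p = np.array([.5, .5])
#   >>> cross_H(p, p)
#   1.0

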
def KL_div(P, Q):
    """
    P and Q are arrays of either dimension 1 or 2.

    If P is a matrix, then KL divergence is computed row-wise on P.
    (The rows of P are assumed to be probability distributions.) If Q
    is a matrix, KL divergence is computed column-wise on Q. (The
    columns of Q are assumed to be probability distributions.)
    """
    P = P.astype(float)
    Q = Q.astype(float)

    if P.ndim < 2:
        out = np.squeeze(cross_H(P, Q) - H(P))
    else:
        out = np.squeeze(cross_H(P, Q) - H(P)[:, np.newaxis])
    if out.ndim == 0:
        return out[()]
    return out


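# Example (a sketch): KL divergence vanishes for identical
# distributions and is positive otherwise.
#
#   >>> p = np.array([.5, .5])
#   >>> KL_div(p, p)
#   0.0
#   >>> round(KL_div(p, np.array([.25, .75])), 4)
#   0.2075

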
def JS_div(P, Q, metric=False):
    """
    P and Q are arrays of either dimension 1 or 2.

    If P is a matrix, then JS divergence is computed row-wise on P.
    (The rows of P are assumed to be probability distributions.) If Q
    is a matrix, JS divergence is computed column-wise on Q. (The
    columns of Q are assumed to be probability distributions.)

    The square root of the JS divergence is a metric.
    """
    P = P.astype(float)
    Q = Q.astype(float)

    if P.ndim < 2:
        P = P.reshape((1, P.size))
    if Q.ndim < 2:
        Q = Q.reshape((Q.size, 1))

    P_ = np.tile(P[:, :, np.newaxis], (1, 1, Q.shape[1]))
    Q_ = np.tile(Q[np.newaxis, :, :], (P.shape[0], 1, 1))
    M_ = .5 * (P_ + Q_)

    # Allow negative infinity without complaint
    old_settings = np.seterr(divide='ignore')
    logM_ = np.log2(M_)
    np.seterr(**old_settings)

    # Zero out log(0) terms where both P and Q are zero, so that the
    # products below are 0 rather than nan
    P_zeros = (P_ == 0)
    Q_zeros = (Q_ == 0)
    PQ_zeros = np.bitwise_and(P_zeros, Q_zeros)
    logM_[PQ_zeros] = 0.

    HP = H(P)
    HQ = H(Q.T)

    PM_KLdiv = -1 * (P_ * logM_).sum(1) - HP.reshape(HP.size, 1)
    QM_KLdiv = -1 * (Q_ * logM_).sum(1) - HQ.reshape(1, HQ.size)

    out = .5 * (PM_KLdiv + QM_KLdiv)

    # Clamp small negative values caused by rounding errors
    out[out < 0] = 0.

    if metric:
        out = np.sqrt(out)

    out = np.squeeze(out)
    if out.ndim == 0:
        return out[()]
    return out


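# Example (a sketch): JS divergence (base 2) is 0 for identical
# distributions and 1 for distributions with disjoint support.
#
#   >>> JS_div(np.array([.5, .5]), np.array([.5, .5]))
#   0.0
#   >>> JS_div(np.array([1., 0.]), np.array([0., 1.]))
#   1.0

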
def JS_dist(P, Q):
    """
    P and Q are arrays of either dimension 1 or 2.

    If P is a matrix, then JS distance is computed row-wise on P.
    (The rows of P are assumed to be probability distributions.) If Q
    is a matrix, JS distance is computed column-wise on Q. (The
    columns of Q are assumed to be probability distributions.)
    """
    return JS_div(P, Q, metric=True)