source: modelado_topicos/vsm/vsm/spatial.py

v1.0
Last change on this file was d81e301, checked in by rudmanmrrod <rudman22@…>, 7 años ago

Agregado submodulo del vsm

  • Propiedad mode establecida a 100644
File size: 7.1 KB
Línea 
1import numpy as np
2from scipy.sparse import coo_matrix, csr_matrix, csc_matrix, issparse
3
4
5
6def count_matrix(arr, slices, m=None):
7    """
8    arr : numpy array
9    slices : list of slices
10    m : integer
11    """
12    if not m:
13        m = arr.max()
14    shape = (m, len(slices))
15
16    data = np.ones_like(arr)
17    row_indices = arr
18    col_indices = np.empty_like(arr)
19    for i,s in enumerate(slices):
20        col_indices[s] = i
21
22    return coo_matrix((data, (row_indices, col_indices)),
23                      shape=shape, dtype=np.int32)
24
25
26def rand_pt_unit_sphere(n, seed=None):
27    """
28    """
29    np.random.seed(seed)
30
31    pt = np.random.normal(size=n)
32    pt = pt / np.dot(pt, pt)**.5
33
34    return pt
35
36
37def naive_cconv(v, w):
38    """
39    """
40    out = np.empty_like(v)
41    for i in xrange(v.shape[0]):
42        out[i] = np.dot(v, np.roll(w[::-1], i + 1))
43
44    return out
45
46
47def scipy_cdist(**kwargs):
48    """
49    Returns a wrapper of `scipy.spatial.distance.cdist`.
50
51    P and Q are arrays of either dimension 1 or 2.
52
53    If P is a matrix, then angles are computed wrt the rows of P. If Q
54    is a matrix, then angles are computed wrt the columns of Q.
55    """
56    from scipy.spatial.distance import cdist
57
58    def dist_fn(P, Q):
59
60        P = P.astype(np.float)
61        Q = Q.astype(np.float)
62
63        if P.ndim < 2:
64            P = P.reshape((1,P.size))
65        if Q.ndim < 2:
66            Q = Q.reshape((Q.size,1))
67           
68        out = cdist(P, Q.T, **kwargs)
69
70        out = np.squeeze(out)
71        if out.ndim == 0:
72            return out[()]
73        return out
74
75    return dist_fn
76
77
78#
79# Functions for computing angular distances between points projected
80# onto an n-sphere
81#
82
83
84def angle(P, Q):
85    """
86    P and Q are arrays of either dimension 1 or 2.
87
88    If P is a matrix, then angles are computed wrt the rows of P. If Q
89    is a matrix, then angles are computed wrt the columns of Q.
90
91    """
92    P = P.astype(np.float)
93    Q = Q.astype(np.float)
94
95    if P.ndim < 2:
96        P = P.reshape((1,P.size))
97    if Q.ndim < 2:
98        Q = Q.reshape((Q.size,1))
99
100    # Normalize P row-wise and Q column-wise
101    P /= np.sqrt((P*P).sum(1)[:,np.newaxis])
102    Q /= np.sqrt((Q*Q).sum(0)[np.newaxis,:])
103
104    cos_PQ = np.dot(P,Q)
105   
106    # Rounding errors may result in values outside of the domain of
107    # inverse cosine.
108    cos_PQ[(cos_PQ > 1)] = 1.
109    cos_PQ[(cos_PQ < -1)] = -1.
110
111    out = np.arccos(cos_PQ)
112
113    out = np.squeeze(out)
114    if out.ndim == 0:
115        return out[()]
116    return out
117
118
119def angle_sparse(P, Q):
120    """
121    P and Q are sparse matrices from scipy.sparse.
122
123    Angles are computed wrt the rows of P and wrt the columns of Q.
124    """
125    P = P.tocsc().astype(np.float)
126    Q = Q.tocsr().astype(np.float)
127
128    # Normalize P row-wise and Q column-wise
129    P_inv_norms = 1 / np.sqrt(P.multiply(P).sum(1))
130    Q_inv_norms = 1 / np.sqrt(Q.multiply(Q).sum(0))
131
132    # Requires scipy version >= 0.13
133    P = P.multiply(csc_matrix(P_inv_norms))
134    Q = Q.multiply(csr_matrix(Q_inv_norms))
135
136    P = P.tocsr()
137    Q = Q.tocsc()
138    cos_PQ = P.dot(Q).toarray()
139
140    # Rounding errors may result in values outside of the domain of
141    # inverse cosine.
142    cos_PQ[(cos_PQ > 1)] = 1.
143    cos_PQ[(cos_PQ < -1)] = -1.
144
145    out = np.arccos(cos_PQ)
146
147    out = np.squeeze(out)
148    if out.ndim == 0:
149        return out[()]
150    return out
151
152
153#
154# Functions for computing information theoretic distances
155#
156
157
158def H(P):
159    """
160    P is an array of dimension 1 or 2.
161
162    If P is a matrix, entropy is computed row-wise. (P is assumed to
163    be left stochastic)
164    """
165    P = P.astype(np.float)
166
167    if P.ndim < 2:
168        P = P.reshape((1,P.size))
169
170    # Allow negative infinity without complaint
171    old_settings = np.seterr(divide='ignore')
172    logP = np.log2(P)
173    np.seterr(**old_settings)
174
175    logP[(P == 0)] = 0.
176    out = -1 * (P * logP).sum(1)
177
178    out = np.squeeze(out)
179    if out.ndim == 0:
180        return out[()]
181    return out
182
183
184def cross_H(P,Q):
185    """
186    P and Q are arrays of either dimension 1 or 2.
187
188    If P is a matrix, then cross entropy is computed row-wise on P. (P
189    is assumed to be left stochastic.) If Q is a matrix, cross entropy
190    is computed column-wise on Q. (Q is assumed to be right
191    stochastic.)
192    """
193    P = P.astype(np.float)
194    Q = Q.astype(np.float)
195
196    if P.ndim < 2:
197        P = P.reshape((1,P.size))
198    if Q.ndim < 2:
199        Q = Q.reshape((Q.size,1))
200
201    P_ = np.tile(P.reshape(P.shape[0], P.shape[1], 1), (1,1,Q.shape[1]))
202    Q_ = np.tile(Q.reshape(1, Q.shape[0], Q.shape[1]), (P.shape[0],1,1))
203
204    # Allow negative infinity without complaint
205    old_settings = np.seterr(divide='ignore')
206    logQ_ = np.tile(np.log2(Q.reshape(1, Q.shape[0], Q.shape[1])), 
207                    (P.shape[0],1,1))
208    np.seterr(**old_settings)
209
210    out = (P_ * logQ_)
211
212    P_zeros = (P_ == 0)
213    Q_zeros = (Q_ == 0)
214    PQ_zeros = np.bitwise_and(P_zeros, Q_zeros)
215    out[PQ_zeros] = 0.
216
217    out = np.squeeze(-1 * out.sum(1))
218    if out.ndim == 0:
219        return out[()]
220    return out
221
222
223def KL_div(P, Q):
224    """
225    P and Q are arrays of either dimension 1 or 2.
226
227    If P is a matrix, then KL divergence is computed row-wise on P. (P
228    is assumed to be left stochastic.) If Q is a matrix, KL divergence
229    is computed column-wise on Q. (Q is assumed to be right
230    stochastic.)
231    """
232    P = P.astype(np.float)
233    Q = Q.astype(np.float)
234
235    if P.ndim < 2:
236        return np.squeeze(cross_H(P, Q) - H(P))
237    out = np.squeeze(cross_H(P, Q) - H(P)[:,np.newaxis])
238    if out.ndim == 0:
239        return out[()]
240    return out
241
242
243def JS_div(P, Q, metric=False):
244    """
245    P and Q are arrays of either dimension 1 or 2.
246
247    If P is a matrix, then JS divergence is computed row-wise on P. (P
248    is assumed to be left stochastic.) If Q is a matrix, JS divergence
249    is computed column-wise on Q. (Q is assumed to be right
250    stochastic.)
251   
252    The square root of the JS divergence is a metric.
253    """
254    P = P.astype(np.float)
255    Q = Q.astype(np.float)
256
257    if P.ndim < 2:
258        P = P.reshape((1,P.size))
259    if Q.ndim < 2:
260        Q = Q.reshape((Q.size,1))
261   
262    P_ = np.tile(P[:,:,np.newaxis], (1,1,Q.shape[1]))
263    Q_ = np.tile(Q[np.newaxis,:,:], (P.shape[0],1,1))
264    M_ = .5 * (P_ + Q_)
265
266    # Allow negative infinity without complaint
267    old_settings = np.seterr(divide='ignore')
268    logM_ = np.log2(M_)
269    np.seterr(**old_settings)
270
271    P_zeros = (P_ == 0)
272    Q_zeros = (Q_ == 0)
273    PQ_zeros = np.bitwise_and(P_zeros, Q_zeros)
274    logM_[PQ_zeros] = 0.
275
276    HP = H(P)
277    HQ = H(Q.T)
278
279    PM_KLdiv = -1 * (P_ * logM_).sum(1) - HP.reshape(HP.size,1)
280    QM_KLdiv = -1 * (Q_ * logM_).sum(1) - HQ.reshape(1,HQ.size) 
281
282    out = .5 * (PM_KLdiv + QM_KLdiv)
283
284    out[out < 0] = 0.
285   
286    if metric:
287        out = np.sqrt(out)
288
289    out = np.squeeze(out)
290    if out.ndim == 0:
291        return out[()]
292    return out
293
294
295def JS_dist(P, Q):
296    """
297    P and Q are arrays of either dimension 1 or 2.
298
299    If P is a matrix, then JS distance is computed row-wise on P. (P
300    is assumed to be left stochastic.) If Q is a matrix, JS divergence
301    is computed column-wise on Q. (Q is assumed to be right
302    stochastic.)
303    """
304    return JS_div(P, Q, metric=True)
Nota: Vea TracBrowser para ayuda de uso del navegador del repositorio.