diff --git a/scripts/loaddata.py b/scripts/example_loaddata.py similarity index 100% rename from scripts/loaddata.py rename to scripts/example_loaddata.py diff --git a/scripts/test.py b/scripts/example_test.py similarity index 100% rename from scripts/test.py rename to scripts/example_test.py diff --git a/scripts/isomap_large.py b/scripts/isomap_large.py index 4488408d23174d1b8bc0e12f9cebeafb769acf3e..602b2249a93c26a38d6596a9601d6a30bd709987 100644 --- a/scripts/isomap_large.py +++ b/scripts/isomap_large.py @@ -4,6 +4,9 @@ Created on Tue Jun 12 15:40:44 2018 @author: xiayu + +the first index contains 12048 points, which is too large for a PC's +memory capacity. run this on a server. """ import numpy as np @@ -16,10 +19,8 @@ with np.load('../data/stddata.npz') as d: with np.load('../data/sifteredindex.npz') as d: data = d['data'] -X_ind = data[6]['index'] +X_ind = data[0]['index'] iso = manifold.Isomap(30, n_components=12) phi_s = getmapped(phi, X_ind, iso) -iso2 = manifold.Isomap(30, n_components=2) -phi_s2 = getmapped(phi, X_ind, iso2) -#np.savez_compressed('../data/sifteredisomappeddatalarge',data=phi_s) +np.savez_compressed('../data/sifteredisomappeddatalarge',data=phi_s) diff --git a/scripts/isomappedtomat.py b/scripts/isomappedtomat.py index 84a65b1e68274df2b36534f3c78f324ed42ef6aa..0d8af4c3dfc4fa9312040a08dee772b812d1bdd3 100644 --- a/scripts/isomappedtomat.py +++ b/scripts/isomappedtomat.py @@ -6,10 +6,6 @@ Created on Tue Jun 12 16:38:07 2018 @author: xiayu """ -from scipy.io import savemat -import numpy as np - -with np.load('../data/stddata.npz') as d: - savemat('../data/stddata.mat',d) - +from pkgs.convertor import npz2mat +npz2mat('../data/isomappeddata.npz','../data/isomappeddata.mat') diff --git a/scripts/pkgs/__pycache__/__init__.cpython-36.pyc b/scripts/pkgs/__pycache__/__init__.cpython-36.pyc index fd86a4153888db0802bde6b1e5c331f979d74a1b..f8392d92ce392d7725e41bba55b436acc1fe63b0 100644 Binary files 
a/scripts/pkgs/__pycache__/__init__.cpython-36.pyc and b/scripts/pkgs/__pycache__/__init__.cpython-36.pyc differ diff --git a/scripts/pkgs/__pycache__/convertor.cpython-36.pyc b/scripts/pkgs/__pycache__/convertor.cpython-36.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fa5d399cb714dae0c8278a003736c664b469a95e Binary files /dev/null and b/scripts/pkgs/__pycache__/convertor.cpython-36.pyc differ diff --git a/scripts/pkgs/convertor.py b/scripts/pkgs/convertor.py new file mode 100644 index 0000000000000000000000000000000000000000..3aec6f85bdf18f36b06dcfcae628c8a4d87f7b5c --- /dev/null +++ b/scripts/pkgs/convertor.py @@ -0,0 +1,16 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +Created on Thu Jun 14 11:30:48 2018 + +@author: xiayu +""" + +from scipy.io import savemat +from numpy import load + +def npz2mat(npz, mat): + with load(npz) as d: + savemat(mat, d) + pass + diff --git a/scripts/pkgs/mapping.py b/scripts/pkgs/mapping.py index 6c26498fc144ec30dcda0d9388c2870b4a3bf606..9815b547f770610b5c27ae485d876ab423b34fbf 100644 --- a/scripts/pkgs/mapping.py +++ b/scripts/pkgs/mapping.py @@ -9,6 +9,15 @@ Created on Tue Jun 12 11:30:26 2018 import numpy as np def getmapped(phi, X_ind, mapping): + ''' + :param phi: full set of points. + :param X_ind: index of points which are selected as samples for mapping. + normally we don't use the full set of points for mapping analysis. + :param mapping: mapping object from sklearn.manifold. + :return: full set of mapped points. selected points are directly mapped, + while the remaining points are mapped by an out-of-sample algorithm + provided by the mapping object. 
+ ''' X = phi[X_ind] X_s = mapping.fit_transform(X) diff --git a/scripts/pkgs/sifter.py b/scripts/pkgs/sifter.py index f457633c19d982e8046b33c986074257c607014b..236bbec3609bc9fa8a836c1a4f77b221e2d8022c 100644 --- a/scripts/pkgs/sifter.py +++ b/scripts/pkgs/sifter.py @@ -10,6 +10,11 @@ import numpy as np from sklearn.neighbors import NearestNeighbors def radius_sifter(X,radius): + """ + sift X with radius, so that any two remaining points are + at distance > radius + """ + neigh = NearestNeighbors(radius=radius) indices = np.empty(X.shape[0],dtype=int) ind_rest = np.arange(X.shape[0]) diff --git a/scripts/sifterindex.py b/scripts/sifterindex.py deleted file mode 100644 index 1abe0a0d0ed1da3a7f66f5a350a5638c81624f1c..0000000000000000000000000000000000000000 --- a/scripts/sifterindex.py +++ /dev/null @@ -1,21 +0,0 @@ -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Tue Jun 12 11:57:35 2018 - -@author: xiayu -""" - -import numpy as np - -with np.load('../data/siftereddata.npz') as d: - data = d['data'] - -dt = np.dtype([('index', 'O')]) -a = np.array([],dtype=dt) - -for i in range(1,8): - b = np.array([(data[i]['index'],)],dtype=dt) - a = np.append(a,b) - -np.savez_compressed('../data/sifteredindex',data=a) \ No newline at end of file diff --git a/scripts/convertdata.py b/scripts/step1_convert_data.py similarity index 100% rename from scripts/convertdata.py rename to scripts/step1_convert_data.py diff --git a/scripts/scaleddata.py b/scripts/step2_scale_data.py similarity index 100% rename from scripts/scaleddata.py rename to scripts/step2_scale_data.py diff --git a/scripts/get_sifter.py b/scripts/step3_sift_data.py similarity index 59% rename from scripts/get_sifter.py rename to scripts/step3_sift_data.py index 5fb39192357486f7c29ac2cfa632477ac035b494..ac622e6a4379856a4135e4f5ee312fc0b50034ff 100644 --- a/scripts/get_sifter.py +++ b/scripts/step3_sift_data.py @@ -4,6 +4,8 @@ Created on Sun Jan 7 17:08:10 2018 @author: xiayu + +only the index is required 
for the sake of space saving """ from pkgs.sifter import radius_sifter @@ -25,4 +27,16 @@ for r in radius_list: b = np.array([(phi,ind)],dtype=dt) a = np.append(a,b) -np.savez_compressed('../data/siftereddata',data=a) \ No newline at end of file +np.savez_compressed('../data/siftereddata',data=a) + +with np.load('../data/siftereddata.npz') as d: + data = d['data'] + +dt = np.dtype([('index', 'O')]) +a = np.array([],dtype=dt) + +for i in range(1,8): + b = np.array([(data[i]['index'],)],dtype=dt) + a = np.append(a,b) + +np.savez_compressed('../data/sifteredindex',data=a) \ No newline at end of file