201603 - JasonWayne/personal-wiki GitHub Wiki

2016年第三周

20160111 - 20160117

2016-01-11 11:18:24

import random

# Population for the experiment: the integers 1..99 plus the outlier 2016.
l = list(range(1, 100))
l.append(2016)

experiment_times = 100000


def average_random_xor(population, trials):
    """Estimate the mean XOR of a randomly sized random subset of `population`.

    Each trial first draws a sample size uniformly from 0..len(population)
    and then draws that many distinct elements.  With this two-stage scheme
    the average sample length is half of the population, so every element
    ends up in the sample with probability 1/2.  The original note points to
    the closed-form formula in Zou Bo's slides for that case.  The result
    does depend on how the sample size is chosen, because the sample size
    determines the probability of each number being part of the sample.

    Args:
        population: list of non-negative integers to draw from.
        trials: number of independent experiments; must be positive.

    Returns:
        The average XOR over all trials, as a float (not truncated).

    Raises:
        ValueError: if `trials` is not positive.
    """
    if trials <= 0:
        raise ValueError("trials must be positive")

    total = 0
    for _ in range(trials):
        # The upper bound follows the population size instead of a
        # hard-coded 100, so changing the population cannot make
        # random.sample() fail with "sample larger than population".
        sample_size = random.randint(0, len(population))
        samples = random.sample(population, sample_size)

        result = 0
        for sample in samples:
            result ^= sample

        total += result

    # float() forces true division on Python 2 as well; plain `/` on two
    # ints would silently floor the estimate there.
    return float(total) / trials


if __name__ == "__main__":
    print(average_random_xor(l, experiment_times))

Python `random.sample` 的源码阅读

    def sample(self, population, k):
        """Chooses k unique random elements from a population sequence.

        Returns a new list containing elements from the population while
        leaving the original population unchanged.  The resulting list is
        in selection order so that all sub-slices will also be valid random
        samples.  This allows raffle winners (the sample) to be partitioned
        into grand prize and second place winners (the subslices).

        Members of the population need not be hashable or unique.  If the
        population contains repeats, then each occurrence is a possible
        selection in the sample.

        To choose a sample in a range of integers, use xrange as an argument.
        This is especially fast and space efficient for sampling from a
        large population:   sample(xrange(10000000), 60)

        Args:
            population: any object supporting len().  Objects with a ``keys``
                attribute are copied into a list first; other objects are
                indexed directly and, if indexing fails, copied into a tuple.
            k: number of elements to pick; must satisfy 0 <= k <= len(population).

        Returns:
            A new list of length k, in the order the elements were selected.

        Raises:
            ValueError: if k is negative or greater than len(population)
                (the message only mentions the "larger" case).
            TypeError, KeyError: re-raised from the index-tracking branch
                when the population is already a list.
        """

        # Sampling without replacement entails tracking either potential
        # selections (the pool) in a list or previous selections in a set.

        # When the number of selections is small compared to the
        # population, then tracking selections is efficient, requiring
        # only a small set and an occasional reselection.  For
        # a larger number of selections, the pool tracking method is
        # preferred since the list takes less space than the
        # set and it doesn't suffer from frequent reselections.

        n = len(population)
        if not 0 <= k <= n:
            raise ValueError("sample larger than population")
        # Local aliases for the draw function and int (presumably to make the
        # lookups inside the loops below cheaper).  self.random() is assumed
        # to return a float in [0.0, 1.0) -- it is defined outside this excerpt.
        random = self.random
        _int = int
        # Pre-sized output; slot i receives the i-th selected element.
        result = [None] * k
        setsize = 21        # size of a small set minus size of an empty list
        if k > 5:
            # _ceil and _log are not defined in this excerpt
            # (presumably math.ceil / math.log -- confirm against the module).
            setsize += 4 ** _ceil(_log(k * 3, 4)) # table size for big sets
        if n <= setsize or hasattr(population, "keys"):
            # An n-length list is smaller than a k-length set, or this is a
            # mapping type so the other algorithm wouldn't work.
            pool = list(population)
            for i in xrange(k):         # invariant:  non-selected at [0,n-i)
                # Pick an index among the n-i items that are still unselected.
                j = _int(random() * (n-i))
                result[i] = pool[j]
                pool[j] = pool[n-i-1]   # move non-selected item into vacancy
        else:
            try:
                # Track the indices already chosen instead of copying the
                # population; a colliding index is simply drawn again.
                selected = set()
                selected_add = selected.add
                for i in xrange(k):
                    j = _int(random() * n)
                    while j in selected:
                        j = _int(random() * n)
                    selected_add(j)
                    result[i] = population[j]
            except (TypeError, KeyError):   # handle (at least) sets
                # A list that fails here has a genuine problem, so re-raise;
                # anything else is retried once as an indexable tuple copy.
                if isinstance(population, list):
                    raise
                return self.sample(tuple(population), k)
        return result

2016-01-11 16:13:28

# Install Python into a chosen prefix instead of the system location.
# Based on larsmans' answer at
# http://stackoverflow.com/questions/17213607/how-to-install-python-for-one-user-on-centos
mkdir -p ~/sw/src
cd ~/sw/src
# --no-check-certificate disables TLS certificate verification (kept from the original notes).
wget https://www.python.org/ftp/python/2.7.11/Python-2.7.11.tgz --no-check-certificate

# Unpack the tarball and build from inside the source tree, where ./configure lives.
tar zxvf Python-2.7.11.tgz
cd Python-2.7.11
./configure --prefix=/data0/wenjie/sw
make
make install
# Return to the download directory so later downloads do not land in the source tree.
cd ~/sw/src

# Create a symlink so the new interpreter can be started as `wjpython`.
ln -s /data0/wenjie/sw/bin/python /usr/bin/wjpython

# Install pip via get-pip.py (this attempt failed).
wget -e "http_proxy=10.39.6.24:8087" https://bootstrap.pypa.io/get-pip.py --no-check-certificate
sudo /data0/wenjie/sw/bin/python2 get-pip.py --verbose -i http://pypi.douban.com/simple/

# Fallback: download the source packages and install them by hand.
# Install setuptools first, then pip; the procedure is the same, so only setuptools is shown.
# Open http://pypi.douban.com/simple/setuptools and pick a version to download.
wget "http://pypi.douban.com/packages/source/s/setuptools/setuptools-19.2.tar.gz#md5=78353b1f80375ca5e088f4b4627ffe03"
tar zxvf setuptools-19.2.tar.gz
# setup.py lives inside the extracted directory.
cd setuptools-19.2
wjpython setup.py install --prefix=/data0/wenjie/sw

# The pip download/install steps are omitted here; they are identical to the setuptools ones.
ln -s /data0/wenjie/sw/bin/pip /usr/bin/wjpip

2016-01-12 11:11:09 +0800

# Start the IPython notebook server on port 7438 without opening a browser.
# The '*' is quoted so the shell cannot glob-expand it against file names
# in the current directory before the notebook sees it.
wjipython notebook --ip='*' --port=7438 --no-browser
# Install scikit-learn from the Douban PyPI mirror; the mirror is served over
# plain HTTP, hence --trusted-host.
wjpip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com scikit-learn
# Print the results of model.most_similar() for the query word, each passed
# through `m`, joined with ", ".  `m` and `model` are defined elsewhere (not
# shown in these notes).  The parenthesised form prints the same text on
# Python 2 and is also valid on Python 3.
print(", ".join(map(m, model.most_similar(u"韩国"))))
# On "35" (presumably the host that stores the repository): clone through the local SSH port.
git clone ssh://git@localhost:5928/data0/wenjie/git_repo/word2vec-src.git

# On the local machine.
# NOTE(review): "[email protected]" looks like an e-mail-obfuscation placeholder
# left by the page export; substitute the real user@host before running -- confirm.
git clone ssh://[email protected]:5928/data0/wenjie/git_repo/word2vec-src.git

2016-01-13 15:02:36

# Force-kill every process whose `ps` line contains "run-half".
# The bracket pattern "[r]un-half" still matches the text run-half but not the
# grep command line itself, so grep's own (already finished) PID is no longer
# handed to kill.  `ps aux` uses the BSD option style without the stray dash.
ps aux | grep "[r]un-half" | awk '{print $2}' | xargs kill -9
-- List the tables whose name matches the wildcard pattern *whole*
-- (presumably HiveQL, where '*' is the wildcard -- confirm the target engine).
SHOW TABLES '*whole*';
# Recursively copy everything under data/ to the remote corpus directory over SSH port 5928.
# NOTE(review): "[email protected]" looks like an e-mail-obfuscation placeholder
# left by the page export; substitute the real user@host before running -- confirm.
scp -r -P 5928 data/* [email protected]:/data0/wenjie/word2vec/corpus_raw/
⚠️ **GitHub.com Fallback** ⚠️