201603 - JasonWayne/personal-wiki GitHub Wiki
import random
# Monte Carlo estimate of the mean XOR of a randomly drawn subset of the
# numbers 1..99 plus 2016 (Python 2 script).
l = list(xrange(1, 100))
l.append(2016)
experiment_times = 100000
total = 0
for i in xrange(experiment_times):
    # The outcome of this problem depends on sample_size, because
    # sample_size determines the probability that each number appears in
    # the drawn sample.
    sample_size = random.randint(0, len(l))
    # Alternative: use a fixed sample size instead of a random one.
    # sample_size = 50
    # This doubly random sampling scheme gives every number a probability
    # of 1/2 of ending up in the final sample, because the average sample
    # length is half of the total length; hence the formula from Zou Bo's
    # slides can be applied.
    samples = random.sample(l, sample_size)
    result = 0
    for sample in samples:
        result ^= sample
    total += result
# float() forces true division: with two ints Python 2 would floor the
# quotient and silently drop the fractional part of the estimated mean.
print(float(total) / experiment_times)
python sample的代码阅读
def sample(self, population, k):
    """Chooses k unique random elements from a population sequence.

    Returns a new list containing elements from the population while
    leaving the original population unchanged.  The resulting list is
    in selection order so that all sub-slices will also be valid random
    samples.  This allows raffle winners (the sample) to be partitioned
    into grand prize and second place winners (the subslices).

    Members of the population need not be hashable or unique.  If the
    population contains repeats, then each occurrence is a possible
    selection in the sample.

    To choose a sample in a range of integers, use xrange as an argument.
    This is especially fast and space efficient for sampling from a
    large population:   sample(xrange(10000000), 60)

    Raises ValueError if k is negative or greater than len(population).
    """

    # Sampling without replacement entails tracking either potential
    # selections (the pool) in a list or previous selections in a set.

    # When the number of selections is small compared to the
    # population, then tracking selections is efficient, requiring
    # only a small set and an occasional reselection.  For
    # a larger number of selections, the pool tracking method is
    # preferred since the list takes less space than the
    # set and it doesn't suffer from frequent reselections.

    n = len(population)
    # Report the two invalid cases separately so that a negative k is not
    # described as "larger than population".
    if k < 0:
        raise ValueError("sample size must be non-negative")
    if k > n:
        raise ValueError("sample larger than population")
    # Local aliases for the names used inside the loops below.
    random = self.random
    _int = int
    result = [None] * k
    setsize = 21        # size of a small set minus size of an empty list
    if k > 5:
        # NOTE(review): _ceil and _log are not defined in this excerpt;
        # presumably math.ceil and math.log -- confirm.
        setsize += 4 ** _ceil(_log(k * 3, 4))  # table size for big sets
    if n <= setsize or hasattr(population, "keys"):
        # An n-length list is smaller than a k-length set, or this is a
        # mapping type so the other algorithm wouldn't work.
        pool = list(population)
        for i in xrange(k):         # invariant:  non-selected at [0,n-i)
            j = _int(random() * (n-i))
            result[i] = pool[j]
            pool[j] = pool[n-i-1]   # move non-selected item into vacancy
    else:
        try:
            # Track the indices already chosen and redraw on a collision.
            selected = set()
            selected_add = selected.add
            for i in xrange(k):
                j = _int(random() * n)
                while j in selected:
                    j = _int(random() * n)
                selected_add(j)
                result[i] = population[j]
        except (TypeError, KeyError):   # handle (at least) sets
            # Indexing failed.  For a real list that is a genuine error, so
            # re-raise; otherwise retry on a tuple copy of the population.
            if isinstance(population, list):
                raise
            return self.sample(tuple(population), k)
    return result
# Based on larsmans' answer at
# http://stackoverflow.com/questions/17213607/how-to-install-python-for-one-user-on-centos
# Install Python into a chosen directory
mkdir -p ~/sw/src
cd ~/sw/src
wget https://www.python.org/ftp/python/2.7.11/Python-2.7.11.tgz --no-check-certificate
# Unpack the tarball and enter the source tree; ./configure lives there
tar zxvf Python-2.7.11.tgz
cd Python-2.7.11
./configure --prefix=/data0/wenjie/sw
make
make install
# Create a symlink
ln -s /data0/wenjie/sw/bin/python /usr/bin/wjpython
# Install pip (this attempt failed)
# NOTE(review): the URL is https, for which wget consults https_proxy rather
# than http_proxy -- possibly related to the failure; confirm.
wget -e "http_proxy=10.39.6.24:8087" https://bootstrap.pypa.io/get-pip.py --no-check-certificate
sudo /data0/wenjie/sw/bin/python2 get-pip.py --verbose -i http://pypi.douban.com/simple/
# Install from downloaded source packages instead
# Install setuptools first, then pip; the procedure is the same, so only the
# setuptools steps are written down here
# Open http://pypi.douban.com/simple/setuptools and download some version
wget http://pypi.douban.com/packages/source/s/setuptools/setuptools-19.2.tar.gz#md5=78353b1f80375ca5e088f4b4627ffe03
tar zxvf setuptools-19.2.tar.gz
# setup.py is inside the unpacked directory
cd setuptools-19.2
wjpython setup.py install --prefix=/data0/wenjie/sw
# Downloading and installing pip is omitted; it is exactly the same as for setuptools
ln -s /data0/wenjie/sw/bin/pip /usr/bin/wjpip
# Quote the wildcard so the shell passes it through instead of globbing it
wjipython notebook --ip='*' --port=7438 --no-browser
wjpip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com scikit-learn
# Print what model.most_similar returns for the query word, each entry
# passed through m and joined into one comma-separated line.
# NOTE(review): m and model are defined outside this excerpt (presumably a
# formatter and a trained word2vec model) -- confirm.
print(", ".join(m(hit) for hit in model.most_similar(u"韩国")))
# On machine "35" (presumably the server hosting the repository, hence
# localhost -- confirm)
git clone ssh://git@localhost:5928/data0/wenjie/git_repo/word2vec-src.git
# On the local machine
# NOTE(review): "[email protected]" looks like a placeholder left by e-mail
# obfuscation when this page was scraped; substitute the real user@host.
git clone ssh://[email protected]:5928/data0/wenjie/git_repo/word2vec-src.git
# Force-kill every process whose command line contains "run-half".
# pkill never matches itself (unlike the ps | grep pipeline, where grep
# shows up in its own output) and does nothing when no process matches.
pkill -9 -f "run-half"
-- List the tables whose names match the wildcard pattern *whole*
-- (presumably HiveQL, given the quoted '*' pattern -- confirm the dialect).
SHOW TABLES '*whole*';
# Recursively copy everything under data/ to the remote corpus_raw directory
# over SSH port 5928.
# NOTE(review): "[email protected]" looks like a placeholder left by e-mail
# obfuscation when this page was scraped; substitute the real user@host.
scp -r -P 5928 data/* [email protected]:/data0/wenjie/word2vec/corpus_raw/