Programming Language - socrateslab/zh GitHub Wiki
http://svalver.github.io/Proglang/
http://svalver.github.io/Proglang/paradigms.html
https://en.wikipedia.org/wiki/History_of_programming_languages
It is shown that programming language evolution is highly uneven, marked by innovation events where new languages are created out of improved combinations of different structural components belonging to previous languages. These radiation events occur in a bursty pattern and are tied to novel technological and social niches.
Sergi Valverde; Ricard Sole. 2015 Punctuated Equilibrium in the Large-Scale Evolution of Programming Languages. Journal of the Royal Society Interface. 12(107) [[https://www.researchgate.net/publication/277078861_Punctuated_equilibrium_in_the_large-scale_evolution_of_programming_languages pdf]]
Infobox programming language
"""Scrape Wikipedia's "List of programming languages" and append each
language's infobox fields to a file as one tab-separated JSON record
per line.

Ported from Python 2 to Python 3: the original used ``urllib2`` (removed
in Python 3), the ``except Exception, e`` syntax (a SyntaxError on any
modern interpreter), sliced ``dict.values()`` directly (not possible in
Python 3), and shadowed the loop variable ``k`` inside a dict
comprehension. Behavior is otherwise unchanged: same URLs, same output
path, same [6:8] slice of the language list.
"""
import json
import random
import sys
import time
import urllib.request

from bs4 import BeautifulSoup

LIST_URL = 'https://en.wikipedia.org/wiki/List_of_programming_languages'
OUTPUT_PATH = '/Users/chengjun/bigdata/prolang_data1.json'


def fetch_language_links(list_url=LIST_URL):
    """Return a dict mapping language name -> site-relative article URL.

    Scrapes the multi-column list divs on the Wikipedia list page; each
    ``<li>`` there wraps a single anchor to the language's article.
    """
    content = urllib.request.urlopen(list_url).read()
    soup = BeautifulSoup(content, 'html.parser')
    columns = soup.find_all(
        'div', {'class': 'div-col columns column-count column-count-3'})
    links = {}
    for column in columns:
        for item in column('li'):
            links[item.a.text] = item.a['href']
    return links


def crawler(url):
    """Fetch one language's article and return its infobox as a flat dict.

    Parameters
    ----------
    url : str
        Site-relative article path, e.g. ``/wiki/Python_(programming_language)``.

    Returns
    -------
    dict
        Infobox row headers mapped to their cell text, plus the language
        name (the infobox caption) under the key ``'lang'``.
    """
    page = urllib.request.urlopen('https://en.wikipedia.org' + url).read()
    soup = BeautifulSoup(page, 'html.parser')
    box = soup.find('table', {'class': 'infobox vevent'})
    ths = box('th')
    tds = box('td')
    # Some infoboxes lead with a spanning cell (e.g. a logo) that has no
    # matching header row; drop it so headers and cells pair up.
    if len(ths) != len(tds):
        tds = tds[1:]
    data = {th.text.strip(): td.text.strip() for th, td in zip(ths, tds)}
    data['lang'] = box.find('caption').text.strip()
    return data


def flushPrint(s):
    """Overwrite the current terminal line with *s* (in-place progress display)."""
    sys.stdout.write('\r')
    sys.stdout.write('%s' % s)
    sys.stdout.flush()


def main(output_path=OUTPUT_PATH, start=6, stop=8):
    """Crawl a slice of the language list, appending JSON lines to *output_path*.

    Each output line is ``<article-url>\\t<json-infobox>``. Failures on
    individual pages are reported and skipped (best-effort crawl).
    """
    links = fetch_language_links()
    # dict views are not sliceable; materialize before taking the slice.
    urls = list(links.values())[start:stop]
    with open(output_path, 'a') as f:
        for i, package_url in enumerate(urls):
            flushPrint(str(i) + '==>' + package_url)
            time.sleep(random.random())  # polite, jittered crawl delay
            try:
                data = crawler(package_url)
                # json.dumps emits the double-quoted form the file expects.
                f.write(package_url + '\t' + json.dumps(data) + '\n')
            except Exception as e:
                # Best-effort: report the failure, continue with the next page.
                print(e)


if __name__ == '__main__':
    main()