years - Daria-Maltseva/mixedmethods GitHub Wiki
Final Dataset
December 2019
Hits
setwd("C:/Mail.Ru Cloud/ANR HSE/ANR Projects/Mixed methods/Data/2019_FINAL DATASET_copy")
# hits
# Read DC1.clu, skipping its first two lines; column V1 is tabulated below as
# one year value per hit (presumably the publication year -- confirm against
# the .clu file format).
number <- read.table(file = "DC1.clu", sep = ",", header = FALSE, skip = 2)$V1
# Number of hits for every distinct year value.
t <- table(number)
t
years <- as.integer(names(t))
length(years)
years
# Fit window 1993-2016, selected by year value instead of by table position.
# For the year table listed in the "Distributions in R" transcript further down
# this page these are exactly positions 11:34 (the range that used to be
# hard-coded), but the selection no longer shifts silently when a year is
# added to or missing from the table.
first_year <- 1993L
last_year <- 2016L
fit_idx <- which(years >= first_year & years <= last_year)
year <- years[fit_idx]
year
length(year)
freq <- t[fit_idx]
length(freq)
freq
# Hand-picked tick positions for the y axis (drawn separately because the
# plot itself is created with yaxt = "n").
yt <- c(0, 100, 500, 1000, 2000, 3000, 5000)
plot(year, freq, cex = 1, main = "Hits per year", yaxt = "n", xlab = "Years", ylab = "Freq", pch = 16)
axis(side = 2, at = yt, labels = yt)
# Exponential growth model freq = c * a^(year - 1992); with the window starting
# in 1993 the first fitted year gets exponent 1.
model <- nls(freq ~ c * a^(year - 1992), start = list(c = 10000, a = 2)) #Vlado proposed these values for c and a
model
Nonlinear regression model
model: freq ~ c * a^(year - 1992)
data: parent.frame()
c a
3.762 1.326
residual sum-of-squares: 51275
Number of iterations to convergence: 34
Achieved convergence tolerance: 1.057e-06
# Redraw the scatter plot of hits per year and overlay the fitted curve.
plot(year, freq, cex = 1, main = "Hits per year", yaxt = "n", xlab = "Years", ylab = "Freq", pch = 16)
# The plot is created with yaxt = "n", so the y axis has to be drawn explicitly
# (same tick vector `yt` as for the first plot of this section).
axis(side = 2, at = yt, labels = yt)
# newdata must use the model's own variable name (`year`); a column called `x`
# is ignored by the formula and the prediction only worked because `year` was
# still present in the workspace. `lwd` is spelled out instead of relying on
# partial argument matching of `lw`.
lines(year, predict(model, newdata = list(year = year)), col = "red", lwd = 2)
Then compute the number of years x in which the field doubles:
a^x = 2;
x log a = log 2;
x = log 2 / log a
# Doubling time in years for the rounded growth factor a = 1.326 printed by
# the fit above: the logarithm of 2 to base a.
log(2, base = 1.326)
OR:
# Same doubling-time calculation, but with the unrounded fitted coefficients.
summary(model)
coef(model)
# Note: `a` now holds the whole coefficient vector (c, a), not just the
# growth factor.
a <- coef(model)
a[1]  # coefficient c of the model freq ~ c * a^(year - 1992)
a[2]  # coefficient a (yearly growth factor) -- we need this
# Number of years x solving a^x = 2.
log(2)/log(a[2])
Cited only
# cited only
# Read DC0.clu, skipping its first two lines; column V1 is tabulated below as
# one year value per cited-only work (presumably the publication year --
# confirm against the .clu file format).
Y <- read.table(file = "DC0.clu", sep = ",", header = FALSE, skip = 2)$V1
t <- table(Y)
years <- as.integer(names(t))
length(years)
years
# Keep 1900 and later, selected by year value instead of by table position.
# For the year table listed in the transcript further down this page these are
# exactly positions 132:252 (the range that used to be hard-coded).
first_year0 <- 1900L
fit_idx0 <- which(years >= first_year0)
year0 <- years[fit_idx0]
year0
freq0 <- t[fit_idx0]
# Leftover plot option, kept for reference: log = "y" would switch the plots
# below to a logarithmic y axis.
#log = "y",
length(freq0)
# Hand-picked tick positions for the y axis (the plots use yaxt = "n").
yt <- c(0, 1000, 5000, 10000, 15000, 20000, 30000)
plot(year0, freq0, cex = 1, main = "Cited only works per year", yaxt = "n", xlab = "Years", ylab = "Freq", pch = 16)
axis(side = 2, at = yt, labels = yt)
# Scaled log-normal density in the age of a work (2019 - year): c is the scale,
# a and b are the meanlog and sdlog arguments of dlnorm().
model <- nls(freq0 ~ c * dlnorm(2019 - year0, a, b), start = list(c = 1e6, a = 2, b = 0.7))
model
# Largest yearly count of cited-only works. This used to read max(freq), which
# is the hits vector left over from the previous section, not this data.
max(freq0)
plot(year0, freq0, cex = 0.75, main = "Cited only works per year", yaxt = "n", xlab = "Years", ylab = "Freq")
axis(side = 2, at = yt, labels = yt)
# newdata must use the model's own variable name (`year0`); a column called `x`
# is ignored by the formula. `lwd` is spelled out instead of the partially
# matched `lw`.
lines(year0, predict(model, newdata = list(year0 = year0)), col = "red", lwd = 2)
model
Nonlinear regression model
model: freq0 ~ c * dlnorm(2019 - year0, a, b)
data: parent.frame()
c a b
5.311e+05 2.480e+00 6.351e-01
residual sum-of-squares: 36815713
Number of iterations to convergence: 8
Achieved convergence tolerance: 1.59e-06
> coef(model)
c a b
5.311115e+05 2.479916e+00 6.350575e-01
>
Preparation
DC.clu
==============================================================================
3. C:\Mail.Ru Cloud\ANR HSE\ANR Projects\Mixed methods\Data\2019_FINAL DATASET_copy\DC.clu (547407)
==============================================================================
Dimension: 547407
The lowest value: 0
The highest value: 5
Frequency distribution of cluster values:
Cluster Freq Freq% CumFreq CumFreq% Representative
----------------------------------------------------------------
0 529238 96.6809 529238 96.6809 ABBOTT_L(2015)26:340
1 14636 2.6737 543874 99.3546 HILL_M(2018)34:71
2 11 0.0020 543885 99.3566 WHITTEMO_R(2005)52:546
4 1873 0.3422 545758 99.6988 COLLINS_K(2006)4:67
5 1649 0.3012 547407 100.0000 ANDERSON_V(2017)28:125
----------------------------------------------------------------
Sum 547407 100.0000
Transformed: binarized
==============================================================================
6. Binarized C3 [1-*] (547407)
==============================================================================
Dimension: 547407
The lowest value: 0
The highest value: 1
Frequency distribution of cluster values:
Cluster Freq Freq% CumFreq CumFreq% Representative
----------------------------------------------------------------
0 529238 96.6809 529238 96.6809 ABBOTT_L(2015)26:340
1 18169 3.3191 547407 100.0000 HILL_M(2018)34:71
----------------------------------------------------------------
Sum 547407 100.0000
Saved as DCbin.clu
Based on this binarized partition, subpartitions of years were constructed: DC1.clu (hits) and DC0.clu (cited-only works).
Distributions in R
Hits
> setwd("C:/Mail.Ru Cloud/ANR HSE/ANR Projects/Mixed methods/Data/2019_FINAL DATASET_copy")
> number<-read.table(file="DC1.clu", sep=",", header=FALSE, skip=2)$V1
> t<-table(number)
> head(t)
number
0 1959 1967 1978 1979 1983
15 1 1 1 1 1
> years <- as.integer(names(t))
> length(years)
[1] 37
> year <- years[2:37]
> year
[1] 1959 1967 1978 1979 1983 1985 1988 1989 1991 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
[21] 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019
> length(year)
[1] 36
> freq <- t[2:37]
> length(freq)
[1] 36
> freq
number
1959 1967 1978 1979 1983 1985 1988 1989 1991 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003
1 1 1 1 1 1 2 4 1 5 4 4 6 13 8 12 15 13 27 41
2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019
54 79 133 209 354 465 684 822 1045 1429 1827 2609 3214 3600 1142 326
> yt <- c(0,100,500,1000,2000,3000)
> plot(year,freq,cex=1,main="Hits per year", yaxt="n", xlab = "Years", ylab = "Freq", pch=16)
> axis(side=2,at=yt, labels=yt)
Cited only
> Y<-read.table(file="DC0.clu", sep=",", header=FALSE, skip=2)$V1
> t <- table(Y)
> years <- as.integer(names(t))
> length(years)
[1] 252
> years
[1] 0 1 2 1003 1093 1142 1513 1575 1595 1603 1610 1651 1669 1676 1687 1707 1710 1724 1736
[20] 1739 1748 1750 1751 1756 1757 1758 1762 1775 1776 1781 1785 1786 1787 1788 1789 1790 1791 1794
[39] 1795 1796 1797 1802 1803 1805 1807 1808 1809 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820
[58] 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1833 1834 1835 1836 1837 1838 1839 1840
[77] 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1857 1858 1859 1860
[96] 1861 1862 1863 1864 1865 1867 1868 1869 1870 1871 1872 1875 1876 1877 1878 1879 1880 1881 1882
[115] 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901
[134] 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920
[153] 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939
[172] 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958
[191] 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977
[210] 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996
[229] 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
[248] 2016 2017 2018 2019 2020
> year0 <- years[7:243]
> year0
[1] 1513 1575 1595 1603 1610 1651 1669 1676 1687 1707 1710 1724 1736 1739 1748 1750 1751 1756 1757
[20] 1758 1762 1775 1776 1781 1785 1786 1787 1788 1789 1790 1791 1794 1795 1796 1797 1802 1803 1805
[39] 1807 1808 1809 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826
[58] 1827 1828 1829 1830 1831 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846
[77] 1847 1848 1849 1850 1851 1852 1853 1854 1855 1857 1858 1859 1860 1861 1862 1863 1864 1865 1867
[96] 1868 1869 1870 1871 1872 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888
[115] 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907
[134] 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926
[153] 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945
[172] 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964
[191] 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983
[210] 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002
[229] 2003 2004 2005 2006 2007 2008 2009 2010 2011
> freq0 <- t[7:243]
> length(freq0)
[1] 237
> yt <- c(0,1000,5000,10000,15000, 20000, 30000);
> plot(year0,freq0,cex=1,main="Cited only works per year", yaxt="n", xlab = "Years", ylab = "Freq", pch=16)
> axis(side=2,at=yt, labels=yt)
> length(freq0)
[1] 237
> yt <- c(0,1000,5000,10000,15000, 20000, 30000);
> plot(year0,freq0,cex=1,main="Cited only works per year", yaxt="n", xlab = "Years", ylab = "Freq", pch=16)
> axis(side=1,at=yt, labels=yt)
> length(freq0)
[1] 237
> yt <- c(0,1000,5000,10000,15000, 20000, 30000);
> plot(year0,freq0,cex=1,main="Cited only works per year", yaxt="n", xlab = "Years", ylab = "Freq", pch=16)
> axis(side=2,at=yt, labels=yt)
> model <- nls(freq0~c*dlnorm(2019-year0,a,b),start=list(c=1e6,a=2,b=0.7))
> model
Nonlinear regression model
model: freq0 ~ c * dlnorm(2019 - year0, a, b)
data: parent.frame()
c a b
4.830e+05 2.517e+00 5.533e-01
residual sum-of-squares: 7720160
Number of iterations to convergence: 7
Achieved convergence tolerance: 1.711e-06
> freq0
Y
1513 1575 1595 1603 1610 1651 1669 1676 1687 1707 1710 1724 1736 1739 1748 1750 1751
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1756 1757 1758 1762 1775 1776 1781 1785 1786 1787 1788 1789 1790 1791 1794 1795 1796
1 1 1 1 1 3 1 1 1 1 1 1 2 1 3 2 1
1797 1802 1803 1805 1807 1808 1809 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820
1 1 1 1 2 1 2 3 1 2 1 3 1 2 4 1 1
1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1833 1834 1835 1836 1837 1838
1 2 1 4 2 2 5 2 6 4 4 7 4 5 3 4 5
1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855
4 3 1 2 4 4 2 1 1 2 2 2 1 1 1 1 2
1857 1858 1859 1860 1861 1862 1863 1864 1865 1867 1868 1869 1870 1871 1872 1875 1876
4 6 3 3 2 1 2 2 2 2 2 2 3 5 3 1 4
1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893
5 4 2 2 1 3 3 2 6 3 2 2 8 9 3 11 6
1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910
3 5 8 8 9 9 7 10 13 11 17 14 10 9 18 10 23
1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927
11 13 18 16 16 18 9 5 11 18 16 19 8 16 25 21 25
1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944
24 25 35 26 37 31 32 38 33 40 37 37 18 29 27 29 22
1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961
32 51 60 75 84 77 67 66 89 108 96 115 135 144 145 166 166
1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978
189 198 258 239 278 332 351 388 442 440 496 563 575 666 710 848 959
1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995
1019 1124 1196 1374 1575 1664 1934 2119 2421 2585 2958 3757 4168 4552 5035 5932 6857
1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
7747 8423 10106 11349 14837 15865 18059 20120 22195 24837 26844 28466 30177 31272 32849 32714
Removing the earliest years (this run keeps 1894 onwards, i.e. positions 126:252) and adding the years after 2011:
> t <- table(Y)
> years <- as.integer(names(t))
> length(years)
[1] 252
> years
[1] 0 1 2 1003 1093 1142 1513 1575 1595 1603 1610 1651 1669 1676 1687 1707 1710 1724 1736
[20] 1739 1748 1750 1751 1756 1757 1758 1762 1775 1776 1781 1785 1786 1787 1788 1789 1790 1791 1794
[39] 1795 1796 1797 1802 1803 1805 1807 1808 1809 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820
[58] 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1833 1834 1835 1836 1837 1838 1839 1840
[77] 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855 1857 1858 1859 1860
[96] 1861 1862 1863 1864 1865 1867 1868 1869 1870 1871 1872 1875 1876 1877 1878 1879 1880 1881 1882
[115] 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901
[134] 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920
[153] 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939
[172] 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958
[191] 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977
[210] 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996
[229] 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015
[248] 2016 2017 2018 2019 2020
> year0 <- years[126:252]
> year0
[1] 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910 1911 1912
[20] 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931
[39] 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950
[58] 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969
[77] 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 1987 1988
[96] 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007
[115] 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
> freq0 <- t[126:252]
>
> length(freq0)
[1] 127
> yt <- c(0,1000,5000,10000,15000, 20000, 30000);
> plot(year0,freq0,cex=1,main="Cited only works per year", yaxt="n", xlab = "Years", ylab = "Freq", pch=16)
> axis(side=2,at=yt, labels=yt)
>
> model <- nls(freq0~c*dlnorm(2019-year0,a,b),start=list(c=1e6,a=2,b=0.7))
> model
Nonlinear regression model
model: freq0 ~ c * dlnorm(2019 - year0, a, b)
data: parent.frame()
c a b
5.311e+05 2.480e+00 6.351e-01
residual sum-of-squares: 36815818
Number of iterations to convergence: 8
Achieved convergence tolerance: 1.591e-06
> max(freq)
[1] 3600
> plot(year0,freq0,cex=0.75,main="Cited only works per year", yaxt="n", xlab = "Years", ylab = "Freq")
> axis(side=2,at=yt, labels=yt)
> lines(year0,predict(model,list(x=2019-year0)),col="red",lw=2)
>
freq
1513 1575 1595 1603 1610 1651 1669 1676 1687 1707 1710 1724 1736 1739 1748 1750 1751
1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
1756 1757 1758 1762 1775 1776 1781 1785 1786 1787 1788 1789 1790 1791 1794 1795 1796
1 1 1 1 1 3 1 1 1 1 1 1 2 1 3 2 1
1797 1802 1803 1805 1807 1808 1809 1811 1812 1813 1814 1815 1816 1817 1818 1819 1820
1 1 1 1 2 1 2 3 1 2 1 3 1 2 4 1 1
1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1833 1834 1835 1836 1837 1838
1 2 1 4 2 2 5 2 6 4 4 7 4 5 3 4 5
1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855
4 3 1 2 4 4 2 1 1 2 2 2 1 1 1 1 2
1857 1858 1859 1860 1861 1862 1863 1864 1865 1867 1868 1869 1870 1871 1872 1875 1876
4 6 3 3 2 1 2 2 2 2 2 2 3 5 3 1 4
1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893
5 4 2 2 1 3 3 2 6 3 2 2 8 9 3 11 6
1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1909 1910
3 5 8 8 9 9 7 10 13 11 17 14 10 9 18 10 23
1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 1922 1923 1924 1925 1926 1927
11 13 18 16 16 18 9 5 11 18 16 19 8 16 25 21 25
1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 1942 1943 1944
24 25 35 26 37 31 32 38 33 40 37 37 18 29 27 29 22
1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961
32 51 60 75 84 77 67 66 89 108 96 115 135 144 145 166 166
1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978
189 198 258 239 278 332 351 388 442 440 496 563 575 666 710 848 959
1979 1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995
1019 1124 1196 1374 1575 1664 1934 2119 2421 2585 2958 3757 4168 4552 5035 5932 6857
1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012
7747 8423 10106 11349 14837 15865 18059 20120 22195 24837 26844 28466 30177 31272 32849 32714 32126
2013 2014 2015 2016 2017 2018 2019 2020
30451 25867 20012 12030 4606 1137 130 1
Old dataset
Hits
# Old dataset: distribution of hits over years.
setwd("C:/Mail.Ru Cloud/ANR HSE/ANR Projects/Mixed methods/WoS2Pajek Data with EXTRA 3rd/Additional search from July 2019")
# Read DC1.clu without its first two lines and keep the single column V1.
hits_raw <- read.table("DC1.clu", header = FALSE, sep = ",", skip = 2)
number <- hits_raw[["V1"]]
# Frequency of every distinct year value.
t <- table(number)
head(t)
t
years <- as.integer(names(t))
length(years)
years
# Positions 2 to 37 of the year table: everything except its first entry.
keep <- seq(2, 37)
year <- years[keep]
year
length(year)
freq <- t[keep]
length(freq)
freq
# Tick positions for the manually drawn y axis.
yt <- c(0, 100, 500, 1000, 2000, 3000)
plot(
  year, freq,
  pch = 16, cex = 1,
  main = "Hits per year", xlab = "Years", ylab = "Freq",
  yaxt = "n"
)
axis(side = 2, at = yt, labels = yt)
Cited only works
# Old dataset: distribution of cited-only works over years.
# Read DC0.clu, skipping its first two lines, and tabulate column V1.
Y <- read.table(file = "DC0.clu", sep = ",", header = FALSE, skip = 2)$V1
t <- table(Y)
years <- as.integer(names(t))
length(years)
years
# NOTE(review): positions 7:243 are tied to the old dataset's year table --
# confirm which years they cover before reusing this with other data.
year0 <- years[7:243]
freq0 <- t[7:243]
year0
length(freq0)
# Hand-picked tick positions for the y axis (the plots use yaxt = "n").
yt <- c(0, 1000, 5000, 10000, 15000, 20000, 30000)
plot(year0, freq0, cex = 1, main = "Cited only works per year", yaxt = "n", xlab = "Years", ylab = "Freq", pch = 16)
axis(side = 2, at = yt, labels = yt)
# Scaled log-normal density in the age of a work (2019 - year): c is the scale,
# a and b are the meanlog and sdlog arguments of dlnorm().
model <- nls(freq0 ~ c * dlnorm(2019 - year0, a, b), start = list(c = 1e6, a = 2, b = 0.7))
model
# Largest yearly count of cited-only works. This used to read max(freq), which
# is the hits vector from the previous section, not this data.
max(freq0)
plot(year0, freq0, cex = 0.75, main = "Cited only works per year", yaxt = "n", xlab = "Years", ylab = "Freq")
axis(side = 2, at = yt, labels = yt)
# newdata must use the model's own variable name (`year0`); a column called `x`
# is ignored by the formula. `lwd` is spelled out instead of the partially
# matched `lw`.
lines(year0, predict(model, newdata = list(year0 = year0)), col = "red", lwd = 2)
Fitting result:
> model <- nls(freq0~c*dlnorm(2019-year0,a,b),start=list(c=1e6,a=2,b=0.7))
> model
Nonlinear regression model
model: freq0 ~ c * dlnorm(2019 - year0, a, b)
data: parent.frame()
c a b
4.840e+05 2.482e+00 6.227e-01
residual sum-of-squares: 24739746
Number of iterations to convergence: 8
Achieved convergence tolerance: 1.261e-06