SVD Results 1500 - derlin/bda-lsa-project GitHub Wiki

This lengthy table compares identical queries run against models built with different parameters. Looking at the data, we can say that having a bigger dictionary has an enormous impact on the model accuracy.

This is especially true when looking for generic queries (bottom of the table).

Term Parameters (#terms = 250, K = 100) Parameters (#terms = 3000, K = 200)
ID:5 Social
scala> q.printTopTermsForTerm(5)
(social,0.99),
(link,0.70),
(provide,0.56),
(different,0.55),
(way,0.54),
(important,0.54),
(likely,0.54),
(support,0.53),
(develop,0.53),
(rather,0.52)

scala> q.printTopDocsForTerm(5)
(Social support,206.27),
(Social constructionism,150.05),
(Social network,142.94),
(Rhetoric of social intervention model,113.88),
(Development communication,112.11),
(Documentality,107.92),
(Social undermining,104.31),
(Corporate social responsibility,86.08),
(Social media mining,81.78),
(Language and gender,77.43)
scala> q printTopTermsForTerm 5
(social,1.00), 
(link,0.66), 
(support,0.60), 
(perceive,0.59), 
(explain,0.57), 
(construction,0.57), 
(socially,0.57), 
(different,0.57), 
(examine,0.57), 
(provide,0.55)

scala> q printTopDocsForTerm 5
(Social support,220.58),
(Social network,141.91),
(Rhetoric of social intervention model,119.70),
(Development communication,112.60),
(Social undermining,111.02),
(Social constructionism,90.15),
(Corporate social responsibility,87.43),
(Shyness,76.56),
(Language and gender,72.87),
(Interpersonal communication,71.10)
ID:1 Woman
scala> q.printTopTermsForTerm(1)
(woman,0.99),
(man,0.68),
(female,0.67),
(national,0.56),
(role,0.55),
(male,0.53),
(country,0.51),
(society,0.51),
(century,0.51),
(part,0.47)

scala> q.printTopDocsForTerm(1)
(Women's rights,653.17),
(EGM: prevention of violence against [...],576.35),
(Women in government,463.013),
(Women in the military by country,391.09),
(Matriarchy,370.33),
(Gender equality,323.52),
(Gender role,292.92),
(Women and the environment,288.63),
(Women in philosophy,274.10),
(Women in medicine,255.24)
scala> q printTopTermsForTerm 1
(woman,1.00),
(women,0.84),
(man,0.70),
(status,0.69), 
(female,0.68), 
(advancement,0.62), 
(discrimination,0.62), 
(household,0.61), 
(custom,0.60), 
(reform,0.59)

scala> q printTopDocsForTerm 1
(Women's rights,657.38), 
(EGM: prevention of violence against [...],578.46), 
(Women in government,464.19), 
(Women in the military by country,398.73), 
(Matriarchy,370.06), 
(Gender equality,327.54), 
(Gender role,299.30), 
(Women and the environment,292.24), 
(Women in philosophy,272.12), 
(Women in medicine,256.20)
ID:10 Media
scala> q.printTopTermsForTerm(10)
(media,1.00),
(education,0.49),
(influence,0.40),
(understand,0.36),
(study,0.35),
(information,0.35),
(new,0.35),
(develop,0.34),
(university,0.34),
(focus,0.34)

scala> q.printTopDocsForTerm(10)
(Media literacy,514.98),
(Media ecology,272.30),
(Transparency of media ownership in Europe,271.17),
(Media studies,238.63),
(Uses and gratifications theory,191.25),
(Comparing Media Systems,178.30),
(Transparency of media ownership in Croatia,131.30),
(New media studies,128.86),
(Mediated cross-border communication,125.29),
(Transformation processes (media systems),119.95)
scala> q printTopTermsForTerm 10
(media,1.00), 
(literacy,0.62), 
(mass,0.57), 
(advertising,0.56), 
(education,0.55), 
(curriculum,0.55), 
(ecology,0.51), 
(outlet,0.51), 
(broadcasting,0.50), 
(analyze,0.50)

scala> q printTopDocsForTerm 10
(Media literacy,521.94), 
(Transparency of media ownership in Europe,290.04), 
(Media ecology,258.61), 
(Media studies,213.74), 
(Uses and gratifications theory,212.58), 
(Comparing Media Systems,179.19), 
(Mediated cross-border communication,128.28), 
(Transparency of media ownership in Croatia,120.87), 
(Study of global communication,115.91), 
(Transformation processes (media systems),111.85)
ID:99 War
scala> q.printTopTermsForTerm(99)
(war,1.00),
(force,0.56),
(states,0.44),
(begin,0.41),
(must,0.40),
(united,0.40),
(government,0.37),
(since,0.36),
(state,0.33),
(end,0.33)

scala> q.printTopDocsForTerm(99)
(Just war theory,450.55),
(War on Terror,273.40),
(Criticism of the War on Terror,179.88),
(Women in the military by country,131.15),
(Women in the military,108.41),
(Women in the military in Europe,89.93),
(Critical terrorism studies,89.09),
(The Spitting Image,87.61),
(Insurgency,79.71),
(History of terrorism,71.69)
scala> q printTopTermsForTerm 99
(war,1.0), 
(declare,0.65), 
(military,0.61), 
(enemy,0.61), 
(bush,0.60), 
(iraq,0.60), 
(force,0.59), 
(fight,0.59), 
(legitimate,0.59), 
(terror,0.56)

scala> q printTopDocsForTerm 99
(Just war theory,402.06), 
(War on Terror,291.09), 
(Criticism of the War on Terror,168.38), 
(Women in the military by country,133.55), 
(The Spitting Image,113.55), 
(Women in the military,107.94), 
(Critical terrorism studies,96.50), 
(Insurgency,96.18), 
(Women in the military in Europe,93.87), 
(Women in journalism,85.07)
ID:125 Argue
scala> q.printTopTermsForTerm(125)
(argue,1.00),
(rather,0.79),
(view,0.79),
(thus,0.76),
(idea,0.76),
(even,0.72),
(consider,0.71),
(see,0.70),
(present,0.69),
(believe,0.68)

scala> q.printTopDocsForTerm(125)
(Human rights,40.64),
(Conscience,36.27),
(Women in philosophy,33.14),
(Matriarchy,31.66),
(Free will,30.20),
(Queer theory,27.80),
(Women's rights,27.46),
(Philosophical progress,24.25),
(Rhetoric,21.81),
(Good and evil,20.63)
scala> q printTopTermsForTerm 125
(argue,0.99), 
(view,0.74), 
(idea,0.74), 
(rather,0.73), 
(thus,0.68), 
(consider,0.68), 
(mean,0.67), 
(see,0.66), 
(could,0.65), 
(example,0.65)

scala> q printTopDocsForTerm 125
(Human rights,41.75), 
(Conscience,36.45), 
(Queer theory,33.70), 
(Women in philosophy,33.53), 
(Free will,32.24), 
(Matriarchy,29.57), 
(Philosophical progress,26.92), 
(Women's rights,26.06), 
(Social alienation,25.77), 
(Rhetoric,24.489118110484608)
ID:225 Measure
scala> q.printTopTermsForTerm(225)
(measure,1.00),
(base,0.51),
(general,0.49),
(level,0.49),
(suggest,0.42),
(give,0.42),
(much,0.40),
(include,0.40),
(well,0.39),
(similar,0.38)

scala> q.printTopDocsForTerm(225)
(Special measures for gender equality [...],110.04),
(Happiness economics,100.31),
(Empathy,65.21),
(Cephalopod size,64.02),
(Salsa music,63.33),
(Quality of life,56.82),
(Cluster analysis,52.83),
(EGM: prevention of violence against [...],49.99),
(Happiness,38.35),
(Measures of guilt and shame,36.72)
scala> q printTopTermsForTerm 225
(measure,1.00), 
(measurement,0.64), 
(index,0.62), 
(level,0.58), 
(related,0.53), 
(leisure,0.53), 
(base,0.51), 
(average,0.50), 
(higher,0.50), 
(correlate,0.50)

scala> q printTopDocsForTerm 225
(Happiness economics,71.09),
(Empathy,63.63),
(Salsa music,63.47),
(Cephalopod size,61.03),
(Cluster analysis,53.12),
(EGM: prevention of violence against [...],49.92),
(Happiness,47.55),
(Subjective well-being,47.27),
(Special measures for gender equality [...],45.56),
(Counter-terrorism,38.18)
ID:246 Art
scala> q.printTopTermsForTerm(246)
(art,1.00),
(work,0.58),
(century,0.42),
(create,0.40),
(history,0.37), 
(isbn,0.33),
(often,0.31),
(new,0.29),
(begin,0.29),
(write,0.28)

scala> q.printTopDocsForTerm(246)
(Women artists,227.45),
(Art and emotion,216.01),
(Rhetoric,141.52),
(Minimalism,103.68),
(Women in photography,48.92),
(Art music,41.17),
(Music genre,35.50),
(Women in architecture,33.11),
(Media literacy,31.43),
(Film studies,27.26)
scala> q printTopTermsForTerm 246
(art,1.00),
(painting,0.84),
(museum,0.83),
(artist,0.74),
(maria,0.73),
(workshop,0.70),
(exhibition,0.68),
(academy,0.68),
(judith,0.68),
(renaissance,0.67)

scala> q printTopDocsForTerm 246
(Women artists,266.32),
(Rhetoric,138.84),
(Art and emotion,120.05),
(Minimalism,83.95),
(Women in photography,42.20),
(Suffering,36.70),
(Women in architecture,34.44),
(Media literacy,33.45),
(Visual rhetoric,29.42),
(Women Surrealists,24.99)
docTitle: 1 Sorting network
scala> q.printTopDocsForDoc(1)
(Sorting network,1.00),
(Value (ethics),0.78),
(Value judgment,0.74),
(Instrumental and intrinsic value,0.73),
(Commensurability (ethics),0.69),
(Commensurability (ethics),0.69),
(Übermensch,0.66),
(Taylor scraping flow,0.66),
(Ressentiment (Scheler),0.61),
(Infinity,0.59),
(Expectancy-value theory,0.58)
scala> q printTopDocsForDoc 1
(Sorting network,1),
(Eyeball network,0.94),
(Weighted correlation network analysis,0.9),
(Social network,0.89),
(Processing delay,0.86),
(Queuing delay,0.81),
(Co-occurrence networks,0.77),
(Social objects,0.69),
(Underground tunnel network,0.54),
(Dunst (performance group),0.5)
docTitle: 4 Tyranny of numbers
scala> q.printTopDocsForDoc(4)
(Tyranny of numbers,1.00),
(Computer engineering,0.76),
(J. Halcombe Laning,0.74),
(Processor design,0.71),
(Outline of computer engineering,0.69),
(Armando Stettner,0.68),
(Computer science,0.68),
(Chamber of Computer Engineers of Turkey,0.65),
(Informatics engineering,0.63),
(Reflected-wave switching,0.63)
scala> q printTopDocsForDoc 4
(Tyranny of numbers,1),
(Processor design,0.68),
(George M. Galambos,0.67),
(Computer engineering,0.67),
(J. Halcombe Laning,0.66),
(Computer science,0.64),
(Industrial data processing,0.63),
(Test vector,0.61),
(Outline of computer engineering,0.61),
(Rice University Electrical and Computer Engineering,0.61)
docTitle: 4468 Gender identity
scala> q.printTopDocsForDoc(4471)
(Gender identity,1.00),
(Gender variance,0.91),
(Transgender,0.89),
(Gender policing,0.89),
(Gender binary,0.87),
(Dysphoria,0.86),
(Third gender,0.86),
(Gender studies,0.85),
(Discrimination against non-binary gender persons,0.84),
(Heteronormativity,0.82)
scala> q printTopDocsForDoc 4468
(Gender identity,1),
(Gender variance,0.96),
(Gender binary,0.95),
(Gender systems,0.93),
(Gender polarization,0.92),
(Sex and gender distinction,0.91),
(Third gender,0.91),
(Gender-blind,0.89),
(Gender sensitization,0.87),
(Gender Park,0.87)
docTitle: 3850 Data Mining ...
scala> q.printTopDocsForDoc(3850)
(Data Mining and Knowledge Discovery,1.00),
(Domain driven data mining,0.98),
(Data stream mining,0.89),
(Software mining,0.83),
(Data mining,0.81),
(Alpha strike (engineering),0.77),
(Structure mining,0.75),
(Web mining,0.73),
(Instance selection,0.73),
(Uncertain data,0.72)
scala> q printTopDocsForDoc 3847
(Data Mining and Knowledge Discovery,1),
(Agent mining,0.98),
(Domain driven data mining,0.98),
(Data mining,0.97),
(SIGKDD,0.97),
(Structure mining,0.95),
(Web mining,0.94),
(Data stream mining,0.94),
(Bibliomining,0.94),
(K-optimal pattern discovery,0.91)
docTitle: 3166 Shooting ranges in Switzerland
scala> q.printTopDocsForDoc(3166)
(Shooting ranges in Switzerland,1.00),
(Supererogation,0.57),
(Obligation,0.57),
(Miller's law,0.50),
(Adoration,0.45),
(Gun laws in Switzerland,0.43),
(Internet Privacy Act,0.43),
(Righteousness,0.42),
(Principled Distance,0.41),
(Principled Distance,0.41),
(Headlight flashing,0.41)
scala> q printTopDocsForDoc 3166
(Shooting ranges in Switzerland,1),
(Gun laws in Switzerland,0.92),
(Schweizerischer Schützenverein,0.88),
(Röstigraben,0.78),
(Homelessness in Switzerland,0.77),
(Swiss people,0.77),
(Eidgenossenschaft,0.76),
(Betty Bossi,0.76),
(Religion in Switzerland,0.76),
(Demographics of Switzerland,0.75)
query: war woman communication
scala> q.printTopDocsForTermQuery(
List("war", "woman", "communication"))
(Development communication,1482.82),
(Women's rights,1197.99),
(Just war theory,1049.92),
(EGM: prevention ...,997.93),
(Women in the ...,985.56),
(Women in government,786.34),
(Interpersonal communication,731.81),
(Women in the ...,682.36),
(Matriarchy,669.44),
(War on Terror,641.82)
scala> q printTopDocsForTermQuery(
List("war","woman","communication"))
(Development communication,1490.43),
(Women's rights,1216.45),
(Women in the military by country,1004.25),
(EGM: prevention of violence against women and girls,999.62),
(Just war theory,934.74),
(Women in government,787.77),
(Interpersonal communication,758.96),
(War on Terror,686.54),
(Women in the military,674.3),
(Matriarchy,667.65)
query: war man communication
scala> q.printTopDocsForTermQuery(
List("war","man","communication"))
(Development communication,1386.19),
(Just war theory,1058.64),
(Interpersonal communication,727.13),
(War on Terror,645.88),
(Health communication,439.67),
(Criticism of the War on Terror,428.39),
(Organizational communication,380.08),
(History of communication studies,377.18),
(Intercultural communication,373.59),
(Women in the military by country,372.27)
scala> q printTopDocsForTermQuery(
List("war", "man", "communication"))
(Development communication,1396.56),
(Just war theory,950.63),
(Interpersonal communication,750.01),
(War on Terror,688.02),
(Health communication,452.31),
(Criticism of the War on Terror,395.14),
(Women in the military by country,374.97),
(Organizational communication,364.83),
(Models of communication,363.12),
(Intercultural communication,360.21)
query: feel subject research moral
scala> q.printTopDocsForTermQuery(
List("feel", "subject", "research", "moral"));
(Conscience,543),
(Disgust,298.83),
(Empathy,278.58),
(Moral responsibility,259.4),
(Moral universe,251.51),
(Free will,236.37),
(Good and evil,218.1),
(Eudaimonia,165.44),
(Satanic ritual abuse,144.73),
(Critical terrorism studies,134.51)
scala> q printTopDocsForTermQuery(
List("feel", "subject", "research", "moral"));
(Conscience,529.14),
(Disgust,297.61),
(Empathy,295.73),
(Free will,255.95),
(Good and evil,236.42),
(Eudaimonia,181.5),
(Informed consent,148.49),
(Cognitive dissonance,141.24),
(Anger,136.38),
(Development communication,131.35)
⚠️ **GitHub.com Fallback** ⚠️