Experimentación - XChanitoX/Proyecto1_BD2 GitHub Wiki
Para la experimentación y prueba de nuestros métodos de organización utilizamos la tabla EduData.csv, que tiene la siguiente estructura:
// Fixed-length record mirroring one row of the EduData.csv table.
//
// Every text column is stored in a fixed-size char buffer. The `{' '}`
// initializer sets the first byte to a space and zero-initializes the
// remaining bytes of the array.
class RecordEdu
{
private:
    char ID [10]{' '};
    char Gender [10]{' '};
    char Nacionalidad [25]{' '};
    char PlaceofBirth [25]{' '};
    char StageID [15]{' '};
    char GradeID [10]{' '};
    char SectionID [10]{' '};
    char Topic [15]{' '};
    char Semester [10]{' '};
    char Relation [10]{' '};
    char raisedhands [10]{' '};
    char visitedResources [10]{' '};
    char announcements [10]{' '};
    char discussion [10]{' '};
    char parentAnswer [10]{' '};
    char parentSchool [10]{' '};
    char studentAbsent [10]{' '};
    char clase [10]{' '};
public:
    // Neither public member has a default initializer, so both hold
    // indeterminate values until assigned.
    int nextDel;  // NOTE(review): presumably links deleted records (free list) — confirm
    char ref;     // NOTE(review): meaning not visible in this excerpt — confirm
};  // a class definition must be terminated with ';'
En este caso, la tabla original cuenta con un total de 400 registros. Con el fin de probar los tiempos de ejecución y el espacio en memoria, creamos el siguiente script en Python, que genera archivos con las siguientes cantidades de registros: 100, 500, 1000, 5000 y 10000, basados en la información obtenida de la tabla.
# Number of records to generate per output file (one CSV per entry).
size = [100, 500, 1000, 5000, 10000]

# Column names written as the first line of every generated CSV.
# 'GradeID' sits between 'StageID' and 'SectionID' so the header lines up
# with the 18 values that random_input() produces for each row.
header = ['ID', 'gender', 'NationalITy', 'PlaceOfBirth', 'StageID', 'GradeID',
          'SectionID', 'Topic', 'Semester', 'Relation', 'raisedhands',
          'VisITedResources', 'AnnouncementsView', 'Discussion',
          'ParentAnsweringSurvey', 'ParentschoolSatisfaction',
          'StudentAbsenceDays', 'Class']

# Lookup tables for the categorical columns. Each maps a dense integer index
# (0 .. len-1) to one of the allowed values for that column.
gender = {0: 'M', 1: 'F'}
NationalITy = {0: 'KW', 1: 'lebanon', 2: 'Egypt', 3: 'SaudiArabia', 4: 'USA',
               5: 'Jordan', 6: 'Iran', 7: 'Canada', 8: 'South Africa',
               9: 'Turkey'}
PlaceOfBirth = {0: 'KuwaIT', 1: 'lebanon', 2: 'Egypt', 3: 'SaudiArabia',
                4: 'USA', 5: 'Jordan', 6: 'Iran', 7: 'Canada',
                8: 'South Africa', 9: 'Turkey'}
StageID = {0: 'lowerlevel', 1: 'MiddleSchool', 2: 'HighSchool'}
GradeID = {0: 'G-01', 1: 'G-02', 2: 'G-03', 3: 'G-04', 4: 'G-05', 5: 'G-06',
           6: 'G-07', 7: 'G-08', 8: 'G-09', 9: 'G-10', 10: 'G-11',
           11: 'G-12', 12: 'G-13', 13: 'G-14', 14: 'G-15'}
SectionID = {0: 'A', 1: 'B', 2: 'C', 3: 'D', 4: 'E'}
Topic = {0: 'IT', 1: 'Math', 2: 'Arabic',
         3: 'Science', 4: 'English', 5: 'Quran'}
Semester = {0: 'F', 1: 'S'}
Relation = {0: 'Father', 1: 'Mum', 2: 'Relative'}
parentAnsweringSurvey = {0: 'Yes', 1: 'No'}
parentschoolSatisfaction = {0: 'Good', 1: 'Bad'}
StudentAbsenceDays = {0: 'Under-7', 1: 'Above-7'}
clase = {0: 'M', 1: 'H', 2: 'L'}
def random_input(n):
    """Build ``n`` synthetic records shaped like the rows of the EduData table.

    Each record is a list of 18 strings: a 1-based sequential ID, nine
    categorical values taken from the module-level lookup tables, four
    numeric counters in the range 0-99, and four more categorical values.

    Every categorical column is drawn independently and uniformly from its
    own table. Deriving all of them from one shared random number would make
    the columns perfectly correlated and leave the higher ``GradeID``
    entries unreachable.

    Args:
        n: Number of records to generate. Zero or a negative value yields
            an empty list.

    Returns:
        A list of ``n`` rows, each a list of 18 strings.
    """

    def pick(table):
        # The lookup tables are keyed 0 .. len-1, so a uniform index gives a
        # uniform choice among the table's values.
        return table[randint(0, len(table) - 1)]

    def counter():
        # Activity counters are stored as text, like every other column.
        return str(randint(0, 99))

    rows = []
    for i in range(n):
        rows.append([
            str(i + 1),
            pick(gender),
            pick(NationalITy),
            pick(PlaceOfBirth),
            pick(StageID),
            pick(GradeID),
            pick(SectionID),
            pick(Topic),
            pick(Semester),
            pick(Relation),
            counter(),
            counter(),
            counter(),
            counter(),
            pick(parentAnsweringSurvey),
            pick(parentschoolSatisfaction),
            pick(StudentAbsenceDays),
            pick(clase),
        ])
    return rows
def w():
    """Write one CSV file of synthetic records per entry in ``size``.

    For every count in ``size`` a file named ``<count>.csv`` is created in
    the current working directory (overwriting any existing file). It holds
    the ``header`` row followed by ``count`` rows from ``random_input``.
    """
    for count in size:
        filename = str(count) + '.csv'
        # newline='' lets the csv module control line endings itself; without
        # it, platforms that translate '\n' end up with blank lines between
        # rows.
        with open(filename, 'w', newline='') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(header)
            csvwriter.writerows(random_input(count))