import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import warnings
# Suppress every warning from this point on. Note that this also hides
# deprecation notices emitted by the libraries used below.
warnings.filterwarnings("ignore")


              
                # Load the student-performance CSV from the working directory and preview it.
csv_path = 'students_performance.csv'
df = pd.read_csv(csv_path)
df.head()


              
                # Keep only the two columns needed for the gender comparison.
test = df.loc[:, ['gender', 'reading score']]
test.head()


              
                # Boolean mask that is True on rows whose gender is 'female'.
female = test['gender'].eq('female')
test.loc[female].head()


              
                # Boolean mask that is True on rows whose gender is 'male'.
male = test['gender'].eq('male')
test.loc[male].head()


              
                # Mean reading score for each gender, selected through the boolean masks.
fe_avg = test.loc[female, 'reading score'].mean()
male_avg = test.loc[male, 'reading score'].mean()
print("Femenino: ", fe_avg, "Masculino: ", male_avg)

Femenino:  72.60810810810811 Masculino:  65.47302904564316


              
                # Collect the two per-gender averages into a small summary table.
df_reading = pd.DataFrame(
    {
        'Gender': ['female', 'male'],
        'reading score': [fe_avg, male_avg],
    }
)
df_reading.head()


              
                # Group the two-column frame by gender and average the remaining column.
by_gender = test.groupby('gender')
by_gender.mean()


              
                # Group by ethnicity; printing the result shows only the GroupBy object's repr.
race = df.groupby(by='race/ethnicity')
print(race)

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x00000201AA804C88>


              
                # Inspect the class of the object returned by groupby().
                type(race)

pandas.core.groupby.generic.DataFrameGroupBy


              
                # Pull out the rows belonging to a single group by its key.
race.get_group(name='group B')


              
                # Number of rows in each ethnicity group.
group_sizes = race.size()
group_sizes

race/ethnicity
group A     89
group B    190
group C    319
group D    262
group E    140
dtype: int64


              
                # Iterating the GroupBy yields (key, sub-frame) pairs; report each group's row count.
for label, members in race:
    row_count = len(members)
    print(label, 'has', row_count, 'data')

group A has 89 data
group B has 190 data
group C has 319 data
group D has 262 data
group E has 140 data


              
                # Row counts for every gender / ethnicity combination.
group_keys = ['gender', 'race/ethnicity']
df.groupby(group_keys).size()

gender  race/ethnicity
female  group A            36
        group B           104
        group C           180
        group D           129
        group E            69
male    group A            53
        group B            86
        group C           139
        group D           133
        group E            71
dtype: int64


              
                # Apply both aggregations (max and min) to every column within each ethnicity group.
extremes = [np.max, np.min]
df.groupby('race/ethnicity').agg(extremes)


              
                # Max and min of the three score columns within each ethnicity group.
                # Several columns must be selected from a GroupBy with a list: indexing with
                # bare comma-separated labels passes a tuple, which pandas deprecated in 1.0
                # and rejects in 2.0.
score_cols = ['math score', 'reading score', 'writing score']
df.groupby('race/ethnicity')[score_cols].agg([np.max, np.min])


              
                # Max and min of the three score columns for every ethnicity / gender pair.
                # Several columns must be selected from a GroupBy with a list: indexing with
                # bare comma-separated labels passes a tuple, which pandas deprecated in 1.0
                # and rejects in 2.0.
score_cols = ['math score', 'reading score', 'writing score']
df.groupby(['race/ethnicity', 'gender'])[score_cols].agg([np.max, np.min])


              
                # A dict passed to agg() chooses different aggregations for each column.
per_column_stats = {
    'math score': ['max', 'mean'],
    'reading score': ['median', 'min'],
}
df.groupby('race/ethnicity').agg(per_column_stats)


              
                # Max and mean of the math and reading scores per ethnicity group, then
                # replace the two-level column header with flat, readable labels.
math_read = df.groupby('race/ethnicity').agg(
    {
        'math score': ['max', 'mean'],
        'reading score': ['max', 'mean'],
    }
)
math_read.columns = [
    'Max Math Score',
    'Average Math Score',
    'Max Reading Score',
    'Average Reading Score',
]


              
                # Preview the renamed summary table (first five rows).
math_read.head(n=5)


              
                # Nominal columns are summarised by their mode; the math score by its mean.
summary_spec = {
    'lunch': pd.Series.mode,
    'parental level of education': pd.Series.mode,
    'math score': np.mean,
}
df.groupby(['race/ethnicity', 'gender']).agg(summary_spec)


              
                # Group on a computed boolean key: whether the education label contains 'high'.
mentions_high = df['parental level of education'].apply(lambda level: 'high' in level)
df.groupby(mentions_high).size()

parental level of education
False    625
True     375
dtype: int64


              
                # Bin math scores into three quantile-based, labelled buckets and count rows per bucket.
math_terciles = pd.qcut(x=df['math score'], q=3, labels=['low', 'average', 'high'])
df.groupby(math_terciles).size()

math score
low        339
average    341
high       320
dtype: int64


              
                # Count students per fixed math-score band (0-40, 40-70, 70-100).
                # pd.cut bins are open on the left by default, so a score of exactly 0 falls
                # outside (0, 40] and is dropped from the counts; include_lowest=True closes
                # the first interval so every row is counted. Re-run the cell to refresh any
                # previously recorded output.
score_bands = pd.cut(df['math score'], [0, 40, 70, 100], include_lowest=True)
df.groupby(score_bands).size()

math score
(0, 40]       49
(40, 70]     559
(70, 100]    391
dtype: int64


              
                # For each row: its math score minus the mean math score of its
                # (ethnicity, gender) group. transform() keeps the original row alignment.
math_by_group = df.groupby(['race/ethnicity', 'gender'])['math score']
df['Distance From the Mean'] = math_by_group.transform(
    lambda scores: scores - scores.mean()
)
df.head()


              
                # Keep only the rows whose ethnicity group has more than 100 members.
by_ethnicity = df.groupby('race/ethnicity')
df_n = by_ethnicity.filter(lambda members: len(members) > 100)


              
                # Row counts before and after the group-size filter, one per line.
for frame in (df, df_n):
    print(len(frame))

1000
911


              
                # Attach to every row the mean reading score of its ethnicity group by
                # mapping the ethnicity label through the per-group means.
mean_reading_by_group = df.groupby(['race/ethnicity'])['reading score'].mean()
df['New'] = df['race/ethnicity'].map(mean_reading_by_group)
df.head()


              
                # Clear the current figure, then draw the number of rows per parental
                # education level as a bar chart.
plt.clf()
education_counts = df.groupby('parental level of education').size()
education_counts.plot(kind='bar');


              
                # Clear the current figure, then draw the number of rows per parental
                # education level as a pie chart.
plt.clf()
education_counts = df.groupby('parental level of education').size()
education_counts.plot(kind='pie');


              
                # Plot the mean math score of each ethnicity group with the default plot kind.
mean_math = df.groupby('race/ethnicity')['math score'].mean()
mean_math.plot();

	gender	race/ethnicity	parental level of education	lunch	test preparation course	math score	reading score	writing score
0	female	group B	bachelor's degree	standard	none	72	72	74
2	female	group B	master's degree	standard	none	90	95	93
5	female	group B	associate's degree	standard	none	71	83	78
6	female	group B	some college	standard	completed	88	95	92
7	male	group B	some college	free/reduced	none	40	43	39
...	...	...	...	...	...	...	...	...
969	female	group B	bachelor's degree	standard	none	75	84	80
976	male	group B	some college	free/reduced	completed	60	62	60
980	female	group B	high school	free/reduced	none	8	24	23
982	male	group B	some high school	standard	completed	79	85	86
991	female	group B	some high school	standard	completed	65	82	78

	gender		parental level of education		lunch		test preparation course		math score		reading score		writing score
	amax	amin	amax	amin	amax	amin	amax	amin	amax	amin	amax	amin	amax	amin
race/ethnicity
group A	male	female	some high school	associate's degree	standard	free/reduced	none	completed	100	28	100	23	97	19
group B	male	female	some high school	associate's degree	standard	free/reduced	none	completed	97	8	97	24	96	15
group C	male	female	some high school	associate's degree	standard	free/reduced	none	completed	98	0	100	17	100	10
group D	male	female	some high school	associate's degree	standard	free/reduced	none	completed	100	26	100	31	100	32
group E	male	female	some high school	associate's degree	standard	free/reduced	none	completed	100	30	100	26	100	22

		lunch	parental level of education	math score
race/ethnicity	gender
group A	female	standard	some high school	58.527778
group A	male	standard	some high school	63.735849
group B	female	standard	high school	61.403846
group B	male	standard	some college	65.930233
group C	female	standard	associate's degree	62.033333
group C	male	standard	high school	67.611511
group D	female	standard	some college	65.248062
group D	male	standard	some college	69.413534
group E	female	standard	associate's degree	70.811594
group E	male	standard	associate's degree	76.746479

	gender	race/ethnicity	parental level of education	lunch	test preparation course	math score	reading score	writing score	Distance From the Mean
0	female	group B	bachelor's degree	standard	none	72	72	74	10.596154
1	female	group C	some college	standard	completed	69	90	88	6.966667
2	female	group B	master's degree	standard	none	90	95	93	28.596154
3	male	group A	associate's degree	free/reduced	none	47	57	44	-16.735849
4	male	group C	some college	standard	none	76	78	75	8.388489

	gender	race/ethnicity	parental level of education	lunch	test preparation course	math score	reading score	writing score	Distance From the Mean	New
0	female	group B	bachelor's degree	standard	none	72	72	74	10.596154	67.352632
1	female	group C	some college	standard	completed	69	90	88	6.966667	69.103448
2	female	group B	master's degree	standard	none	90	95	93	28.596154	67.352632
3	male	group A	associate's degree	free/reduced	none	47	57	44	-16.735849	64.674157
4	male	group C	some college	standard	none	76	78	75	8.388489	69.103448

Función groupby() de Pandas.

Una función para resumen y análisis de datos.

Carga de Librerías

Carga del conjunto de datos.

¿Cómo funciona groupby()?

Grupos con groupby()

Tamaño de cada grupo

Bucles sobre cada grupo

Agrupación por múltiples variables

Agrupando por múltiples variables y utilizando múltiples funciones agregadas.

Diferentes funciones agregadas en diferentes columnas.

Uso de funciones agregadas en columnas nominales.

Aplicar una función en groupby().

Transformación con groupby().

Filtros con groupby()

Mapeo de datos

Visualización usando groupby().

Conclusiones

	math score		reading score
	max	mean	median	min
race/ethnicity
group A	100	61.629213	64	23
group B	97	63.452632	67	24
group C	98	64.463950	71	17
group D	100	67.362595	71	31
group E	100	73.821429	74	26