In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
In [2]:
# Load the workbook's first worksheet (sheet index 0 is read_excel's default)
# and preview the first five rows.
mba1 = pd.read_excel('BW_MBA_data.xlsx', sheet_name=0)
mba1.head(n=5)
Out[2]:
In [3]:
# Open the workbook as an ExcelFile so individual worksheets can be parsed by
# name from the same handle. `mba` is reused by a later cell to read a second
# sheet, so it is intentionally left open here.
# NOTE(review): no visible cell calls mba.close() once the sheets are loaded.
mba = pd.ExcelFile('BW_MBA_data.xlsx')

# Parse the 'MBA Data' worksheet into a DataFrame and preview it.
mba_data = mba.parse(sheet_name='MBA Data')
mba_data.head()
Out[3]:
Fulltime Business Week Ranking School Name State Type Enrollment Avg GMAT Resident Tuition, Fees Pct International Pct Female Pct Asian American Pct Minority Pct with job offers Avg starting base salary
0 1 University of Chicago Illinois Private 1144 713.0 97165.0 35.0 35.0 16.0 7.0 92.0 107091.0
1 2 Harvard University Massachusetts Private 1801 720.0 101660.0 33.0 38.0 NaN NaN 94.0 124378.0
2 3 Northwestern University Illinois Private 1200 711.0 93918.0 34.0 36.0 25.0 13.0 95.0 108064.0
3 4 University of Pennsylvania Pennsylvania Private 1651 714.0 104410.0 44.0 36.0 7.8 9.0 89.0 112186.0
4 5 University of Michigan Michigan Public 898 706.0 80879.0 27.0 34.0 21.0 13.0 89.0 103608.0
In [4]:
# Parse the 'Undergraduate Data' worksheet from the already-open workbook
# handle `mba` and preview the first rows.
ug_data = mba.parse(sheet_name='Undergraduate Data')
ug_data.head()
Out[4]:
2009 Rank 2008 Rank School Name Location Type Program Length Annual Cost Fulltime enrollment Student Rank Recruiter Rank Median Starting Salary MBA Feeder Rank Academic Quality Rank Faculty Student Ratio Average SAT Score Average ACT Score Teaching Quality Grade Facilities & Service Grade Job Placement Grade
0 1 2.0 Virginia (McIntire) Charlottesville Public 2 9490 655 1 52 58000 5 5 10.40 1355 30 A+ A+ A+
1 2 3.0 Notre Dame (Mendoza) South Bend, Ind. Private 3 36847 1669 2 12 55000 11 16 18.57 1405 32 A+ A+ A+
2 3 1.0 Pennsylvania (Wharton) Philadelphia Private 4 37526 2528 13 13 61001 10 1 10.89 1440 32 A+ A A+
3 4 6.0 Michigan (Ross) Ann Arbor Public 3 10848 1050 18 8 60000 7 8 15.22 1346 30 B A A+
4 5 7.0 Brigham Young (Marriott) Provo, Utah Private 2 4110 1783 6 1 50000 17 40 19.00 1231 27 A A+ A+
In [5]:
# Load the annual enterprise survey CSV with read_csv's default settings.
# NOTE(review): the preview shows Value entries such as "728,239", which
# suggests the column is loaded as text rather than numbers; confirm, and
# consider thousands=',' if numeric work on Value is needed.
NZStats = pd.read_csv(
    'annual-enterprise-survey-2019-financial-year-provisional-csv.csv'
)
NZStats.head()
Out[5]:
Year Industry_aggregation_NZSIOC Industry_code_NZSIOC Industry_name_NZSIOC Units Variable_code Variable_name Variable_category Value Industry_code_ANZSIC06
0 2019 Level 1 99999 All industries Dollars (millions) H01 Total income Financial performance 728,239 ANZSIC06 divisions A-S (excluding classes K633...
1 2019 Level 1 99999 All industries Dollars (millions) H04 Sales, government funding, grants and subsidies Financial performance 643,809 ANZSIC06 divisions A-S (excluding classes K633...
2 2019 Level 1 99999 All industries Dollars (millions) H05 Interest, dividends and donations Financial performance 62,924 ANZSIC06 divisions A-S (excluding classes K633...
3 2019 Level 1 99999 All industries Dollars (millions) H07 Non-operating income Financial performance 21,505 ANZSIC06 divisions A-S (excluding classes K633...
4 2019 Level 1 99999 All industries Dollars (millions) H08 Total expenditure Financial performance 634,710 ANZSIC06 divisions A-S (excluding classes K633...
In [6]:
# Read the air-quality text file as comma-delimited data.
# For a tab-delimited file the separator would be sep='\t' instead.
air_quality = pd.read_csv(
    'air_quality_LA.txt',
    sep=',',
)
air_quality.head()
Out[6]:
Time Max Air Quality Index Los Angeles California Max AQI\n
0 20090507 47
1 20090501 53
2 20090425 36
3 20090419 36
4 20090413 55
In [7]:
# Load the employee records from the workbook's first worksheet
# (sheet index 0 is read_excel's default) and preview the first five rows.
employees = pd.read_excel('employee_data.xlsx', sheet_name=0)
employees.head(n=5)
Out[7]:
Employee Gender Age Prior Experience Beta Experience Education Annual Salary
0 1 1 39 5 12 4 57700
1 2 0 44 12 8 6 76400
2 3 0 24 0 2 4 44000
3 4 1 25 2 1 4 41600
4 5 0 56 5 25 8 163900
In [8]:
# Columns that define each group.
groups = ['Gender', 'Education']

# Mean of every other column for each Gender/Education combination;
# as_index=False keeps the grouping keys as ordinary columns.
by_sex_edu = employees.groupby(by=groups, as_index=False).mean()

# Display only the group keys and the mean Age, rounded to two decimals.
by_sex_edu[groups + ['Age']].round(2)
Out[8]:
Gender Education Age
0 0 0 33.50
1 0 2 37.22
2 0 4 37.22
3 0 6 40.75
4 0 8 45.50
5 1 0 30.38
6 1 2 39.70
7 1 4 42.72
8 1 6 38.26
9 1 8 35.40
In [9]:
# Bar chart of Age aggregated per Gender category; seaborn's barplot
# summarises y within each x category (the mean, by default).
sns.barplot(data=employees, x='Gender', y='Age')
plt.show()
In [10]:
# Number of employee rows in each Gender category.
sns.countplot(data=employees, x='Gender')
plt.show()
In [12]:
# Switch seaborn to the 'whitegrid' theme; this is a global setting, so it
# also applies to figures drawn after this cell.
sns.set_style('whitegrid')

# Same count plot as before, now with a title and y-axis label set on the
# Axes that countplot returns.
sns.countplot(data=employees, x='Gender').set_title('Count of Gender')
plt.gca().set_ylabel('Count')

# Remove the top and right spines of the current figure, then render it.
sns.despine()
plt.show()
In [13]:
# Scatter of annual salary against age, with every point drawn in red.
plt.scatter(
    x=employees['Age'],
    y=employees['Annual Salary'],
    color='red',
)
plt.show()
In [14]:
# Scatter of annual salary against age; the Gender values are passed as `c`,
# so each point's colour is taken from the colormap according to its Gender
# code (the employees preview shows Gender coded as 0/1).
plt.scatter(
    x=employees['Age'],
    y=employees['Annual Salary'],
    c=employees['Gender'],
)
plt.show()
In [ ]:
# Reference cell (never executed in this notebook): common pyplot calls for
# annotating and saving the current figure. The label strings, axis limits
# and file name below are placeholders to be replaced with real values.

# Axis labels.
plt.xlabel('variable1')
plt.ylabel('variable2')
# Visible range of the x and y axes, given as [lower, upper].
plt.xlim([1,2])
plt.ylim([0,50])
# Figure title.
plt.title('text string')
# Legend placed at the bottom centre of the axes.
# NOTE(review): nothing plotted in this cell carries a label, so the legend
# would presumably be empty if the cell were run as-is.
plt.legend(loc='lower center')

# Placeholder for displaying the figure (presumably plt.show()); the line
# below writes the current figure to 'name.png' instead.
plt.savefig('name.png')
In [15]:
# Fitted regression of Annual Salary on Age, drawn in blue without the
# underlying data points. `scatter` is a boolean flag, so the points are
# switched off with an explicit False instead of relying on None being falsy.
sns.regplot(data=employees, x='Age', y='Annual Salary', scatter=False, color='blue')
plt.show()
In [17]:
# Regression of Annual Salary on Age, fitted separately per Gender:
# `row` gives each Gender value its own facet row and `hue` colours it.
sns.lmplot(
    data=employees,
    x='Age',
    y='Annual Salary',
    hue='Gender',
    row='Gender',
)
plt.show()
In [18]:
# Grid of pairwise plots across the columns of `employees`.
# NOTE(review): this includes the Employee column, which looks like a row
# identifier in the preview rather than a measurement - confirm it belongs here.
sns.pairplot(data=employees)
plt.show()
In [ ]: