中国家庭追踪调查2018#

import pandas as pd
import numpy as np
import seaborn as sns
import pylab as plt

# Matplotlib text settings: pick a font that can display Chinese labels, and
# turn off the Unicode minus sign so negative numbers display correctly
# (the original note says the ['SimHei'] font does not work with this setting).
plt.rcParams.update({
    'font.sans-serif': ['Microsoft YaHei'],
    'axes.unicode_minus': False,
})

# Load the CFPS 2018 person file. Value labels and Stata missing-value
# objects are not converted, so every column keeps its raw numeric codes.
df = pd.read_stata(
    '/Users/datalab/bigdata/中国家庭追踪调查2018/cfps2018person_202012.dta',
    convert_categoricals=False,
    convert_missing=False,
)
df.head()
pid code fid18 fid16 fid14 fid12 fid10 pid_a_f pid_a_m selfrpt ... cfps2018eduy_im gdge gdgeyear gdgemonth catipilot pg02 pg1201_min pg1201_max interviewerid18 releaseversion
0 100051501.0 501.0 100051.0 100051.0 100051.0 -8.0 -8.0 110043201.0 110043105.0 1.0 ... 12.0 NaN NaN NaN 0.0 NaN NaN NaN 761040.0 2.1
1 100051502.0 502.0 100051.0 100051.0 100051.0 -8.0 -8.0 -8.0 -8.0 1.0 ... 12.0 NaN NaN NaN 0.0 NaN NaN NaN 761040.0 2.1
2 100160601.0 601.0 100160.0 -8.0 -8.0 -8.0 -8.0 -8.0 -8.0 1.0 ... NaN NaN NaN NaN 0.0 NaN NaN NaN 459505.0 2.1
3 100376551.0 551.0 100376.0 100376.0 -8.0 -8.0 -8.0 -8.0 -8.0 1.0 ... NaN NaN NaN NaN 0.0 NaN NaN NaN 526621.0 2.1
4 100551551.0 551.0 100551.0 100551.0 -8.0 -8.0 -8.0 -8.0 -8.0 1.0 ... NaN NaN NaN NaN 0.0 NaN NaN NaN 696822.0 2.1

5 rows × 1371 columns

len(df)
37354
print(sorted(df.columns.tolist()))
['a12hk', 'a12p', 'age', 'birthp', 'birthw', 'catipilot', 'cesd20sc', 'cesd8', 'cfps2018edu', 'cfps2018eduy', 'cfps2018eduy_im', 'cfps2018sch', 'cid18', 'cmonth', 'cmstart', 'code', 'cohabitn', 'countyid18', 'cyear', 'ear1', 'ear101', 'ear101a', 'ear101c_code', 'ear101e_code', 'ear102', 'ear103', 'ear104c', 'ear104m', 'ear104y', 'ear105m', 'ear105y', 'ear106_1fc_a_1', 'ear106_1fc_a_2', 'ear106_1fc_a_3', 'ear106_1fc_a_4', 'ear106_1fc_a_5', 'ear106_1fc_a_6', 'ear106_1fc_a_7', 'ear106_1fc_a_8', 'ear106_1fm_a_1', 'ear106_1fm_a_2', 'ear106_1fm_a_3', 'ear106_1fm_a_4', 'ear106_1fm_a_5', 'ear106_1fm_a_6', 'ear106_1fm_a_7', 'ear106_1fm_a_8', 'ear106_1fy_a_1', 'ear106_1fy_a_2', 'ear106_1fy_a_3', 'ear106_1fy_a_4', 'ear106_1fy_a_5', 'ear106_1fy_a_6', 'ear106_1fy_a_7', 'ear106_1fy_a_8', 'ear106_1sm_a_1', 'ear106_1sm_a_2', 'ear106_1sm_a_3', 'ear106_1sm_a_4', 'ear106_1sm_a_5', 'ear106_1sm_a_6', 'ear106_1sm_a_7', 'ear106_1sm_a_8', 'ear106_1sy_a_1', 'ear106_1sy_a_2', 'ear106_1sy_a_3', 'ear106_1sy_a_4', 'ear106_1sy_a_5', 'ear106_1sy_a_6', 'ear106_1sy_a_7', 'ear106_1sy_a_8', 'ear106_a_1', 'ear106_a_2', 'ear106_a_3', 'ear106_a_4', 'ear106_a_5', 'ear106_a_6', 'ear106_a_7', 'ear106_a_8', 'ear2', 'ear201', 'ear201a', 'ear201c_code', 'ear201e_code', 'ear202m', 'ear202y', 'ear3_1fc_a_1', 'ear3_1fc_a_2', 'ear3_1fc_a_3', 'ear3_1fc_a_4', 'ear3_1fc_a_5', 'ear3_1fc_a_6', 'ear3_1fc_a_7', 'ear3_1fm_a_1', 'ear3_1fm_a_2', 'ear3_1fm_a_3', 'ear3_1fm_a_4', 'ear3_1fm_a_5', 'ear3_1fm_a_6', 'ear3_1fm_a_7', 'ear3_1fy_a_1', 'ear3_1fy_a_2', 'ear3_1fy_a_3', 'ear3_1fy_a_4', 'ear3_1fy_a_5', 'ear3_1fy_a_6', 'ear3_1fy_a_7', 'ear3_1sm_a_1', 'ear3_1sm_a_2', 'ear3_1sm_a_3', 'ear3_1sm_a_4', 'ear3_1sm_a_5', 'ear3_1sm_a_6', 'ear3_1sm_a_7', 'ear3_1sy_a_1', 'ear3_1sy_a_2', 'ear3_1sy_a_3', 'ear3_1sy_a_4', 'ear3_1sy_a_5', 'ear3_1sy_a_6', 'ear3_1sy_a_7', 'ear3_a_1', 'ear3_a_2', 'ear3_a_3', 'ear3_a_4', 'ear3_a_5', 'ear3_a_6', 'ear3_a_7', 'ear3_a_8', 'ear4', 'ear5a_a_1', 'ear5a_a_2', 'ear5a_a_3', 'ear5a_a_4', 'ear5a_a_5', 
'ear5a_a_6', 'ear5c_a_1_code', 'ear5c_a_2_code', 'ear5c_a_3_code', 'ear5c_a_4_code', 'ear5c_a_5_code', 'ear5c_a_6_code', 'ear5e_a_1_code', 'ear5e_a_2_code', 'ear5e_a_3_code', 'ear5e_a_4_code', 'ear5e_a_5_code', 'ear601_a_1', 'ear601_a_10', 'ear601_a_11', 'ear601_a_12', 'ear601_a_13', 'ear601_a_17', 'ear601_a_18', 'ear601_a_19', 'ear601_a_2', 'ear601_a_25', 'ear601_a_26', 'ear601_a_27', 'ear601_a_28', 'ear601_a_3', 'ear601_a_33', 'ear601_a_4', 'ear601_a_41', 'ear601_a_5', 'ear601_a_6', 'ear601_a_7', 'ear601_a_9', 'ear6fc_a_1', 'ear6fc_a_10', 'ear6fc_a_11', 'ear6fc_a_12', 'ear6fc_a_13', 'ear6fc_a_17', 'ear6fc_a_18', 'ear6fc_a_19', 'ear6fc_a_2', 'ear6fc_a_25', 'ear6fc_a_26', 'ear6fc_a_27', 'ear6fc_a_28', 'ear6fc_a_3', 'ear6fc_a_33', 'ear6fc_a_4', 'ear6fc_a_41', 'ear6fc_a_5', 'ear6fc_a_6', 'ear6fc_a_7', 'ear6fc_a_9', 'ear6fm_a_1', 'ear6fm_a_10', 'ear6fm_a_11', 'ear6fm_a_12', 'ear6fm_a_13', 'ear6fm_a_17', 'ear6fm_a_18', 'ear6fm_a_19', 'ear6fm_a_2', 'ear6fm_a_25', 'ear6fm_a_26', 'ear6fm_a_27', 'ear6fm_a_28', 'ear6fm_a_3', 'ear6fm_a_33', 'ear6fm_a_4', 'ear6fm_a_41', 'ear6fm_a_5', 'ear6fm_a_6', 'ear6fm_a_7', 'ear6fm_a_9', 'ear6fy_a_1', 'ear6fy_a_10', 'ear6fy_a_11', 'ear6fy_a_12', 'ear6fy_a_13', 'ear6fy_a_17', 'ear6fy_a_18', 'ear6fy_a_19', 'ear6fy_a_2', 'ear6fy_a_25', 'ear6fy_a_26', 'ear6fy_a_27', 'ear6fy_a_28', 'ear6fy_a_3', 'ear6fy_a_33', 'ear6fy_a_4', 'ear6fy_a_41', 'ear6fy_a_5', 'ear6fy_a_6', 'ear6fy_a_7', 'ear6fy_a_9', 'ear6sm_a_1', 'ear6sm_a_10', 'ear6sm_a_11', 'ear6sm_a_12', 'ear6sm_a_13', 'ear6sm_a_17', 'ear6sm_a_18', 'ear6sm_a_19', 'ear6sm_a_2', 'ear6sm_a_25', 'ear6sm_a_26', 'ear6sm_a_27', 'ear6sm_a_28', 'ear6sm_a_3', 'ear6sm_a_33', 'ear6sm_a_4', 'ear6sm_a_41', 'ear6sm_a_5', 'ear6sm_a_6', 'ear6sm_a_7', 'ear6sm_a_9', 'ear6sy_a_1', 'ear6sy_a_10', 'ear6sy_a_11', 'ear6sy_a_12', 'ear6sy_a_13', 'ear6sy_a_17', 'ear6sy_a_18', 'ear6sy_a_19', 'ear6sy_a_2', 'ear6sy_a_25', 'ear6sy_a_26', 'ear6sy_a_27', 'ear6sy_a_28', 'ear6sy_a_3', 'ear6sy_a_33', 'ear6sy_a_4', 'ear6sy_a_41', 
'ear6sy_a_5', 'ear6sy_a_6', 'ear6sy_a_7', 'ear6sy_a_9', 'ear7a_a_1', 'ear7a_a_2', 'ear7a_a_3', 'ear7c_a_1_code', 'ear7c_a_2_code', 'ear7c_a_3_code', 'ear7e_a_1_code', 'ear7e_a_2_code', 'ear7mm_a_1', 'ear7mm_a_2', 'ear7mm_a_3', 'ear7my_a_1', 'ear7my_a_2', 'ear7my_a_3', 'ear7sefc_a_1', 'ear7sefc_a_2', 'ear7sefc_a_3', 'ear7sefm_a_1', 'ear7sefm_a_2', 'ear7sefm_a_3', 'ear7sefy_a_1', 'ear7sefy_a_2', 'ear7sefy_a_3', 'ear7sesm_a_1', 'ear7sesm_a_2', 'ear7sesm_a_3', 'ear7sesy_a_1', 'ear7sesy_a_2', 'ear7sesy_a_3', 'eb202_1', 'eb203_1', 'eb406_1_a_1', 'eb406_1_a_2', 'eb602_1_a_1', 'eb602_1_a_2', 'eb602_1_a_3', 'edu_last', 'edu_updated', 'eeb1', 'eeb2', 'eeb201m', 'eeb201y', 'eeb202c', 'eeb202m', 'eeb202y', 'eeb203c', 'eeb203m', 'eeb203y', 'eeb301', 'eeb302code', 'eeb4', 'eeb401m_a_1', 'eeb401m_a_2', 'eeb401y_a_1', 'eeb401y_a_2', 'eeb4021_a_1', 'eeb4021_a_2', 'eeb4022_a_1code', 'eeb402m_a_1', 'eeb402m_a_2', 'eeb402y_a_1', 'eeb402y_a_2', 'eeb403_a_1', 'eeb403_a_2', 'eeb404_a_1', 'eeb404_a_2', 'eeb406c_a_1', 'eeb406c_a_2', 'eeb406m_a_1', 'eeb406m_a_2', 'eeb406y_a_1', 'eeb406y_a_2', 'eeb407_a_1', 'eeb407_a_2', 'eeb409_a_1', 'eeb409_a_2', 'eeb5', 'eeb501', 'eeb6', 'eeb601m_a_1', 'eeb601m_a_2', 'eeb601m_a_3', 'eeb601y_a_1', 'eeb601y_a_2', 'eeb601y_a_3', 'eeb602c_a_1', 'eeb602c_a_2', 'eeb602c_a_3', 'eeb602m_a_1', 'eeb602m_a_2', 'eeb602m_a_3', 'eeb602y_a_1', 'eeb602y_a_2', 'eeb602y_a_3', 'eeb603m_a_1', 'eeb603m_a_2', 'eeb603m_a_3', 'eeb603y_a_1', 'eeb603y_a_2', 'eeb603y_a_3', 'eeb604_a_1', 'eeb604_a_2', 'eeb604_a_3', 'egc101', 'egc1011', 'egc103', 'egc104c', 'egc104m', 'egc104y', 'egc105', 'egc1052m', 'egc1052y', 'egc1053c', 'egc1053m', 'egc1053y', 'egc201', 'egc2012m_a_1', 'egc2012m_a_2', 'egc2012m_a_3', 'egc2012m_a_4', 'egc2012m_a_5', 'egc2012m_a_6', 'egc2012m_a_7', 'egc2012y_a_1', 'egc2012y_a_2', 'egc2012y_a_3', 'egc2012y_a_4', 'egc2012y_a_5', 'egc2012y_a_6', 'egc2012y_a_7', 'egc2013c_a_1', 'egc2013c_a_2', 'egc2013c_a_3', 'egc2013c_a_4', 'egc2013c_a_5', 'egc2013c_a_6', 
'egc2013c_a_7', 'egc2013m_a_1', 'egc2013m_a_2', 'egc2013m_a_3', 'egc2013m_a_4', 'egc2013m_a_5', 'egc2013m_a_6', 'egc2013m_a_7', 'egc2013y_a_1', 'egc2013y_a_2', 'egc2013y_a_3', 'egc2013y_a_4', 'egc2013y_a_5', 'egc2013y_a_6', 'egc2013y_a_7', 'egc2021_a_1', 'egc2021_a_2', 'egc2021_a_3', 'egc2021_a_4', 'egc2021_a_5', 'egc2021_a_6', 'egc2021_a_7', 'egc2021_b_1', 'egc202_a_1', 'egc202_a_2', 'egc202_a_3', 'egc202_a_4', 'egc202_a_5', 'egc202_a_6', 'egc202_a_7', 'egc202_b_1', 'egc2031_a_1', 'egc2031_a_2', 'egc2031_a_3', 'egc2031_a_4', 'egc2031_a_5', 'egc2031_a_6', 'egc2031_a_7', 'egc2031_b_1', 'egc203_a_1', 'egc203_a_2', 'egc203_a_3', 'egc203_a_4', 'egc203_a_5', 'egc203_a_6', 'egc203_a_7', 'egc203_b_1', 'egc204_a_1', 'egc204_a_2', 'egc204_a_3', 'egc204_a_4', 'egc204_a_5', 'egc204_a_6', 'egc204_a_7', 'egc204_b_1', 'egc205_a_1', 'egc205_a_2', 'egc205_a_3', 'egc205_a_4', 'egc205_a_5', 'egc205_a_6', 'egc205_a_7', 'egc205_b_1', 'employ', 'eversmoke', 'father_dead', 'fid10', 'fid12', 'fid14', 'fid16', 'fid18', 'firstenterehcm', 'firstenterehcy', 'firstj_last', 'fml_count', 'gc1012_max', 'gc1012_min', 'gdge', 'gdgemonth', 'gdgeyear', 'gender', 'gender_update', 'gene', 'ibirthy', 'ibirthy_update', 'iinterv', 'iintervy', 'income', 'incomea', 'incomeb', 'interrupt', 'interviewerid18', 'jobclass', 'jobclass_2016', 'jobclass_a_1', 'jobclass_a_2', 'jobclass_a_3', 'jobclass_a_4', 'jobclass_a_5', 'jobclass_a_6', 'jobclass_a_7', 'jobclass_base', 'jobclass_base_2016', 'jobclass_base_a_1', 'jobclass_base_a_2', 'jobclass_base_a_3', 'jobclass_base_a_4', 'jobclass_base_a_5', 'jobclass_base_a_6', 'jobclass_base_a_7', 'jobstartn', 'kg1301', 'kg1302', 'kg13a_a_1', 'kg13a_a_2', 'kg13a_a_3', 'kg13a_a_4', 'kg13a_a_5', 'kg13a_a_77', 'kg13a_s_1', 'kg13a_s_2', 'kg13a_s_3', 'kg23', 'kgd101', 'kgd1a', 'kgd2', 'kgd2a_a_1', 'kgd2a_a_2', 'kgd2a_a_3', 'kgd2a_a_4', 'kgd2a_a_5', 'kgd2a_a_77', 'kgd2a_s_1', 'kgd2a_s_2', 'kgd2a_s_3', 'kgd2a_s_4', 'kgd3code', 'kgd4code', 'kgd4code_isco', 'kgd4code_isei', 
'kgd4code_siops', 'kr425', 'kr426', 'kr4301', 'kr4302', 'kr431', 'kr432', 'kra605', 'kw0', 'kw1001_b_1', 'kw1001_b_2', 'kw1001_b_3', 'kw1001_b_4', 'kw1001_b_5', 'kw1001_b_6', 'kw1001_b_7', 'kw1002_b_1code', 'kw1002_b_2code', 'kw1003_a_1code', 'kw1003_a_2code', 'kw1003_a_3code', 'kw1003_a_4code', 'kw1004_b_1', 'kw1004_b_2', 'kw1004_b_3', 'kw1004_b_4', 'kw1004_b_5', 'kw1004_b_6', 'kw1004_b_7', 'kw1005_b_1', 'kw1005_b_2', 'kw1005_b_3', 'kw1_s_1', 'kw1_s_2', 'kw1_s_3', 'kw1_s_4', 'kw1_s_5', 'kw1_s_6', 'kw1_s_7', 'kw1_s_8', 'kw1_s_9', 'kw2m', 'kw2y', 'kw301_b_1', 'kw401_b_1', 'kw501_b_1', 'kw601_b_1', 'kw701_b_1', 'kw801_b_1', 'kw801_b_2', 'kwa1_b_1', 'kwa2_b_1', 'kz202', 'kz203', 'kz207', 'kz5', 'lastyjob_comm_a_1', 'lastyjob_comm_a_2', 'lastyjob_comm_a_3', 'lastyjob_comm_a_4', 'lastyjob_comm_a_5', 'lastyjob_comm_a_6', 'lastyjob_comm_a_7', 'lastyjob_cur', 'lastyjob_last', 'lsample', 'm1m', 'm401', 'm7', 'marriage_last', 'marriage_last_update', 'marriagen', 'mathlist', 'mathtest18', 'mathtest18_sc2', 'metotal', 'military', 'mincomea_a_1', 'mincomea_a_2', 'mincomea_a_3', 'mincomea_a_4', 'mincomea_a_5', 'mincomea_a_6', 'mincomea_a_7', 'mincomea_b_1', 'minzu', 'mother_dead', 'outunit18', 'party', 'pd501a', 'pd501b', 'pd502a', 'pd503a', 'pd503r', 'pd504a', 'pd577r', 'pd5ckp', 'pd5total', 'pd5total_m', 'pd6', 'pd7r', 'pension', 'pg02', 'pg1201_max', 'pg1201_min', 'pid', 'pid_a_f', 'pid_a_m', 'plane', 'provcd18', 'proxy_iwmode', 'proxyrpt', 'proxytype', 'psu', 'pt1', 'pt2', 'pt201', 'pt3', 'pt301', 'pt4', 'pt401', 'pt5', 'pt501', 'pt7', 'pt701', 'pt8', 'qa001b', 'qa001m', 'qa001y', 'qa002', 'qa101', 'qa301', 'qa302', 'qa3020', 'qa302a_code', 'qa302c_code', 'qa303m', 'qa303y', 'qa401', 'qa401a_code', 'qa401c_code', 'qa601', 'qa602', 'qa602a_code', 'qa602c_code', 'qa603', 'qa701code', 'qa9', 'qa901', 'qa902', 'qbb001', 'qbb002', 'qbb003', 'qbb004', 'qbb005', 'qc1', 'qc2', 'qc201', 'qc3', 'qc4', 'qc401', 'qc5', 'qc701', 'qc7a', 'qc7b', 'qea0', 'qea1', 'qea2', 'qea201m', 
'qea201y', 'qea202', 'qea203code', 'qea204', 'qea205m', 'qea205y', 'qea206', 'qea207', 'qea2071', 'qea208m', 'qea208y', 'qea209m', 'qea209y', 'qea210code', 'qea2111', 'qea211m', 'qea211y', 'qeb405_1_a_1', 'qeb405_1_a_2', 'qext002', 'qext004', 'qext006', 'qext008', 'qext012', 'qext013', 'qf1_a_1', 'qf1_a_2', 'qf1_a_3', 'qf1_a_4', 'qf1_a_5', 'qf1_a_6', 'qf1_a_7', 'qf1_a_8', 'qf1_a_9', 'qf201_a_1', 'qf201_a_2', 'qf201_a_3', 'qf201_a_4', 'qf201_a_5', 'qf201_a_6', 'qf201_a_7', 'qf201_a_8', 'qf201_a_9', 'qf202_a_1', 'qf202_a_2', 'qf202_a_3', 'qf202_a_4', 'qf202_a_5', 'qf202_a_6', 'qf202_a_7', 'qf202_a_8', 'qf202_a_9', 'qf203_a_1', 'qf203_a_2', 'qf203_a_3', 'qf203_a_4', 'qf203_a_5', 'qf203_a_6', 'qf203_a_7', 'qf203_a_8', 'qf203_a_9', 'qf204_a_1', 'qf204_a_2', 'qf204_a_3', 'qf204_a_4', 'qf204_a_5', 'qf204_a_6', 'qf204_a_7', 'qf204_a_8', 'qf301_a_1', 'qf301_a_2', 'qf301_a_3', 'qf301_a_4', 'qf301_a_5', 'qf301_a_6', 'qf301_a_7', 'qf301_a_8', 'qf301_a_9', 'qf302_a_1', 'qf302_a_2', 'qf302_a_3', 'qf302_a_4', 'qf302_a_5', 'qf302_a_6', 'qf302_a_7', 'qf302_a_8', 'qf303_a_1', 'qf303_a_2', 'qf303_a_3', 'qf303_a_4', 'qf303_a_5', 'qf303_a_6', 'qf303_a_7', 'qf303_a_8', 'qf303_a_9', 'qf304_a_1', 'qf304_a_2', 'qf304_a_3', 'qf304_a_4', 'qf304_a_5', 'qf304_a_6', 'qf304_a_7', 'qf304_a_8', 'qf304_a_9', 'qf305_a_1', 'qf305_a_2', 'qf305_a_3', 'qf305_a_4', 'qf305_a_5', 'qf305_a_6', 'qf305_a_7', 'qf305_a_8', 'qf305_a_9', 'qf306_a_1', 'qf306_a_2', 'qf306_a_3', 'qf306_a_4', 'qf306_a_5', 'qf306_a_6', 'qf306_a_7', 'qf306_a_8', 'qf306_a_9', 'qf5_a_1', 'qf5_a_2', 'qf601_a_1', 'qf601_a_2', 'qf602_a_1', 'qf602_a_2', 'qf603_a_1', 'qf603_a_2', 'qf604_a_1', 'qf604_a_2', 'qf701_a_1', 'qf701_a_2', 'qf702_a_1', 'qf702_a_2', 'qf703_a_1', 'qf703_a_2', 'qf704_a_1', 'qf704_a_2', 'qf705_a_1', 'qf705_a_2', 'qf706_a_1', 'qf706_a_2', 'qg1', 'qg10', 'qg1001', 'qg101', 'qg11', 'qg1101', 'qg1102', 'qg12', 'qg1201_max', 'qg1201_min', 'qg1202', 'qg1203', 'qg1204_max', 'qg1204_min', 'qg1303', 'qg1304', 'qg1305', 'qg14', 
'qg1401code', 'qg15', 'qg1501', 'qg16', 'qg17', 'qg1701', 'qg18', 'qg19', 'qg2', 'qg20', 'qg201', 'qg2032', 'qg22', 'qg301', 'qg3011', 'qg301a_code', 'qg301c_code', 'qg302code', 'qg303code', 'qg303code_egp', 'qg303code_isco', 'qg303code_isei', 'qg303code_siops', 'qg401', 'qg402', 'qg403', 'qg404', 'qg405', 'qg406', 'qg5', 'qg501', 'qg502', 'qg6', 'qg701_a_1', 'qg701_a_2', 'qg701_a_3', 'qg701_a_5', 'qg702', 'qg7_a_1', 'qg7_a_2', 'qg7_a_3', 'qg7_a_4', 'qg7_a_77', 'qg7_a_78', 'qg7_s_1', 'qg7_s_2', 'qg7_s_3', 'qg7_s_4', 'qg7_s_5', 'qg801_a_1', 'qg801_a_2', 'qg801_a_3', 'qg801_a_4', 'qg801_a_5', 'qg8_a_1', 'qg8_a_2', 'qg8_a_3', 'qg8_a_4', 'qg8_a_77', 'qg8_a_78', 'qg8_s_1', 'qg8_s_2', 'qg8_s_3', 'qg8_s_4', 'qg8_s_5', 'qg901', 'qg9_a_1', 'qg9_a_2', 'qg9_a_3', 'qg9_a_4', 'qg9_a_5', 'qg9_a_78', 'qg9_s_1', 'qg9_s_2', 'qg9_s_3', 'qg9_s_4', 'qg9_s_5', 'qga1', 'qga101', 'qga2', 'qga301', 'qga302', 'qga401code', 'qga401code_isco', 'qga401code_isei', 'qga401code_siops', 'qga4code', 'qgb1', 'qgb2', 'qgb201', 'qgb3', 'qgb4', 'qgb5', 'qgb501', 'qgb6', 'qgd0', 'qgd01', 'qi101', 'qi1011', 'qi102m', 'qi102y', 'qi200', 'qi2001', 'qi202', 'qi203_max', 'qi203_mean', 'qi203_min', 'qi301_a_1', 'qi301_a_2', 'qi301_a_3', 'qi301_a_4', 'qi301_a_5', 'qi301_a_6', 'qi301_a_7', 'qi301_a_77', 'qi301_a_78', 'qi301_s_1', 'qi301_s_2', 'qi301_s_3', 'qi301_s_4', 'qi301_s_5', 'qi301_s_6', 'qi301_s_7', 'qint001', 'qint003', 'qint005', 'qint007', 'qint009', 'qint010', 'qint011', 'qint014', 'qk5', 'qk6', 'qk701m', 'qk702', 'qk703', 'qk7m', 'qk811', 'qk812', 'qk815', 'qk816', 'qk821', 'qk822', 'qk825', 'qk826', 'qka201', 'qka202', 'qka203', 'qka204', 'qkz204', 'qm101m', 'qm102m', 'qm103m', 'qm104m', 'qm105m', 'qm106m', 'qm107m', 'qm108m', 'qm109m', 'qm110m', 'qm201', 'qm2011', 'qm2016', 'qm202', 'qm203', 'qm204', 'qm205', 'qm206', 'qm207', 'qm208', 'qm209', 'qm210', 'qm211', 'qm212', 'qm213', 'qm214', 'qm215', 'qm4011', 'qm40110', 'qm40111', 'qm4012', 'qm4013', 'qm4014', 'qm4015', 'qm4016', 'qm4017', 
'qm4018', 'qm4019', 'qm501', 'qm501a', 'qm502', 'qm503', 'qm504', 'qm505', 'qm506', 'qm507', 'qm508', 'qm509', 'qm510', 'qm6', 'qm6010', 'qm6011', 'qm6012', 'qm6013', 'qm6014', 'qm6015', 'qm6016', 'qm6017', 'qm801', 'qm802', 'qm803', 'qm9', 'qn1001', 'qn10021', 'qn10022', 'qn10023', 'qn10024', 'qn10025', 'qn10026', 'qn101', 'qn102', 'qn103', 'qn104', 'qn105', 'qn1101', 'qn12012', 'qn12016', 'qn2', 'qn201_b_1', 'qn201_b_2', 'qn202', 'qn203', 'qn3', 'qn4001', 'qn4002', 'qn4003', 'qn4004', 'qn4005', 'qn402', 'qn406', 'qn407', 'qn411', 'qn412', 'qn414', 'qn416', 'qn418', 'qn420', 'qn6011', 'qn6012', 'qn6013', 'qn6014', 'qn6015', 'qn6016', 'qn6017', 'qn6018', 'qn8011', 'qn8012', 'qp101', 'qp102', 'qp103', 'qp201', 'qp202', 'qp301', 'qp302code', 'qp303', 'qp304', 'qp401', 'qp402acode', 'qp402bcode', 'qp501', 'qp502', 'qp601', 'qp602', 'qp603', 'qp605_a_1', 'qp605_a_2', 'qp605_a_3', 'qp605_a_4', 'qp605_a_5', 'qp605_a_78', 'qp605_s_1', 'qp605_s_2', 'qp605_s_3', 'qp605_s_4', 'qp605_s_5', 'qp701', 'qp702', 'qph1', 'qph2', 'qph3', 'qq1001', 'qq1002', 'qq1011', 'qq1012', 'qq1013', 'qq1014', 'qq1015', 'qq1016', 'qq1017', 'qq1101', 'qq1102', 'qq201', 'qq2011', 'qq202', 'qq204', 'qq205', 'qq301', 'qq401', 'qq4010', 'qq4011', 'qq4012', 'qq402', 'qq403a', 'qq403b', 'qq501', 'qq701a', 'qq9010', 'qq9011', 'qq9012', 'qr0', 'qr1', 'qr2', 'qr3', 'qr4', 'qr5', 'qr6', 'qr7', 'qs10', 'qs1001', 'qs1002', 'qs1003', 'qs1011', 'qs1012', 'qs102b', 'qs11', 'qs1102', 'qs1103', 'qs1_b_1code', 'qs1_b_2', 'qs2', 'qs201', 'qs202', 'qs3', 'qs3m', 'qs401code', 'qs4_b_1', 'qs4_b_2', 'qs5', 'qs501_b_1code', 'qs501_b_2', 'qs502', 'qs503', 'qs504', 'qs601', 'qs601n', 'qs602', 'qs602n', 'qs603', 'qs603mn', 'qs604', 'qs604n', 'qs605', 'qs605n', 'qs606', 'qs606mn', 'qs7', 'qs701_b_1code', 'qs701_b_2', 'qs702', 'qs703', 'qs704', 'qs705', 'qs8', 'qs801_b_1', 'qs801_b_2code', 'qs9code', 'qu102', 'qu102a', 'qu1m', 'qu201', 'qu202', 'qu230', 'qu231m', 'qu240', 'qu250m', 'qu301', 'qu302', 'qu303', 'qu304', 'qu305', 
'qu601', 'qu602', 'qu603', 'qu701', 'qu702', 'qu703', 'qu704', 'qu705', 'qu7051', 'qu801', 'qu802', 'qu803', 'qu804', 'qu805', 'qu806', 'qx3', 'qx301', 'qx301un', 'qx4', 'qx401', 'qx401un', 'qx5', 'qx501', 'qx501un', 'qx6', 'qx601', 'qx601un', 'qx7', 'qz101_s_1', 'qz101_s_2', 'qz102_s_1', 'qz102_s_2', 'qz102_s_3', 'qz103', 'r1_last', 'releaseversion', 'respc1pid', 'retire', 'risko', 'rswt_natcs18n', 'rswt_natpn1018n', 'school', 'school_last', 'self_iwmode', 'selfrpt', 'smokeage', 'subpopulation', 'subsample', 'tb6_a18_p', 'train', 'urban18', 'w01', 'whichmathlist', 'whichwordlist', 'wk9', 'wl6', 'wl7', 'wl801', 'wl802', 'wl803', 'wm701', 'wm702', 'wm703', 'wm704', 'wm705', 'wm706', 'wm707', 'wm708', 'wm709', 'wm710', 'wm711', 'wm712', 'wordlist', 'wordtest18', 'wordtest18_sc2', 'wv101', 'wv102', 'wv103', 'wv104', 'wv105', 'wv106', 'wv107', 'wv108', 'xchildpid_a_1', 'xchildpid_a_10', 'xchildpid_a_2', 'xchildpid_a_3', 'xchildpid_a_4', 'xchildpid_a_5', 'xchildpid_a_6', 'xchildpid_a_7', 'xchildpid_a_8', 'xchildpid_a_9', 'yincomea_a_1', 'yincomea_a_2', 'yincomea_a_3', 'yincomea_a_4', 'yincomea_a_5', 'yincomea_a_6', 'yincomea_a_7', 'yincomea_b_1']

幸福感#

# How happy are you?
# QM2016 "How happy (score)": if 0 is the lowest and 10 the highest, how happy
# do you feel you are? (The questionnaire text carries a CAPI placeholder for
# the form of address.) Range 0..10
df['qm2016'].value_counts()
 10.0    8571
 8.0     8172
 5.0     5031
 7.0     3531
 9.0     2870
 6.0     2556
 3.0      659
 4.0      656
 2.0      300
 0.0      272
-8.0      267
 1.0      192
-1.0       15
-2.0        5
Name: qm2016, dtype: int64
# Histogram of self-rated happiness (QM2016, scored 0..10). Negative values
# are missing-value codes (-8 not applicable, -1 don't know, -2 refused), not
# scores, so they are dropped; bin edges are centred on the integers 0..10.
plt.hist(df.loc[df['qm2016'] >= 0, 'qm2016'], bins=np.arange(-0.5, 11.5, 1.0));
_images/c916388b8f03ae6caa60556859ce503216cea6e956622a0071a09fa8a88ac0ed.png

缺失值#

CFPS小课堂 | 请不要问我,我“不知道”,我“拒绝回答”……

  • 问卷系统跳转造成的缺失,一般用“-8”(不适用)来表示

  • 不同问卷类型合并造成的缺失,一般在数据库中是用“.”来表示的。

  • 受访者原因造成的缺失,这种类型的缺失值在数据库中一般用“-2”(拒绝回答)和“-1”(不知道)来表示。

http://www.isss.pku.edu.cn/cfps/cjwt/cfpsxkt/1295293.htm

QA001B “年龄”

人口统计变量#

df['age'].value_counts()
 50.0     847
 55.0     810
 48.0     778
 31.0     774
 29.0     766
         ... 
 98.0       4
 99.0       3
-8.0        2
 102.0      1
 100.0      1
Name: age, Length: 94, dtype: int64
# Binary retirement flag: 1 where `retire` equals 1.0, otherwise 0
# (NaN and every other code map to 0, exactly as before).
df['retired'] = np.where(df['retire'] == 1.0, 1, 0)
# Age histogram. The -8 "not applicable" code is not an age, so negative
# values are excluded from the plot.
plt.hist(df.loc[df['age'] >= 0, 'age']);
_images/6d3bb9e9178045ff6e32cd69c270e8e6f38a2a65585378fb65d1de9107d08bcc.png
# Histogram of education level. -8 ("not applicable") is a missing-value
# code rather than a level, so only non-negative values are plotted.
plt.hist(df.loc[df['edu_updated'] >= 0, 'edu_updated']);
_images/6d9e391125b490f066fca3119921d3b313532f9abe16a271933c8a7fb264a6c2.png
plt.hist(df['cfps2018eduy']);
_images/848e99eb939d218c17d420ecc2a6f7976d80cd25bdbd1ab3d3329f4265548a0b.png
plt.hist(np.log(df['income']+1));
_images/8ebed889cb5b4997d54acf9b3bef7aa8d667d3d9a771f6aa95682839636ae258.png
# Gender: 1 = male, 5 = female
df['qa002'].value_counts()
5.0    16550
1.0    16547
Name: qa002, dtype: int64
# Male indicator built from qa002 (1 = male, 5 = female): 1.0 for male,
# 0.0 for female. Rows where qa002 is missing or holds any other code stay
# NaN instead of being silently counted as female.
df['male'] = (df['qa002'] == 1.0).astype(float).where(df['qa002'].isin([1.0, 5.0]))
# Distribution of edu_updated (-8 is the "not applicable" code).
df['edu_updated'].value_counts()
-8.0     14462
 4.0      5273
 3.0      3598
 5.0      3058
 0.0      2848
 6.0      1648
 7.0      1335
 10.0      756
 8.0       112
 9.0         7
Name: edu_updated, dtype: int64
# Youngest age among the rows whose edu_updated holds the -8 code.
df.loc[df['edu_updated'] == -8.0, 'age'].min()
45.0
# Happiness (0..10) by the male indicator. Negative qm2016 values are
# missing-value codes and would distort the boxes, so they are excluded.
sns.boxplot(x="male", y="qm2016", data=df[df['qm2016'] >= 0]);
_images/c0f304a569ab8285adc402c9d70feed4c3cc886bceeebc5c02ed899c38c21e24.png
# Log-transformed income, stored in a new column 'Income' (capital I) that is
# distinct from the raw 'income' column.
# NOTE(review): the +10 offset presumably keeps negative missing-value codes
# inside the log domain — confirm against the actual values of 'income'.
df['Income'] = np.log(df['income']+10)
# Compare the log-income distribution across the 'male' indicator.
sns.boxplot(x="male", y="Income",  data=df);
_images/5eade54fc2b8408c9bed765afb33934731c84eebc0d441854e4e4fb67bfbe1b8.png
# Mean happiness by age, split by the male indicator. Negative values in
# either variable are missing-value codes, so those rows are dropped first.
sns.lineplot(y='qm2016', x='age', hue='male',
             data=df[(df['qm2016'] >= 0) & (df['age'] >= 0)]);
_images/7273eb52e8b13958d068e5b5b06f76598699705d3ae79ae60b8a75d40761809d.png
# Mean happiness by years of education, split by the male indicator.
# Negative qm2016 values are missing-value codes, so they are dropped first.
sns.lineplot(y='qm2016', x='cfps2018eduy', hue='male',
             data=df[df['qm2016'] >= 0]);
_images/13c1d03c62c70863f5f516500ecdbf66fc2cf9304146c60129272b988750b687.png
# Mean happiness by education level, split by the male indicator. Both
# variables carry negative missing-value codes (-8 and others), so rows
# with a negative value in either one are dropped first.
sns.lineplot(y='qm2016', x='edu_updated', hue='male',
             data=df[(df['qm2016'] >= 0) & (df['edu_updated'] >= 0)]);
_images/ff55a704850d01acb380541e379a3e0cc53717d0194836a69381fbc919200347.png
df['qu7051'].value_counts()
-8.0        21502
 1000.0      1261
 2000.0      1176
 3000.0       895
 5000.0       801
            ...  
 1740.0         1
 26400.0        1
 41000.0        1
 92000.0        1
 680.0          1
Name: qu7051, Length: 265, dtype: int64
df['qu705'].value_counts()
-8.0    15603
 7.0     5899
 4.0     2980
 3.0     2616
 1.0     1681
 5.0     1531
 2.0     1416
 6.0     1367
-1.0        3
-2.0        1
Name: qu705, dtype: int64
df['qu702'].value_counts()
-8.0    21274
 7.0     5556
 1.0     4160
 2.0      723
 3.0      688
 4.0      338
 5.0      199
 6.0      156
-1.0        2
-2.0        1
Name: qu702, dtype: int64
# Frequency of physical exercise (number of times)
df['qp701'].value_counts()
 0.0     16198
 7.0      8171
 3.0      2333
 2.0      2311
 5.0      1094
 1.0       975
 4.0       915
-8.0       386
 6.0       281
 14.0      195
 10.0       84
 21.0       29
-1.0        25
 8.0        21
 20.0       15
 15.0       14
 28.0        9
 12.0        8
 13.0        6
 35.0        5
 11.0        4
 9.0         4
 25.0        3
 50.0        3
-2.0         2
 22.0        2
 33.0        1
 30.0        1
 19.0        1
 26.0        1
Name: qp701, dtype: int64
df['qp702'].value_counts()
-8.0     16611
 7.0      2984
 2.0      1696
 3.0      1438
 14.0     1397
         ...  
 33.0        1
 66.5        1
 4.3         1
 3.6         1
 23.0        1
Name: qp702, Length: 117, dtype: int64

认知水平#

# 'wordtest18', 'wordtest18_sc2'
df['wordtest18_sc2'].value_counts()
-8.0     7129
 0.0     3161
 22.0    1814
 21.0    1526
 28.0    1400
 31.0    1283
 26.0    1195
 30.0    1192
 24.0    1031
 25.0     997
 27.0     958
 19.0     907
 29.0     885
 23.0     846
 32.0     811
 12.0     634
 17.0     619
 18.0     580
 33.0     556
 20.0     497
 13.0     468
 16.0     447
 4.0      415
 11.0     410
 10.0     370
 6.0      348
 7.0      330
 34.0     329
 1.0      328
 5.0      316
 9.0      296
 15.0     264
 3.0      246
 14.0     206
 8.0      191
 2.0      112
Name: wordtest18_sc2, dtype: int64
# 'mathtest18', 'mathtest18_sc2'
df['mathtest18_sc2'].value_counts()
-8.0     7126
 9.0     3011
 8.0     2957
 0.0     2652
 7.0     2246
 10.0    2222
 4.0     1502
 15.0    1480
 5.0     1432
 6.0     1063
 11.0     937
 1.0      913
 3.0      881
 14.0     875
 17.0     630
 18.0     539
 16.0     450
 12.0     424
 23.0     362
 24.0     348
 2.0      318
 19.0     302
 13.0     245
 20.0      82
 22.0      71
 21.0      29
Name: mathtest18_sc2, dtype: int64

CES-D量表 个人的抑郁水平#

采用Center for Epidemiologic Studies Depression Scale (CES-D)量表来测试个人的抑郁水平。

‘cesd20sc’, ‘cesd8’

  • 我们建议:如果用户只需要使用 CFPS2016 或 CFPS2018 年的数据,采用 8 道题版本的总分(cesd8);

  • 如果用户需要同时使用 CFPS2012 的数据进行分析,则采用 cesd20sc。

df['cesd8'].value_counts()
10.0    3535
12.0    3439
11.0    3359
13.0    3046
14.0    2993
15.0    2537
8.0     2517
9.0     2406
16.0    2136
17.0    1851
18.0    1446
19.0     859
20.0     649
21.0     488
22.0     382
23.0     278
24.0     197
26.0     132
25.0     128
27.0      75
28.0      45
29.0      32
31.0      25
30.0      24
32.0      19
Name: cesd8, dtype: int64
plt.hist(df['cesd8']);
_images/c89fd236900f2810e15f12d3f064494f954b2a92336b14a2d0c8665a05b4a784.png
plt.hist(np.log(df['cesd8']));
_images/c772784d09191628c382b6134ed16e61ca9cef1535a29881ae4190632be0981b.png

大五人格#

大五人格(OCEAN),也被称为“人格的海洋”,可以通过 NEO-PI-R 量表评定。

·外倾性(extraversion):好交际对不好交际,爱娱乐对严肃,感情丰富对含蓄;表现出热情、社交、果断、活跃、冒险、乐观等特点。

·神经质或情绪稳定性(neuroticism):烦恼对平静,不安全感对安全感,自怜对自我满意,包括焦虑、敌对、压抑、自我意识、冲动、脆弱等特质。

·开放性(openness): 富于想象对务实,寻求变化对遵守惯例,自主对顺从。具有想象、审美、情感丰富、求异、创造、智慧等特征。

·随和性(agreeableness):热心对无情,信赖对怀疑,乐于助人对不合作。包括信任、利他、直率、谦虚、移情等品质。

·尽责性(conscientiousness):有序对无序,谨慎细心对粗心大意,自律对意志薄弱。包括胜任、公正、条理、尽职、成就、自律、谨慎、克制等特点。

['qm'+ str(i) for i in  range(201, 216)]
['qm201',
 'qm202',
 'qm203',
 'qm204',
 'qm205',
 'qm206',
 'qm207',
 'qm208',
 'qm209',
 'qm210',
 'qm211',
 'qm212',
 'qm213',
 'qm214',
 'qm215']
# Item columns for each Big Five trait (three qm2xx items per trait).
bigfive_o = ['qm' + str(i) for i in [204, 209, 214]]
bigfive_c = ['qm' + str(i) for i in [201, 207, 211]]
bigfive_e = ['qm' + str(i) for i in [202, 208, 212]]
bigfive_a = ['qm' + str(i) for i in [203, 206, 213]]
bigfive_n = ['qm' + str(i) for i in [205, 210, 215]]

# Trait score = mean of the three items. Negative item values are
# missing-value codes (-8, -1, -2), not answers, so they are masked to NaN
# before averaging. skipna=False leaves the score NaN unless all three items
# were answered; a plain sum()/3 would average the codes in and would turn
# rows with no answers at all into a score of 0.
for trait, items in [('bigfive_o', bigfive_o),
                     ('bigfive_c', bigfive_c),
                     ('bigfive_e', bigfive_e),
                     ('bigfive_a', bigfive_a),
                     ('bigfive_n', bigfive_n)]:
    answers = df[items].where(df[items] >= 0)
    df[trait] = answers.mean(axis=1, skipna=False)
df['bigfive_o'].value_counts()
 3.333333    5237
 4.000000    4983
 0.000000    4276
 2.666667    4270
 3.000000    3353
 2.000000    2879
-8.000000    2843
 3.666667    2461
 2.333333    2049
 4.333333    1473
 1.666667     912
 4.666667     797
 5.000000     715
 1.333333     544
 1.000000     427
 0.666667      40
-1.000000      39
 0.333333      27
-0.333333      13
-4.666667       4
-0.666667       2
-3.666667       2
-2.000000       2
-4.333333       2
-1.333333       2
-1.666667       1
-4.000000       1
Name: bigfive_o, dtype: int64
plt.hist(df['bigfive_o']);
_images/da063c202f9d68dc0a276d669edf71b760c3e3f989938e285ac41f894d902688.png
df['qm201'].value_counts()
 4.0    18000
 5.0     8202
-8.0     2838
 3.0     2179
 2.0     1683
 1.0      183
-1.0       11
-2.0        1
Name: qm201, dtype: int64

描述性分析#

# Mean exercise frequency by openness score, split by the male indicator.
# Negative qp701 values are missing-value codes. The Big Five items are
# scored 1..5, so a trait score below 1 can only come from missing-value
# codes or unanswered items; both kinds of rows are dropped.
sns.lineplot(y='qp701', x='bigfive_o', hue='male',
             data=df[(df['qp701'] >= 0) & (df['bigfive_o'] >= 1)]);
_images/c5edd172dc320d4917063abd4f274e66ffd67dcfb8890b1ffe2f570725f1dd9c.png
# Mean exercise frequency by CES-D 8 score, split by the male indicator.
# Negative qp701 values are missing-value codes, so they are dropped first.
sns.lineplot(y='qp701', x='cesd8', hue='male', data=df[df['qp701'] >= 0]);
_images/a0cc2836de18c71b026b2aa926031ecd652ccbbf477b6e1071cba7b62295884f.png
# NOTE(review): this cell repeats the preceding qp701-vs-cesd8 plot verbatim
# and produces the same image; consider removing it.
sns.lineplot(y = 'qp701', x = 'cesd8',  hue = 'male', data = df);
_images/a0cc2836de18c71b026b2aa926031ecd652ccbbf477b6e1071cba7b62295884f.png
# Mean exercise frequency by word-test score, split by the male indicator.
# Both variables use negative missing-value codes (-8 and others), so rows
# with a negative value in either one are dropped first.
sns.lineplot(y='qp701', x='wordtest18_sc2', hue='male',
             data=df[(df['qp701'] >= 0) & (df['wordtest18_sc2'] >= 0)]);
_images/82c1def70c049c556262249b64fe3c0a9238b0f0445fa9dbe21f0f15ea01b582.png
# Mean exercise frequency by math-test score, split by the male indicator.
# Both variables use negative missing-value codes (-8 and others), so rows
# with a negative value in either one are dropped first.
sns.lineplot(y='qp701', x='mathtest18_sc2', hue='male',
             data=df[(df['qp701'] >= 0) & (df['mathtest18_sc2'] >= 0)]);
_images/a9090f0fe92f8e2f32b5428e748763297017eba76fa2226ce60c442a8492c7c9.png
# Mean exercise frequency by age, split by the male indicator. Negative
# values in either variable are missing-value codes, so they are dropped.
sns.lineplot(y='qp701', x='age', hue='male',
             data=df[(df['qp701'] >= 0) & (df['age'] >= 0)]);
_images/88be4725d5fdfcb819aa4f38c7f9e0229d3b2fbd079a89b1d60897837748f4a1.png
# Mean qp702 by age, split by the male indicator. qp702 is -8 ("not
# applicable") for a large share of rows and age also carries -8, so rows
# with a negative value in either variable are dropped first.
sns.lineplot(y='qp702', x='age', hue='male',
             data=df[(df['qp702'] >= 0) & (df['age'] >= 0)]);
_images/f523fea47856b1058166186744c0ad762859837a9140dc1026ad7445a684ae39.png
# WV101 "Economic prosperity requires widening income gaps": for the economy
# to prosper, the income gap has to be widened.
# 1 = strongly disagree, 2 = disagree, 3 = agree, 4 = strongly agree,
# 5 = neither agree nor disagree (not read out)
df['wv101'].value_counts()
 3.0    14345
 2.0    13994
 1.0     1601
 5.0     1298
 4.0     1261
-8.0      312
-1.0      284
-2.0        2
Name: wv101, dtype: int64

价值观#

  • WV101 “经济繁荣要拉大收入差距”:为了经济繁荣就要拉大收入差距。

  • WV102 “公平竞争才有和谐人际”

  • WV103 “财富反映个人成就”

  • WV104 “努力工作能有回报”

  • WV105 “聪明才干能得回报”

  • WV106 “成大事难免腐败”

  • WV107 “有关系比有能力重要”

  • WV108 “提高生活水平机会很大”

# Recode the eight value items wv101..wv108 so the answer codes form an
# ordered scale: "agree" 3 -> 4, "strongly agree" 4 -> 5, and the neutral
# option 5 -> 3 (the midpoint). Codes 1, 2 and the negative missing-value
# codes are left untouched.
# NOTE: the recode is applied in place, so running this cell twice would
# shuffle the codes again.
codes_before = [3.0, 4.0, 5.0]
codes_after = [4.0, 5.0, 3.0]
for item_no in range(101, 109):
    wv_col = 'wv' + str(item_no)
    df[wv_col] = df[wv_col].replace(codes_before, codes_after)
# WV101 "Economic prosperity requires widening income gaps", after recoding.
df['wv101'].value_counts()
 4.0    14345
 2.0    13994
 1.0     1601
 3.0     1298
 5.0     1261
-8.0      312
-1.0      284
-2.0        2
Name: wv101, dtype: int64
df['wv102'].value_counts()
 4.0    23152
 5.0     4605
 2.0     3616
 3.0      842
 1.0      366
-8.0      295
-1.0      219
-2.0        2
Name: wv102, dtype: int64
df['wv108'].value_counts()
 4.0    23264
 2.0     4759
 5.0     3296
 3.0     1084
-8.0      310
 1.0      306
-1.0       77
-2.0        1
Name: wv108, dtype: int64
# Mean wv101 at each level of wv106, split by the male indicator. The filter
# is applied to the two plotted variables: valid answers are 1..5 and the
# negative values are missing-value codes. (Filtering on wv102, which is not
# plotted, left those codes in wv101 and wv106.)
sns.pointplot(y='wv101', x='wv106', hue='male',
              data=df[(df['wv101'] >= 1) & (df['wv106'] >= 1)]);
_images/511038147a2baead2346c73eee6cb5230b9722b7d456b598b5b4a6d00101b9f1.png
df['ear101a'].unique()
array([-8., nan, 11., 12., 13., 37., 61., 14., 51., 43., 21., 46., 65.,
       44., 15., 22., 23., 31., 45., 34., 32., 33., 52., 35., 36., 42.,
       41., 50., 53., 62., 54.])
df['qa401c_code'].unique()
array([       nan,  2.311e+03,  6.090e+02,  8.300e+02,  4.700e+01,
        9.989e+03,  5.600e+01,  5.100e+01,  1.437e+03,  2.960e+02,
        4.900e+01,  5.300e+01,  4.300e+02,  2.480e+02,  5.200e+01,
        5.000e+01,  4.600e+01,  4.800e+01,  8.297e+03,  6.900e+01,
        4.510e+02,  8.510e+02,  7.400e+01,  2.179e+03,  2.859e+03,
        2.853e+03,  7.200e+01,  1.780e+02,  1.530e+02,  1.600e+02,
        6.520e+02,  6.870e+02, -9.000e+00,  1.550e+02,  6.990e+02,
        7.900e+02,  1.977e+03,  1.480e+02,  6.100e+02,  6.980e+02,
        1.560e+02,  2.827e+03,  7.020e+02,  1.480e+03,  1.580e+02,
        2.388e+03,  6.490e+02,  1.652e+03,  1.276e+03,  1.570e+02,
        1.425e+03,  1.520e+02,  8.330e+02,  6.890e+02,  6.340e+02,
        3.780e+02,  7.370e+02,  1.510e+02,  6.270e+02,  2.999e+03,
        6.000e+01,  6.100e+01,  5.900e+01,  7.250e+02,  5.500e+01,
        5.400e+01,  1.520e+03,  7.580e+02,  1.534e+03,  7.861e+03,
        8.980e+02,  8.400e+02,  8.160e+02,  5.610e+02,  1.990e+02,
        8.310e+02,  8.910e+02,  3.640e+02,  3.101e+03,  8.404e+03,
        5.700e+01,  6.460e+02,  7.150e+02,  3.119e+03,  7.560e+02,
        6.600e+01,  1.522e+03,  6.700e+01,  8.100e+01,  1.038e+03,
        9.240e+02,  1.189e+03,  1.130e+03,  8.500e+01,  1.213e+03,
        8.300e+01,  1.035e+03,  1.053e+03,  9.870e+02,  2.771e+03,
        1.216e+03,  9.810e+02,  1.011e+03,  9.030e+02,  9.220e+02,
        1.013e+03,  8.316e+03,  1.033e+03,  1.207e+03,  8.234e+03,
        9.820e+02,  1.024e+03,  1.147e+03,  2.908e+03,  9.980e+02,
        1.200e+01,  9.690e+02,  1.100e+01,  1.583e+03,  1.008e+03,
        1.744e+03,  9.000e+01,  9.620e+02,  3.600e+01,  3.500e+01,
        2.300e+03,  4.700e+02,  1.000e+00,  1.119e+03,  2.469e+03,
        3.000e+00,  2.000e+00,  7.550e+02,  1.238e+03,  2.700e+01,
        2.800e+01,  1.282e+03,  1.400e+01,  1.986e+03,  1.397e+03,
        1.500e+01,  1.300e+01,  1.770e+02,  7.940e+02,  8.020e+02,
        7.498e+03,  1.460e+03,  1.507e+03,  7.230e+02,  6.400e+01,
        2.260e+02,  8.467e+03,  1.729e+03,  2.770e+03,  1.651e+03,
        1.443e+03,  1.807e+03,  1.000e+02,  1.010e+02,  1.030e+02,
        2.480e+03,  1.040e+02,  1.659e+03,  1.050e+02,  1.660e+03,
        1.090e+02,  2.430e+03,  1.697e+03,  1.120e+02,  4.400e+02,
        2.229e+03,  1.140e+02,  2.587e+03,  1.130e+02,  8.080e+02,
        1.712e+03,  1.732e+03,  1.070e+02,  1.680e+03,  1.110e+02,
        1.020e+02,  9.992e+03,  1.681e+03,  1.080e+02,  1.505e+03,
        1.650e+02,  1.848e+03,  1.751e+03,  1.743e+03,  9.000e+00,
        1.873e+03,  1.781e+03,  1.477e+03,  1.780e+03,  2.996e+03,
        9.991e+03,  1.930e+03,  1.929e+03,  1.725e+03,  3.000e+01,
        2.228e+03,  3.100e+01,  3.200e+01,  3.300e+01,  1.270e+02,
        1.896e+03,  2.869e+03,  4.570e+02,  9.200e+01,  1.994e+03,
        2.044e+03,  2.292e+03,  1.210e+02,  1.240e+02,  1.170e+02,
        2.078e+03,  1.985e+03,  1.300e+02,  2.105e+03,  1.190e+02,
        1.160e+02,  1.828e+03,  2.015e+03,  2.016e+03,  2.045e+03,
        2.035e+03,  1.230e+02,  2.169e+03,  1.150e+02,  2.086e+03,
        2.027e+03,  1.250e+02,  2.054e+03,  2.377e+03,  2.111e+03,
        1.260e+02,  2.161e+03,  2.031e+03,  2.172e+03,  1.951e+03,
        1.949e+03,  1.290e+02,  2.559e+03,  4.061e+03,  1.739e+03,
        4.000e+00,  2.149e+03,  6.000e+00,  2.220e+03,  2.259e+03,
        2.253e+03,  2.262e+03,  2.100e+01,  2.000e+01,  2.257e+03,
        2.261e+03,  9.420e+02,  1.710e+02,  1.242e+03,  2.305e+03,
        3.900e+01,  4.000e+01,  2.608e+03,  4.200e+01,  2.363e+03,
        4.100e+01,  1.195e+03,  2.358e+03,  2.200e+01,  2.300e+01,
        2.400e+01,  1.632e+03,  2.500e+01,  2.600e+01,  2.507e+03,
        2.552e+03,  1.900e+01,  2.266e+03,  2.625e+03,  1.700e+01,
        2.650e+03,  2.663e+03,  2.637e+03,  7.700e+01,  1.675e+03,
        2.265e+03,  1.374e+03,  3.037e+03,  7.800e+01,  2.937e+03,
        2.905e+03,  1.380e+02,  1.440e+02,  1.390e+02,  1.460e+02,
        2.949e+03,  1.350e+02,  2.950e+03,  2.925e+03,  1.420e+02,
        2.906e+03,  8.070e+02,  1.410e+02,  1.430e+02,  1.320e+02,
        2.897e+03,  1.340e+02,  2.896e+03,  2.899e+03,  1.310e+02,
        1.360e+02,  2.916e+03,  2.917e+03,  2.915e+03,  2.124e+03,
        2.209e+03,  3.057e+03,  1.370e+02,  2.892e+03,  1.330e+02,
        2.924e+03,  2.819e+03,  1.615e+03,  1.400e+02,  2.952e+03])
# Show every distinct value present in the 'ear101c_code' column, in order of
# first appearance; missing entries show up as a single NaN.
pd.unique(df['ear101c_code'])
array([      nan, 1.700e+02, 7.900e+01, 1.770e+02, 5.100e+01, 5.200e+01,
       1.930e+02, 1.990e+02, 4.800e+01, 5.000e+01, 1.564e+03, 4.600e+01,
       2.010e+02, 2.610e+02, 1.445e+03, 2.772e+03, 4.140e+02, 8.297e+03,
       8.415e+03, 6.900e+01, 3.840e+02, 7.100e+01, 7.400e+01, 7.200e+01,
       1.840e+02, 3.740e+02, 7.500e+01, 7.300e+01, 2.288e+03, 1.854e+03,
       1.530e+02, 1.600e+02, 6.080e+02, 6.090e+02, 6.550e+02, 1.470e+02,
       1.485e+03, 6.560e+02, 1.550e+02, 6.520e+02, 1.480e+02, 6.100e+02,
       1.610e+02, 6.610e+02, 1.560e+02, 1.920e+02, 3.066e+03, 1.690e+02,
       1.270e+02, 1.500e+02, 6.750e+02, 6.800e+02, 6.260e+02, 1.520e+02,
       6.350e+02, 6.730e+02, 1.620e+02, 6.840e+02, 1.570e+02, 6.690e+02,
       5.390e+02, 1.540e+02, 6.190e+02, 1.590e+02, 6.720e+02, 1.510e+02,
       2.558e+03, 7.370e+02, 6.100e+01, 7.710e+02, 6.000e+01, 6.715e+03,
       5.900e+01, 5.500e+01, 5.550e+02, 8.300e+02, 5.800e+01, 5.400e+01,
       9.010e+02, 8.150e+02, 8.170e+02, 9.200e+01, 5.700e+01, 8.040e+02,
       8.780e+02, 1.475e+03, 5.600e+01, 2.109e+03, 2.005e+03, 8.300e+01,
       8.900e+01, 8.400e+01, 8.500e+01, 8.700e+01, 8.800e+01, 9.000e+01,
       9.977e+03, 9.100e+01, 9.470e+02, 1.199e+03, 9.300e+01, 9.400e+01,
       9.500e+01, 9.700e+01, 9.978e+03, 1.200e+01, 9.530e+02, 1.100e+01,
       9.380e+02, 1.000e+01, 3.600e+01, 1.035e+03, 3.500e+01, 1.061e+03,
       2.500e+03, 8.450e+03, 1.000e+00, 1.114e+03, 3.000e+00, 2.000e+00,
       2.007e+03, 2.700e+01, 3.075e+03, 1.255e+03, 2.293e+03, 1.079e+03,
       2.800e+01, 1.234e+03, 1.400e+01, 1.986e+03, 1.345e+03, 2.006e+03,
       1.023e+03, 2.022e+03, 1.500e+01, 1.302e+03, 9.984e+03, 2.020e+03,
       2.291e+03, 1.300e+01, 6.700e+01, 1.432e+03, 1.457e+03, 6.600e+01,
       1.525e+03, 6.200e+02, 1.652e+03, 1.745e+03, 7.498e+03, 6.500e+01,
       1.476e+03, 8.467e+03, 6.300e+01, 6.400e+01, 9.900e+01, 1.583e+03,
       1.584e+03, 1.589e+03, 1.000e+02, 1.651e+03, 1.010e+02, 1.650e+02,
       2.770e+03, 1.637e+03, 1.030e+02, 9.930e+02, 9.840e+02, 1.040e+02,
       1.050e+02, 1.018e+03, 1.090e+02, 1.100e+02, 2.230e+03, 1.120e+02,
       1.140e+02, 1.673e+03, 1.731e+03, 1.130e+02, 1.742e+03, 6.382e+03,
       1.236e+03, 1.070e+02, 1.053e+03, 1.110e+02, 1.020e+02, 1.150e+02,
       1.080e+02, 1.060e+02, 1.102e+03, 7.000e+00, 1.811e+03, 9.000e+00,
       8.000e+00, 1.855e+03, 2.900e+01, 3.000e+01, 3.100e+01, 1.509e+03,
       3.200e+01, 1.902e+03, 3.300e+01, 1.160e+02, 1.220e+02, 4.061e+03,
       1.240e+02, 2.004e+03, 1.170e+02, 2.058e+03, 1.250e+02, 1.300e+02,
       2.093e+03, 1.190e+02, 2.013e+03, 1.987e+03, 1.180e+02, 2.156e+03,
       1.260e+02, 2.084e+03, 1.280e+02, 1.988e+03, 1.985e+03, 1.210e+02,
       2.010e+03, 1.290e+02, 1.230e+02, 4.000e+00, 2.149e+03, 6.000e+00,
       2.056e+03, 2.160e+03, 2.100e+01, 1.311e+03, 2.257e+03, 2.000e+01,
       2.683e+03, 3.800e+01, 3.900e+01, 2.290e+03, 4.000e+01, 2.295e+03,
       4.300e+01, 1.179e+03, 4.400e+01, 4.200e+01, 1.055e+03, 4.100e+01,
       8.000e+01, 2.200e+01, 1.903e+03, 2.300e+01, 2.400e+01, 2.500e+01,
       2.600e+01, 2.552e+03, 1.282e+03, 1.900e+01, 2.569e+03, 1.600e+01,
       2.588e+03, 1.700e+01, 1.800e+01, 7.800e+01, 7.700e+01, 7.600e+01,
       2.777e+03, 1.380e+02, 3.110e+03, 1.360e+02, 1.320e+02, 2.886e+03,
       6.543e+03, 1.440e+02, 1.390e+02, 2.915e+03, 2.939e+03, 3.127e+03,
       1.460e+02, 8.266e+03, 1.450e+02, 2.789e+03, 1.420e+02, 1.330e+02,
       2.884e+03, 2.601e+03, 1.410e+02, 1.430e+02, 3.125e+03, 2.913e+03,
       1.340e+02, 1.350e+02, 1.310e+02, 2.919e+03, 5.660e+02, 1.324e+03,
       3.057e+03, 1.370e+02, 2.890e+03, 1.400e+02, 2.018e+03, 7.040e+02,
       2.930e+03])
# Frequency table of the 'ear101a' codes, most common first.
# NOTE(review): the recorded output is dominated by -8.0, presumably a survey
# "not applicable" sentinel rather than a real category -- confirm against the
# CFPS codebook before interpreting the counts.
df.loc[:, 'ear101a'].value_counts()
-8.0     31876
 44.0      165
 62.0      144
 31.0      111
 51.0       93
 41.0       90
 21.0       65
 32.0       57
 14.0       56
 13.0       47
 37.0       46
 23.0       42
 36.0       40
 35.0       33
 61.0       27
 53.0       27
 43.0       25
 33.0       24
 52.0       23
 45.0       20
 22.0       17
 42.0       17
 65.0       11
 34.0       11
 12.0       10
 50.0        8
 11.0        7
 46.0        2
 15.0        2
 54.0        1
Name: ear101a, dtype: int64
# Count occurrences of each 'qa401c_code' value and present the result as a
# one-column DataFrame so the notebook renders it as a table.
df['qa401c_code'].value_counts().to_frame()
qa401c_code
113.0 11
21.0 9
123.0 8
144.0 7
124.0 7
... ...
1053.0 1
8316.0 1
2363.0 1
1729.0 1
652.0 1

324 rows × 1 columns