中国家庭追踪调查2018#
import pandas as pd
import numpy as np
import seaborn as sns
import pylab as plt
# Use a CJK-capable font so that Chinese labels render correctly in figures.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
# Render the minus sign correctly; the original author notes that this setting
# does not work together with ['SimHei'].
plt.rcParams['axes.unicode_minus'] = False
# Load the CFPS 2018 person file from a machine-specific absolute path.
# convert_categoricals=False keeps the raw numeric codes instead of Stata value
# labels; convert_missing=False reads Stata missing values as NaN.
df = pd.read_stata('/Users/datalab/bigdata/中国家庭追踪调查2018/cfps2018person_202012.dta',
convert_categoricals=False, convert_missing=False)
df.head()
pid | code | fid18 | fid16 | fid14 | fid12 | fid10 | pid_a_f | pid_a_m | selfrpt | ... | cfps2018eduy_im | gdge | gdgeyear | gdgemonth | catipilot | pg02 | pg1201_min | pg1201_max | interviewerid18 | releaseversion | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 100051501.0 | 501.0 | 100051.0 | 100051.0 | 100051.0 | -8.0 | -8.0 | 110043201.0 | 110043105.0 | 1.0 | ... | 12.0 | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | 761040.0 | 2.1 |
1 | 100051502.0 | 502.0 | 100051.0 | 100051.0 | 100051.0 | -8.0 | -8.0 | -8.0 | -8.0 | 1.0 | ... | 12.0 | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | 761040.0 | 2.1 |
2 | 100160601.0 | 601.0 | 100160.0 | -8.0 | -8.0 | -8.0 | -8.0 | -8.0 | -8.0 | 1.0 | ... | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | 459505.0 | 2.1 |
3 | 100376551.0 | 551.0 | 100376.0 | 100376.0 | -8.0 | -8.0 | -8.0 | -8.0 | -8.0 | 1.0 | ... | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | 526621.0 | 2.1 |
4 | 100551551.0 | 551.0 | 100551.0 | 100551.0 | -8.0 | -8.0 | -8.0 | -8.0 | -8.0 | 1.0 | ... | NaN | NaN | NaN | NaN | 0.0 | NaN | NaN | NaN | 696822.0 | 2.1 |
5 rows × 1371 columns
len(df)
37354
print(sorted(df.columns.tolist()))
['a12hk', 'a12p', 'age', 'birthp', 'birthw', 'catipilot', 'cesd20sc', 'cesd8', 'cfps2018edu', 'cfps2018eduy', 'cfps2018eduy_im', 'cfps2018sch', 'cid18', 'cmonth', 'cmstart', 'code', 'cohabitn', 'countyid18', 'cyear', 'ear1', 'ear101', 'ear101a', 'ear101c_code', 'ear101e_code', 'ear102', 'ear103', 'ear104c', 'ear104m', 'ear104y', 'ear105m', 'ear105y', 'ear106_1fc_a_1', 'ear106_1fc_a_2', 'ear106_1fc_a_3', 'ear106_1fc_a_4', 'ear106_1fc_a_5', 'ear106_1fc_a_6', 'ear106_1fc_a_7', 'ear106_1fc_a_8', 'ear106_1fm_a_1', 'ear106_1fm_a_2', 'ear106_1fm_a_3', 'ear106_1fm_a_4', 'ear106_1fm_a_5', 'ear106_1fm_a_6', 'ear106_1fm_a_7', 'ear106_1fm_a_8', 'ear106_1fy_a_1', 'ear106_1fy_a_2', 'ear106_1fy_a_3', 'ear106_1fy_a_4', 'ear106_1fy_a_5', 'ear106_1fy_a_6', 'ear106_1fy_a_7', 'ear106_1fy_a_8', 'ear106_1sm_a_1', 'ear106_1sm_a_2', 'ear106_1sm_a_3', 'ear106_1sm_a_4', 'ear106_1sm_a_5', 'ear106_1sm_a_6', 'ear106_1sm_a_7', 'ear106_1sm_a_8', 'ear106_1sy_a_1', 'ear106_1sy_a_2', 'ear106_1sy_a_3', 'ear106_1sy_a_4', 'ear106_1sy_a_5', 'ear106_1sy_a_6', 'ear106_1sy_a_7', 'ear106_1sy_a_8', 'ear106_a_1', 'ear106_a_2', 'ear106_a_3', 'ear106_a_4', 'ear106_a_5', 'ear106_a_6', 'ear106_a_7', 'ear106_a_8', 'ear2', 'ear201', 'ear201a', 'ear201c_code', 'ear201e_code', 'ear202m', 'ear202y', 'ear3_1fc_a_1', 'ear3_1fc_a_2', 'ear3_1fc_a_3', 'ear3_1fc_a_4', 'ear3_1fc_a_5', 'ear3_1fc_a_6', 'ear3_1fc_a_7', 'ear3_1fm_a_1', 'ear3_1fm_a_2', 'ear3_1fm_a_3', 'ear3_1fm_a_4', 'ear3_1fm_a_5', 'ear3_1fm_a_6', 'ear3_1fm_a_7', 'ear3_1fy_a_1', 'ear3_1fy_a_2', 'ear3_1fy_a_3', 'ear3_1fy_a_4', 'ear3_1fy_a_5', 'ear3_1fy_a_6', 'ear3_1fy_a_7', 'ear3_1sm_a_1', 'ear3_1sm_a_2', 'ear3_1sm_a_3', 'ear3_1sm_a_4', 'ear3_1sm_a_5', 'ear3_1sm_a_6', 'ear3_1sm_a_7', 'ear3_1sy_a_1', 'ear3_1sy_a_2', 'ear3_1sy_a_3', 'ear3_1sy_a_4', 'ear3_1sy_a_5', 'ear3_1sy_a_6', 'ear3_1sy_a_7', 'ear3_a_1', 'ear3_a_2', 'ear3_a_3', 'ear3_a_4', 'ear3_a_5', 'ear3_a_6', 'ear3_a_7', 'ear3_a_8', 'ear4', 'ear5a_a_1', 'ear5a_a_2', 'ear5a_a_3', 'ear5a_a_4', 'ear5a_a_5', 
'ear5a_a_6', 'ear5c_a_1_code', 'ear5c_a_2_code', 'ear5c_a_3_code', 'ear5c_a_4_code', 'ear5c_a_5_code', 'ear5c_a_6_code', 'ear5e_a_1_code', 'ear5e_a_2_code', 'ear5e_a_3_code', 'ear5e_a_4_code', 'ear5e_a_5_code', 'ear601_a_1', 'ear601_a_10', 'ear601_a_11', 'ear601_a_12', 'ear601_a_13', 'ear601_a_17', 'ear601_a_18', 'ear601_a_19', 'ear601_a_2', 'ear601_a_25', 'ear601_a_26', 'ear601_a_27', 'ear601_a_28', 'ear601_a_3', 'ear601_a_33', 'ear601_a_4', 'ear601_a_41', 'ear601_a_5', 'ear601_a_6', 'ear601_a_7', 'ear601_a_9', 'ear6fc_a_1', 'ear6fc_a_10', 'ear6fc_a_11', 'ear6fc_a_12', 'ear6fc_a_13', 'ear6fc_a_17', 'ear6fc_a_18', 'ear6fc_a_19', 'ear6fc_a_2', 'ear6fc_a_25', 'ear6fc_a_26', 'ear6fc_a_27', 'ear6fc_a_28', 'ear6fc_a_3', 'ear6fc_a_33', 'ear6fc_a_4', 'ear6fc_a_41', 'ear6fc_a_5', 'ear6fc_a_6', 'ear6fc_a_7', 'ear6fc_a_9', 'ear6fm_a_1', 'ear6fm_a_10', 'ear6fm_a_11', 'ear6fm_a_12', 'ear6fm_a_13', 'ear6fm_a_17', 'ear6fm_a_18', 'ear6fm_a_19', 'ear6fm_a_2', 'ear6fm_a_25', 'ear6fm_a_26', 'ear6fm_a_27', 'ear6fm_a_28', 'ear6fm_a_3', 'ear6fm_a_33', 'ear6fm_a_4', 'ear6fm_a_41', 'ear6fm_a_5', 'ear6fm_a_6', 'ear6fm_a_7', 'ear6fm_a_9', 'ear6fy_a_1', 'ear6fy_a_10', 'ear6fy_a_11', 'ear6fy_a_12', 'ear6fy_a_13', 'ear6fy_a_17', 'ear6fy_a_18', 'ear6fy_a_19', 'ear6fy_a_2', 'ear6fy_a_25', 'ear6fy_a_26', 'ear6fy_a_27', 'ear6fy_a_28', 'ear6fy_a_3', 'ear6fy_a_33', 'ear6fy_a_4', 'ear6fy_a_41', 'ear6fy_a_5', 'ear6fy_a_6', 'ear6fy_a_7', 'ear6fy_a_9', 'ear6sm_a_1', 'ear6sm_a_10', 'ear6sm_a_11', 'ear6sm_a_12', 'ear6sm_a_13', 'ear6sm_a_17', 'ear6sm_a_18', 'ear6sm_a_19', 'ear6sm_a_2', 'ear6sm_a_25', 'ear6sm_a_26', 'ear6sm_a_27', 'ear6sm_a_28', 'ear6sm_a_3', 'ear6sm_a_33', 'ear6sm_a_4', 'ear6sm_a_41', 'ear6sm_a_5', 'ear6sm_a_6', 'ear6sm_a_7', 'ear6sm_a_9', 'ear6sy_a_1', 'ear6sy_a_10', 'ear6sy_a_11', 'ear6sy_a_12', 'ear6sy_a_13', 'ear6sy_a_17', 'ear6sy_a_18', 'ear6sy_a_19', 'ear6sy_a_2', 'ear6sy_a_25', 'ear6sy_a_26', 'ear6sy_a_27', 'ear6sy_a_28', 'ear6sy_a_3', 'ear6sy_a_33', 'ear6sy_a_4', 'ear6sy_a_41', 
'ear6sy_a_5', 'ear6sy_a_6', 'ear6sy_a_7', 'ear6sy_a_9', 'ear7a_a_1', 'ear7a_a_2', 'ear7a_a_3', 'ear7c_a_1_code', 'ear7c_a_2_code', 'ear7c_a_3_code', 'ear7e_a_1_code', 'ear7e_a_2_code', 'ear7mm_a_1', 'ear7mm_a_2', 'ear7mm_a_3', 'ear7my_a_1', 'ear7my_a_2', 'ear7my_a_3', 'ear7sefc_a_1', 'ear7sefc_a_2', 'ear7sefc_a_3', 'ear7sefm_a_1', 'ear7sefm_a_2', 'ear7sefm_a_3', 'ear7sefy_a_1', 'ear7sefy_a_2', 'ear7sefy_a_3', 'ear7sesm_a_1', 'ear7sesm_a_2', 'ear7sesm_a_3', 'ear7sesy_a_1', 'ear7sesy_a_2', 'ear7sesy_a_3', 'eb202_1', 'eb203_1', 'eb406_1_a_1', 'eb406_1_a_2', 'eb602_1_a_1', 'eb602_1_a_2', 'eb602_1_a_3', 'edu_last', 'edu_updated', 'eeb1', 'eeb2', 'eeb201m', 'eeb201y', 'eeb202c', 'eeb202m', 'eeb202y', 'eeb203c', 'eeb203m', 'eeb203y', 'eeb301', 'eeb302code', 'eeb4', 'eeb401m_a_1', 'eeb401m_a_2', 'eeb401y_a_1', 'eeb401y_a_2', 'eeb4021_a_1', 'eeb4021_a_2', 'eeb4022_a_1code', 'eeb402m_a_1', 'eeb402m_a_2', 'eeb402y_a_1', 'eeb402y_a_2', 'eeb403_a_1', 'eeb403_a_2', 'eeb404_a_1', 'eeb404_a_2', 'eeb406c_a_1', 'eeb406c_a_2', 'eeb406m_a_1', 'eeb406m_a_2', 'eeb406y_a_1', 'eeb406y_a_2', 'eeb407_a_1', 'eeb407_a_2', 'eeb409_a_1', 'eeb409_a_2', 'eeb5', 'eeb501', 'eeb6', 'eeb601m_a_1', 'eeb601m_a_2', 'eeb601m_a_3', 'eeb601y_a_1', 'eeb601y_a_2', 'eeb601y_a_3', 'eeb602c_a_1', 'eeb602c_a_2', 'eeb602c_a_3', 'eeb602m_a_1', 'eeb602m_a_2', 'eeb602m_a_3', 'eeb602y_a_1', 'eeb602y_a_2', 'eeb602y_a_3', 'eeb603m_a_1', 'eeb603m_a_2', 'eeb603m_a_3', 'eeb603y_a_1', 'eeb603y_a_2', 'eeb603y_a_3', 'eeb604_a_1', 'eeb604_a_2', 'eeb604_a_3', 'egc101', 'egc1011', 'egc103', 'egc104c', 'egc104m', 'egc104y', 'egc105', 'egc1052m', 'egc1052y', 'egc1053c', 'egc1053m', 'egc1053y', 'egc201', 'egc2012m_a_1', 'egc2012m_a_2', 'egc2012m_a_3', 'egc2012m_a_4', 'egc2012m_a_5', 'egc2012m_a_6', 'egc2012m_a_7', 'egc2012y_a_1', 'egc2012y_a_2', 'egc2012y_a_3', 'egc2012y_a_4', 'egc2012y_a_5', 'egc2012y_a_6', 'egc2012y_a_7', 'egc2013c_a_1', 'egc2013c_a_2', 'egc2013c_a_3', 'egc2013c_a_4', 'egc2013c_a_5', 'egc2013c_a_6', 
'egc2013c_a_7', 'egc2013m_a_1', 'egc2013m_a_2', 'egc2013m_a_3', 'egc2013m_a_4', 'egc2013m_a_5', 'egc2013m_a_6', 'egc2013m_a_7', 'egc2013y_a_1', 'egc2013y_a_2', 'egc2013y_a_3', 'egc2013y_a_4', 'egc2013y_a_5', 'egc2013y_a_6', 'egc2013y_a_7', 'egc2021_a_1', 'egc2021_a_2', 'egc2021_a_3', 'egc2021_a_4', 'egc2021_a_5', 'egc2021_a_6', 'egc2021_a_7', 'egc2021_b_1', 'egc202_a_1', 'egc202_a_2', 'egc202_a_3', 'egc202_a_4', 'egc202_a_5', 'egc202_a_6', 'egc202_a_7', 'egc202_b_1', 'egc2031_a_1', 'egc2031_a_2', 'egc2031_a_3', 'egc2031_a_4', 'egc2031_a_5', 'egc2031_a_6', 'egc2031_a_7', 'egc2031_b_1', 'egc203_a_1', 'egc203_a_2', 'egc203_a_3', 'egc203_a_4', 'egc203_a_5', 'egc203_a_6', 'egc203_a_7', 'egc203_b_1', 'egc204_a_1', 'egc204_a_2', 'egc204_a_3', 'egc204_a_4', 'egc204_a_5', 'egc204_a_6', 'egc204_a_7', 'egc204_b_1', 'egc205_a_1', 'egc205_a_2', 'egc205_a_3', 'egc205_a_4', 'egc205_a_5', 'egc205_a_6', 'egc205_a_7', 'egc205_b_1', 'employ', 'eversmoke', 'father_dead', 'fid10', 'fid12', 'fid14', 'fid16', 'fid18', 'firstenterehcm', 'firstenterehcy', 'firstj_last', 'fml_count', 'gc1012_max', 'gc1012_min', 'gdge', 'gdgemonth', 'gdgeyear', 'gender', 'gender_update', 'gene', 'ibirthy', 'ibirthy_update', 'iinterv', 'iintervy', 'income', 'incomea', 'incomeb', 'interrupt', 'interviewerid18', 'jobclass', 'jobclass_2016', 'jobclass_a_1', 'jobclass_a_2', 'jobclass_a_3', 'jobclass_a_4', 'jobclass_a_5', 'jobclass_a_6', 'jobclass_a_7', 'jobclass_base', 'jobclass_base_2016', 'jobclass_base_a_1', 'jobclass_base_a_2', 'jobclass_base_a_3', 'jobclass_base_a_4', 'jobclass_base_a_5', 'jobclass_base_a_6', 'jobclass_base_a_7', 'jobstartn', 'kg1301', 'kg1302', 'kg13a_a_1', 'kg13a_a_2', 'kg13a_a_3', 'kg13a_a_4', 'kg13a_a_5', 'kg13a_a_77', 'kg13a_s_1', 'kg13a_s_2', 'kg13a_s_3', 'kg23', 'kgd101', 'kgd1a', 'kgd2', 'kgd2a_a_1', 'kgd2a_a_2', 'kgd2a_a_3', 'kgd2a_a_4', 'kgd2a_a_5', 'kgd2a_a_77', 'kgd2a_s_1', 'kgd2a_s_2', 'kgd2a_s_3', 'kgd2a_s_4', 'kgd3code', 'kgd4code', 'kgd4code_isco', 'kgd4code_isei', 
'kgd4code_siops', 'kr425', 'kr426', 'kr4301', 'kr4302', 'kr431', 'kr432', 'kra605', 'kw0', 'kw1001_b_1', 'kw1001_b_2', 'kw1001_b_3', 'kw1001_b_4', 'kw1001_b_5', 'kw1001_b_6', 'kw1001_b_7', 'kw1002_b_1code', 'kw1002_b_2code', 'kw1003_a_1code', 'kw1003_a_2code', 'kw1003_a_3code', 'kw1003_a_4code', 'kw1004_b_1', 'kw1004_b_2', 'kw1004_b_3', 'kw1004_b_4', 'kw1004_b_5', 'kw1004_b_6', 'kw1004_b_7', 'kw1005_b_1', 'kw1005_b_2', 'kw1005_b_3', 'kw1_s_1', 'kw1_s_2', 'kw1_s_3', 'kw1_s_4', 'kw1_s_5', 'kw1_s_6', 'kw1_s_7', 'kw1_s_8', 'kw1_s_9', 'kw2m', 'kw2y', 'kw301_b_1', 'kw401_b_1', 'kw501_b_1', 'kw601_b_1', 'kw701_b_1', 'kw801_b_1', 'kw801_b_2', 'kwa1_b_1', 'kwa2_b_1', 'kz202', 'kz203', 'kz207', 'kz5', 'lastyjob_comm_a_1', 'lastyjob_comm_a_2', 'lastyjob_comm_a_3', 'lastyjob_comm_a_4', 'lastyjob_comm_a_5', 'lastyjob_comm_a_6', 'lastyjob_comm_a_7', 'lastyjob_cur', 'lastyjob_last', 'lsample', 'm1m', 'm401', 'm7', 'marriage_last', 'marriage_last_update', 'marriagen', 'mathlist', 'mathtest18', 'mathtest18_sc2', 'metotal', 'military', 'mincomea_a_1', 'mincomea_a_2', 'mincomea_a_3', 'mincomea_a_4', 'mincomea_a_5', 'mincomea_a_6', 'mincomea_a_7', 'mincomea_b_1', 'minzu', 'mother_dead', 'outunit18', 'party', 'pd501a', 'pd501b', 'pd502a', 'pd503a', 'pd503r', 'pd504a', 'pd577r', 'pd5ckp', 'pd5total', 'pd5total_m', 'pd6', 'pd7r', 'pension', 'pg02', 'pg1201_max', 'pg1201_min', 'pid', 'pid_a_f', 'pid_a_m', 'plane', 'provcd18', 'proxy_iwmode', 'proxyrpt', 'proxytype', 'psu', 'pt1', 'pt2', 'pt201', 'pt3', 'pt301', 'pt4', 'pt401', 'pt5', 'pt501', 'pt7', 'pt701', 'pt8', 'qa001b', 'qa001m', 'qa001y', 'qa002', 'qa101', 'qa301', 'qa302', 'qa3020', 'qa302a_code', 'qa302c_code', 'qa303m', 'qa303y', 'qa401', 'qa401a_code', 'qa401c_code', 'qa601', 'qa602', 'qa602a_code', 'qa602c_code', 'qa603', 'qa701code', 'qa9', 'qa901', 'qa902', 'qbb001', 'qbb002', 'qbb003', 'qbb004', 'qbb005', 'qc1', 'qc2', 'qc201', 'qc3', 'qc4', 'qc401', 'qc5', 'qc701', 'qc7a', 'qc7b', 'qea0', 'qea1', 'qea2', 'qea201m', 
'qea201y', 'qea202', 'qea203code', 'qea204', 'qea205m', 'qea205y', 'qea206', 'qea207', 'qea2071', 'qea208m', 'qea208y', 'qea209m', 'qea209y', 'qea210code', 'qea2111', 'qea211m', 'qea211y', 'qeb405_1_a_1', 'qeb405_1_a_2', 'qext002', 'qext004', 'qext006', 'qext008', 'qext012', 'qext013', 'qf1_a_1', 'qf1_a_2', 'qf1_a_3', 'qf1_a_4', 'qf1_a_5', 'qf1_a_6', 'qf1_a_7', 'qf1_a_8', 'qf1_a_9', 'qf201_a_1', 'qf201_a_2', 'qf201_a_3', 'qf201_a_4', 'qf201_a_5', 'qf201_a_6', 'qf201_a_7', 'qf201_a_8', 'qf201_a_9', 'qf202_a_1', 'qf202_a_2', 'qf202_a_3', 'qf202_a_4', 'qf202_a_5', 'qf202_a_6', 'qf202_a_7', 'qf202_a_8', 'qf202_a_9', 'qf203_a_1', 'qf203_a_2', 'qf203_a_3', 'qf203_a_4', 'qf203_a_5', 'qf203_a_6', 'qf203_a_7', 'qf203_a_8', 'qf203_a_9', 'qf204_a_1', 'qf204_a_2', 'qf204_a_3', 'qf204_a_4', 'qf204_a_5', 'qf204_a_6', 'qf204_a_7', 'qf204_a_8', 'qf301_a_1', 'qf301_a_2', 'qf301_a_3', 'qf301_a_4', 'qf301_a_5', 'qf301_a_6', 'qf301_a_7', 'qf301_a_8', 'qf301_a_9', 'qf302_a_1', 'qf302_a_2', 'qf302_a_3', 'qf302_a_4', 'qf302_a_5', 'qf302_a_6', 'qf302_a_7', 'qf302_a_8', 'qf303_a_1', 'qf303_a_2', 'qf303_a_3', 'qf303_a_4', 'qf303_a_5', 'qf303_a_6', 'qf303_a_7', 'qf303_a_8', 'qf303_a_9', 'qf304_a_1', 'qf304_a_2', 'qf304_a_3', 'qf304_a_4', 'qf304_a_5', 'qf304_a_6', 'qf304_a_7', 'qf304_a_8', 'qf304_a_9', 'qf305_a_1', 'qf305_a_2', 'qf305_a_3', 'qf305_a_4', 'qf305_a_5', 'qf305_a_6', 'qf305_a_7', 'qf305_a_8', 'qf305_a_9', 'qf306_a_1', 'qf306_a_2', 'qf306_a_3', 'qf306_a_4', 'qf306_a_5', 'qf306_a_6', 'qf306_a_7', 'qf306_a_8', 'qf306_a_9', 'qf5_a_1', 'qf5_a_2', 'qf601_a_1', 'qf601_a_2', 'qf602_a_1', 'qf602_a_2', 'qf603_a_1', 'qf603_a_2', 'qf604_a_1', 'qf604_a_2', 'qf701_a_1', 'qf701_a_2', 'qf702_a_1', 'qf702_a_2', 'qf703_a_1', 'qf703_a_2', 'qf704_a_1', 'qf704_a_2', 'qf705_a_1', 'qf705_a_2', 'qf706_a_1', 'qf706_a_2', 'qg1', 'qg10', 'qg1001', 'qg101', 'qg11', 'qg1101', 'qg1102', 'qg12', 'qg1201_max', 'qg1201_min', 'qg1202', 'qg1203', 'qg1204_max', 'qg1204_min', 'qg1303', 'qg1304', 'qg1305', 'qg14', 
'qg1401code', 'qg15', 'qg1501', 'qg16', 'qg17', 'qg1701', 'qg18', 'qg19', 'qg2', 'qg20', 'qg201', 'qg2032', 'qg22', 'qg301', 'qg3011', 'qg301a_code', 'qg301c_code', 'qg302code', 'qg303code', 'qg303code_egp', 'qg303code_isco', 'qg303code_isei', 'qg303code_siops', 'qg401', 'qg402', 'qg403', 'qg404', 'qg405', 'qg406', 'qg5', 'qg501', 'qg502', 'qg6', 'qg701_a_1', 'qg701_a_2', 'qg701_a_3', 'qg701_a_5', 'qg702', 'qg7_a_1', 'qg7_a_2', 'qg7_a_3', 'qg7_a_4', 'qg7_a_77', 'qg7_a_78', 'qg7_s_1', 'qg7_s_2', 'qg7_s_3', 'qg7_s_4', 'qg7_s_5', 'qg801_a_1', 'qg801_a_2', 'qg801_a_3', 'qg801_a_4', 'qg801_a_5', 'qg8_a_1', 'qg8_a_2', 'qg8_a_3', 'qg8_a_4', 'qg8_a_77', 'qg8_a_78', 'qg8_s_1', 'qg8_s_2', 'qg8_s_3', 'qg8_s_4', 'qg8_s_5', 'qg901', 'qg9_a_1', 'qg9_a_2', 'qg9_a_3', 'qg9_a_4', 'qg9_a_5', 'qg9_a_78', 'qg9_s_1', 'qg9_s_2', 'qg9_s_3', 'qg9_s_4', 'qg9_s_5', 'qga1', 'qga101', 'qga2', 'qga301', 'qga302', 'qga401code', 'qga401code_isco', 'qga401code_isei', 'qga401code_siops', 'qga4code', 'qgb1', 'qgb2', 'qgb201', 'qgb3', 'qgb4', 'qgb5', 'qgb501', 'qgb6', 'qgd0', 'qgd01', 'qi101', 'qi1011', 'qi102m', 'qi102y', 'qi200', 'qi2001', 'qi202', 'qi203_max', 'qi203_mean', 'qi203_min', 'qi301_a_1', 'qi301_a_2', 'qi301_a_3', 'qi301_a_4', 'qi301_a_5', 'qi301_a_6', 'qi301_a_7', 'qi301_a_77', 'qi301_a_78', 'qi301_s_1', 'qi301_s_2', 'qi301_s_3', 'qi301_s_4', 'qi301_s_5', 'qi301_s_6', 'qi301_s_7', 'qint001', 'qint003', 'qint005', 'qint007', 'qint009', 'qint010', 'qint011', 'qint014', 'qk5', 'qk6', 'qk701m', 'qk702', 'qk703', 'qk7m', 'qk811', 'qk812', 'qk815', 'qk816', 'qk821', 'qk822', 'qk825', 'qk826', 'qka201', 'qka202', 'qka203', 'qka204', 'qkz204', 'qm101m', 'qm102m', 'qm103m', 'qm104m', 'qm105m', 'qm106m', 'qm107m', 'qm108m', 'qm109m', 'qm110m', 'qm201', 'qm2011', 'qm2016', 'qm202', 'qm203', 'qm204', 'qm205', 'qm206', 'qm207', 'qm208', 'qm209', 'qm210', 'qm211', 'qm212', 'qm213', 'qm214', 'qm215', 'qm4011', 'qm40110', 'qm40111', 'qm4012', 'qm4013', 'qm4014', 'qm4015', 'qm4016', 'qm4017', 
'qm4018', 'qm4019', 'qm501', 'qm501a', 'qm502', 'qm503', 'qm504', 'qm505', 'qm506', 'qm507', 'qm508', 'qm509', 'qm510', 'qm6', 'qm6010', 'qm6011', 'qm6012', 'qm6013', 'qm6014', 'qm6015', 'qm6016', 'qm6017', 'qm801', 'qm802', 'qm803', 'qm9', 'qn1001', 'qn10021', 'qn10022', 'qn10023', 'qn10024', 'qn10025', 'qn10026', 'qn101', 'qn102', 'qn103', 'qn104', 'qn105', 'qn1101', 'qn12012', 'qn12016', 'qn2', 'qn201_b_1', 'qn201_b_2', 'qn202', 'qn203', 'qn3', 'qn4001', 'qn4002', 'qn4003', 'qn4004', 'qn4005', 'qn402', 'qn406', 'qn407', 'qn411', 'qn412', 'qn414', 'qn416', 'qn418', 'qn420', 'qn6011', 'qn6012', 'qn6013', 'qn6014', 'qn6015', 'qn6016', 'qn6017', 'qn6018', 'qn8011', 'qn8012', 'qp101', 'qp102', 'qp103', 'qp201', 'qp202', 'qp301', 'qp302code', 'qp303', 'qp304', 'qp401', 'qp402acode', 'qp402bcode', 'qp501', 'qp502', 'qp601', 'qp602', 'qp603', 'qp605_a_1', 'qp605_a_2', 'qp605_a_3', 'qp605_a_4', 'qp605_a_5', 'qp605_a_78', 'qp605_s_1', 'qp605_s_2', 'qp605_s_3', 'qp605_s_4', 'qp605_s_5', 'qp701', 'qp702', 'qph1', 'qph2', 'qph3', 'qq1001', 'qq1002', 'qq1011', 'qq1012', 'qq1013', 'qq1014', 'qq1015', 'qq1016', 'qq1017', 'qq1101', 'qq1102', 'qq201', 'qq2011', 'qq202', 'qq204', 'qq205', 'qq301', 'qq401', 'qq4010', 'qq4011', 'qq4012', 'qq402', 'qq403a', 'qq403b', 'qq501', 'qq701a', 'qq9010', 'qq9011', 'qq9012', 'qr0', 'qr1', 'qr2', 'qr3', 'qr4', 'qr5', 'qr6', 'qr7', 'qs10', 'qs1001', 'qs1002', 'qs1003', 'qs1011', 'qs1012', 'qs102b', 'qs11', 'qs1102', 'qs1103', 'qs1_b_1code', 'qs1_b_2', 'qs2', 'qs201', 'qs202', 'qs3', 'qs3m', 'qs401code', 'qs4_b_1', 'qs4_b_2', 'qs5', 'qs501_b_1code', 'qs501_b_2', 'qs502', 'qs503', 'qs504', 'qs601', 'qs601n', 'qs602', 'qs602n', 'qs603', 'qs603mn', 'qs604', 'qs604n', 'qs605', 'qs605n', 'qs606', 'qs606mn', 'qs7', 'qs701_b_1code', 'qs701_b_2', 'qs702', 'qs703', 'qs704', 'qs705', 'qs8', 'qs801_b_1', 'qs801_b_2code', 'qs9code', 'qu102', 'qu102a', 'qu1m', 'qu201', 'qu202', 'qu230', 'qu231m', 'qu240', 'qu250m', 'qu301', 'qu302', 'qu303', 'qu304', 'qu305', 
'qu601', 'qu602', 'qu603', 'qu701', 'qu702', 'qu703', 'qu704', 'qu705', 'qu7051', 'qu801', 'qu802', 'qu803', 'qu804', 'qu805', 'qu806', 'qx3', 'qx301', 'qx301un', 'qx4', 'qx401', 'qx401un', 'qx5', 'qx501', 'qx501un', 'qx6', 'qx601', 'qx601un', 'qx7', 'qz101_s_1', 'qz101_s_2', 'qz102_s_1', 'qz102_s_2', 'qz102_s_3', 'qz103', 'r1_last', 'releaseversion', 'respc1pid', 'retire', 'risko', 'rswt_natcs18n', 'rswt_natpn1018n', 'school', 'school_last', 'self_iwmode', 'selfrpt', 'smokeage', 'subpopulation', 'subsample', 'tb6_a18_p', 'train', 'urban18', 'w01', 'whichmathlist', 'whichwordlist', 'wk9', 'wl6', 'wl7', 'wl801', 'wl802', 'wl803', 'wm701', 'wm702', 'wm703', 'wm704', 'wm705', 'wm706', 'wm707', 'wm708', 'wm709', 'wm710', 'wm711', 'wm712', 'wordlist', 'wordtest18', 'wordtest18_sc2', 'wv101', 'wv102', 'wv103', 'wv104', 'wv105', 'wv106', 'wv107', 'wv108', 'xchildpid_a_1', 'xchildpid_a_10', 'xchildpid_a_2', 'xchildpid_a_3', 'xchildpid_a_4', 'xchildpid_a_5', 'xchildpid_a_6', 'xchildpid_a_7', 'xchildpid_a_8', 'xchildpid_a_9', 'yincomea_a_1', 'yincomea_a_2', 'yincomea_a_3', 'yincomea_a_4', 'yincomea_a_5', 'yincomea_a_6', 'yincomea_a_7', 'yincomea_b_1']
幸福感#
# How happy are you?
# QM2016 "How happy (score)": with 0 as the lowest and 10 as the highest score,
# how happy does the respondent feel? Answers range over 0..10.
df['qm2016'].value_counts()
10.0 8571
8.0 8172
5.0 5031
7.0 3531
9.0 2870
6.0 2556
3.0 659
4.0 656
2.0 300
0.0 272
-8.0 267
1.0 192
-1.0 15
-2.0 5
Name: qm2016, dtype: int64
# Histogram of the raw qm2016 column. Nothing is filtered here, so the negative
# codes that appear in its frequency table (-8, -1, -2) are binned as well.
plt.hist(df['qm2016'], bins = 16);
缺失值#
CFPS小课堂 | 请不要问我,我“不知道”,我“拒绝回答”……
问卷系统跳转造成的缺失,一般用“-8”(不适用)来表示
不同问卷类型合并造成的缺失,一般在数据库中是用“.”来表示的。
受访者原因造成的缺失,这种类型的缺失值在数据库中一般用“-2”(拒绝回答)和“-1”(不知道)来表示。
http://www.isss.pku.edu.cn/cfps/cjwt/cfpsxkt/1295293.htm
QA001B“年龄”
人口统计变量#
df['age'].value_counts()
50.0 847
55.0 810
48.0 778
31.0 774
29.0 766
...
98.0 4
99.0 3
-8.0 2
102.0 1
100.0 1
Name: age, Length: 94, dtype: int64
# Retirement flag: 1 where retire == 1.0, 0 for every other value (NaN included).
df['retired'] = (df['retire'] == 1.0).astype(int)
# Negative values are CFPS missing-value codes (-8 not applicable, -1 don't
# know, -2 refused). Keep only non-negative answers so the codes are not drawn
# as if they were real ages, education levels or incomes. The comparison is
# False for NaN, so NaN rows are dropped too.
plt.hist(df.loc[df['age'] >= 0, 'age']);
plt.hist(df.loc[df['edu_updated'] >= 0, 'edu_updated']);
plt.hist(df.loc[df['cfps2018eduy'] >= 0, 'cfps2018eduy']);
# log1p(x) == log(x + 1); filtering first avoids taking the log of a negative code.
plt.hist(np.log1p(df.loc[df['income'] >= 0, 'income']));
# Gender (qa002): 1 = male, 5 = female
df['qa002'].value_counts()
5.0 16550
1.0 16547
Name: qa002, dtype: int64
# Male indicator from qa002 (1 = male, 5 = female). Rows without a qa002 answer
# stay NaN rather than being counted as female: only 33,097 of the 37,354 rows
# have an answer, so coding "everything else" as 0 would mislabel the rest.
# NOTE(review): the file also has a `gender` column - confirm whether it is a
# more complete source for this indicator.
df['male'] = df['qa002'].map({1.0: 1, 5.0: 0})
# Frequency table of edu_updated.
# NOTE(review): the comment that used to sit here described the gender coding
# and looked like a copy-paste leftover; confirm the meaning of the edu_updated
# codes against the CFPS codebook.
df['edu_updated'].value_counts()
-8.0 14462
4.0 5273
3.0 3598
5.0 3058
0.0 2848
6.0 1648
7.0 1335
10.0 756
8.0 112
9.0 7
Name: edu_updated, dtype: int64
# Youngest age among the rows whose edu_updated carries the -8 code.
edu_is_minus8 = df['edu_updated'] == -8.0
df.loc[edu_is_minus8, 'age'].min()
45.0
def _valid_rows(frame, *columns):
    """Return the rows of *frame* whose *columns* are all non-negative.

    Negative values are CFPS missing-value codes (-8 not applicable, -1 don't
    know, -2 refused). NaN compares as False, so NaN rows are dropped as well.
    """
    keep = (frame[list(columns)] >= 0).all(axis=1)
    return frame[keep]


# Happiness by gender, without the missing codes dragging the boxes down.
sns.boxplot(x="male", y="qm2016", data=_valid_rows(df, 'qm2016'));
# log1p(x) == log(x + 1). Negative income codes become NaN instead of being
# shifted into the positive range by an arbitrary offset and plotted as income.
df['Income'] = np.log1p(df['income'].where(df['income'] >= 0))
sns.boxplot(x="male", y="Income", data=df);
# Mean happiness along age / years of schooling / education level, by gender,
# computed only from rows where both plotted variables hold real answers.
sns.lineplot(y = 'qm2016', x = 'age', hue = 'male', data = _valid_rows(df, 'qm2016', 'age'));
sns.lineplot(y = 'qm2016', x = 'cfps2018eduy', hue = 'male', data = _valid_rows(df, 'qm2016', 'cfps2018eduy'));
sns.lineplot(y = 'qm2016', x = 'edu_updated', hue = 'male', data = _valid_rows(df, 'qm2016', 'edu_updated'));
df['qu7051'].value_counts()
-8.0 21502
1000.0 1261
2000.0 1176
3000.0 895
5000.0 801
...
1740.0 1
26400.0 1
41000.0 1
92000.0 1
680.0 1
Name: qu7051, Length: 265, dtype: int64
df['qu705'].value_counts()
-8.0 15603
7.0 5899
4.0 2980
3.0 2616
1.0 1681
5.0 1531
2.0 1416
6.0 1367
-1.0 3
-2.0 1
Name: qu705, dtype: int64
df['qu702'].value_counts()
-8.0 21274
7.0 5556
1.0 4160
2.0 723
3.0 688
4.0 338
5.0 199
6.0 156
-1.0 2
-2.0 1
Name: qu702, dtype: int64
# Frequency of physical exercise (times)
df['qp701'].value_counts()
0.0 16198
7.0 8171
3.0 2333
2.0 2311
5.0 1094
1.0 975
4.0 915
-8.0 386
6.0 281
14.0 195
10.0 84
21.0 29
-1.0 25
8.0 21
20.0 15
15.0 14
28.0 9
12.0 8
13.0 6
35.0 5
11.0 4
9.0 4
25.0 3
50.0 3
-2.0 2
22.0 2
33.0 1
30.0 1
19.0 1
26.0 1
Name: qp701, dtype: int64
df['qp702'].value_counts()
-8.0 16611
7.0 2984
2.0 1696
3.0 1438
14.0 1397
...
33.0 1
66.5 1
4.3 1
3.6 1
23.0 1
Name: qp702, Length: 117, dtype: int64
认知水平#
# 'wordtest18', 'wordtest18_sc2'
df['wordtest18_sc2'].value_counts()
-8.0 7129
0.0 3161
22.0 1814
21.0 1526
28.0 1400
31.0 1283
26.0 1195
30.0 1192
24.0 1031
25.0 997
27.0 958
19.0 907
29.0 885
23.0 846
32.0 811
12.0 634
17.0 619
18.0 580
33.0 556
20.0 497
13.0 468
16.0 447
4.0 415
11.0 410
10.0 370
6.0 348
7.0 330
34.0 329
1.0 328
5.0 316
9.0 296
15.0 264
3.0 246
14.0 206
8.0 191
2.0 112
Name: wordtest18_sc2, dtype: int64
# Columns of interest: 'mathtest18', 'mathtest18_sc2'
df['mathtest18_sc2'].value_counts()
-8.0 7126
9.0 3011
8.0 2957
0.0 2652
7.0 2246
10.0 2222
4.0 1502
15.0 1480
5.0 1432
6.0 1063
11.0 937
1.0 913
3.0 881
14.0 875
17.0 630
18.0 539
16.0 450
12.0 424
23.0 362
24.0 348
2.0 318
19.0 302
13.0 245
20.0 82
22.0 71
21.0 29
Name: mathtest18_sc2, dtype: int64
CES-D量表 个人的抑郁水平#
采用Center for Epidemiologic Studies Depression Scale (CES-D)量表来测试个人的抑郁水平。
‘cesd20sc’, ‘cesd8’
我们建议：如果用户只需要使用 CFPS2016 和（或）CFPS2018 年的数据，采用 8 道题版本的总分（cesd8）；
但如果需要同时使用 CFPS2012 的数据进行分析，则采用 cesd20sc。
df['cesd8'].value_counts()
10.0 3535
12.0 3439
11.0 3359
13.0 3046
14.0 2993
15.0 2537
8.0 2517
9.0 2406
16.0 2136
17.0 1851
18.0 1446
19.0 859
20.0 649
21.0 488
22.0 382
23.0 278
24.0 197
26.0 132
25.0 128
27.0 75
28.0 45
29.0 32
31.0 25
30.0 24
32.0 19
Name: cesd8, dtype: int64
# Distribution of cesd8, first on the raw scale and then log-transformed.
cesd8_scores = df['cesd8']
plt.hist(cesd8_scores);
cesd8_log = np.log(cesd8_scores)
plt.hist(cesd8_log);
大五人格#
大五人格(OCEAN),也被称为“人格的海洋”,可以通过 NEO-PI-R 量表评定。
·外倾性(extraversion):好交际对不好交际,爱娱乐对严肃,感情丰富对含蓄;表现出热情、社交、果断、活跃、冒险、乐观等特点。
·神经质或情绪稳定性(neuroticism):烦恼对平静,不安全感对安全感,自怜对自我满意,包括焦虑、敌对、压抑、自我意识、冲动、脆弱等特质。
·开放性(openness): 富于想象对务实,寻求变化对遵守惯例,自主对顺从。具有想象、审美、情感丰富、求异、创造、智慧等特征。
·随和性(agreeableness):热心对无情,信赖对怀疑,乐于助人对不合作。包括信任、利他、直率、谦虚、移情等品质。
·尽责性(conscientiousness):有序对无序,谨慎细心对粗心大意,自律对意志薄弱。包括胜任、公正、条理、尽职、成就、自律、谨慎、克制等特点。
['qm'+ str(i) for i in range(201, 216)]
['qm201',
'qm202',
'qm203',
'qm204',
'qm205',
'qm206',
'qm207',
'qm208',
'qm209',
'qm210',
'qm211',
'qm212',
'qm213',
'qm214',
'qm215']
# Item columns for each Big Five trait (three qm2xx items per trait).
bigfive_o = ['qm'+ str(i) for i in [204, 209, 214]]
bigfive_c = ['qm'+ str(i) for i in [201, 207, 211]]
bigfive_e = ['qm'+ str(i) for i in [202, 208, 212]]
bigfive_a = ['qm'+ str(i) for i in [203, 206, 213]]
bigfive_n = ['qm'+ str(i) for i in [205, 210, 215]]
# Trait score = mean of its three items. Negative answers in these items are
# missing-value codes (-8, -1, -2), so they are blanked first. skipna=False then
# leaves the score NaN unless all three items were answered. Summing the raw
# columns instead produces scores such as -8 or -0.33 and turns rows with no
# answers at all into 0.
# NOTE(review): no item is reverse-scored here - confirm against the CFPS
# questionnaire whether any of qm201..qm215 is negatively worded.
for _trait, _items in [('bigfive_o', bigfive_o),
                       ('bigfive_c', bigfive_c),
                       ('bigfive_e', bigfive_e),
                       ('bigfive_a', bigfive_a),
                       ('bigfive_n', bigfive_n)]:
    _answers = df[_items]
    df[_trait] = _answers.where(_answers >= 0).mean(axis=1, skipna=False)
df['bigfive_o'].value_counts()
3.333333 5237
4.000000 4983
0.000000 4276
2.666667 4270
3.000000 3353
2.000000 2879
-8.000000 2843
3.666667 2461
2.333333 2049
4.333333 1473
1.666667 912
4.666667 797
5.000000 715
1.333333 544
1.000000 427
0.666667 40
-1.000000 39
0.333333 27
-0.333333 13
-4.666667 4
-0.666667 2
-3.666667 2
-2.000000 2
-4.333333 2
-1.333333 2
-1.666667 1
-4.000000 1
Name: bigfive_o, dtype: int64
plt.hist(df['bigfive_o']);
df['qm201'].value_counts()
4.0 18000
5.0 8202
-8.0 2838
3.0 2179
2.0 1683
1.0 183
-1.0 11
-2.0 1
Name: qm201, dtype: int64
描述性分析#
def _valid_rows(frame, *columns):
    """Return the rows of *frame* whose *columns* are all non-negative.

    Negative values are CFPS missing-value codes (-8 not applicable, -1 don't
    know, -2 refused). NaN compares as False, so NaN rows are dropped as well.
    """
    keep = (frame[list(columns)] >= 0).all(axis=1)
    return frame[keep]


# Mean exercise measure (qp701 / qp702) along each covariate, split by gender.
# Each plot uses only rows where both plotted variables hold real answers, so
# missing codes neither appear on the x axis nor pull the plotted means down.
# The cesd8 plot is drawn once; it used to be repeated verbatim.
sns.lineplot(y = 'qp701', x = 'bigfive_o', hue = 'male', data = _valid_rows(df, 'qp701', 'bigfive_o'));
sns.lineplot(y = 'qp701', x = 'cesd8', hue = 'male', data = _valid_rows(df, 'qp701', 'cesd8'));
sns.lineplot(y = 'qp701', x = 'wordtest18_sc2', hue = 'male', data = _valid_rows(df, 'qp701', 'wordtest18_sc2'));
sns.lineplot(y = 'qp701', x = 'mathtest18_sc2', hue = 'male', data = _valid_rows(df, 'qp701', 'mathtest18_sc2'));
sns.lineplot(y = 'qp701', x = 'age', hue = 'male', data = _valid_rows(df, 'qp701', 'age'));
sns.lineplot(y = 'qp702', x = 'age', hue = 'male', data = _valid_rows(df, 'qp702', 'age'));
# WV101 "Economic prosperity requires a wider income gap": for the economy to
# prosper, the income gap has to widen.
# 1 = strongly disagree, 2 = disagree, 3 = agree, 4 = strongly agree,
# 5 = neither agree nor disagree [not read out]
df['wv101'].value_counts()
3.0 14345
2.0 13994
1.0 1601
5.0 1298
4.0 1261
-8.0 312
-1.0 284
-2.0 2
Name: wv101, dtype: int64
价值观#
WV101 “经济繁荣要拉大收入差距”：为了经济繁荣就要拉大收入差距。
WV102 “公平竞争才有和谐人际”
WV103 “财富反映个人成就”
WV104 “努力工作能有回报”
WV105 “聪明才干能得回报”
WV106 “成大事难免腐败”
WV107 “有关系比有能力重要”
WV108 “提高生活水平机会很大”
# Recode wv101..wv108 in place: 3 -> 4, 4 -> 5, 5 -> 3; all other values
# (1, 2 and the negative codes) are left untouched.
# The replacement is not idempotent: running this cell a second time rotates
# the three codes again.
for wv_number in range(101, 109):
    wv_column = 'wv' + str(wv_number)
    df[wv_column] = df[wv_column].replace([3.0, 4.0, 5.0], [4.0, 5.0, 3.0])
# WV101 "Economic prosperity requires a wider income gap": for the economy to
# prosper, the income gap has to widen.
df['wv101'].value_counts()
4.0 14345
2.0 13994
1.0 1601
3.0 1298
5.0 1261
-8.0 312
-1.0 284
-2.0 2
Name: wv101, dtype: int64
df['wv102'].value_counts()
4.0 23152
5.0 4605
2.0 3616
3.0 842
1.0 366
-8.0 295
-1.0 219
-2.0 2
Name: wv102, dtype: int64
df['wv108'].value_counts()
4.0 23264
2.0 4759
5.0 3296
3.0 1084
-8.0 310
1.0 306
-1.0 77
-2.0 1
Name: wv108, dtype: int64
# Mean wv101 at each level of wv106, split by gender. The filter is applied to
# the two plotted columns, so their negative missing-value codes (-8, -1, -2)
# neither show up as x categories nor enter the plotted means. Filtering on
# wv102 instead would leave those codes in place.
wv_answered = (df['wv101'] >= 1) & (df['wv106'] >= 1)
sns.pointplot(y = 'wv101', x = 'wv106', hue = 'male', data = df[wv_answered]);
df['ear101a'].unique()
array([-8., nan, 11., 12., 13., 37., 61., 14., 51., 43., 21., 46., 65.,
44., 15., 22., 23., 31., 45., 34., 32., 33., 52., 35., 36., 42.,
41., 50., 53., 62., 54.])
df['qa401c_code'].unique()
array([ nan, 2.311e+03, 6.090e+02, 8.300e+02, 4.700e+01,
9.989e+03, 5.600e+01, 5.100e+01, 1.437e+03, 2.960e+02,
4.900e+01, 5.300e+01, 4.300e+02, 2.480e+02, 5.200e+01,
5.000e+01, 4.600e+01, 4.800e+01, 8.297e+03, 6.900e+01,
4.510e+02, 8.510e+02, 7.400e+01, 2.179e+03, 2.859e+03,
2.853e+03, 7.200e+01, 1.780e+02, 1.530e+02, 1.600e+02,
6.520e+02, 6.870e+02, -9.000e+00, 1.550e+02, 6.990e+02,
7.900e+02, 1.977e+03, 1.480e+02, 6.100e+02, 6.980e+02,
1.560e+02, 2.827e+03, 7.020e+02, 1.480e+03, 1.580e+02,
2.388e+03, 6.490e+02, 1.652e+03, 1.276e+03, 1.570e+02,
1.425e+03, 1.520e+02, 8.330e+02, 6.890e+02, 6.340e+02,
3.780e+02, 7.370e+02, 1.510e+02, 6.270e+02, 2.999e+03,
6.000e+01, 6.100e+01, 5.900e+01, 7.250e+02, 5.500e+01,
5.400e+01, 1.520e+03, 7.580e+02, 1.534e+03, 7.861e+03,
8.980e+02, 8.400e+02, 8.160e+02, 5.610e+02, 1.990e+02,
8.310e+02, 8.910e+02, 3.640e+02, 3.101e+03, 8.404e+03,
5.700e+01, 6.460e+02, 7.150e+02, 3.119e+03, 7.560e+02,
6.600e+01, 1.522e+03, 6.700e+01, 8.100e+01, 1.038e+03,
9.240e+02, 1.189e+03, 1.130e+03, 8.500e+01, 1.213e+03,
8.300e+01, 1.035e+03, 1.053e+03, 9.870e+02, 2.771e+03,
1.216e+03, 9.810e+02, 1.011e+03, 9.030e+02, 9.220e+02,
1.013e+03, 8.316e+03, 1.033e+03, 1.207e+03, 8.234e+03,
9.820e+02, 1.024e+03, 1.147e+03, 2.908e+03, 9.980e+02,
1.200e+01, 9.690e+02, 1.100e+01, 1.583e+03, 1.008e+03,
1.744e+03, 9.000e+01, 9.620e+02, 3.600e+01, 3.500e+01,
2.300e+03, 4.700e+02, 1.000e+00, 1.119e+03, 2.469e+03,
3.000e+00, 2.000e+00, 7.550e+02, 1.238e+03, 2.700e+01,
2.800e+01, 1.282e+03, 1.400e+01, 1.986e+03, 1.397e+03,
1.500e+01, 1.300e+01, 1.770e+02, 7.940e+02, 8.020e+02,
7.498e+03, 1.460e+03, 1.507e+03, 7.230e+02, 6.400e+01,
2.260e+02, 8.467e+03, 1.729e+03, 2.770e+03, 1.651e+03,
1.443e+03, 1.807e+03, 1.000e+02, 1.010e+02, 1.030e+02,
2.480e+03, 1.040e+02, 1.659e+03, 1.050e+02, 1.660e+03,
1.090e+02, 2.430e+03, 1.697e+03, 1.120e+02, 4.400e+02,
2.229e+03, 1.140e+02, 2.587e+03, 1.130e+02, 8.080e+02,
1.712e+03, 1.732e+03, 1.070e+02, 1.680e+03, 1.110e+02,
1.020e+02, 9.992e+03, 1.681e+03, 1.080e+02, 1.505e+03,
1.650e+02, 1.848e+03, 1.751e+03, 1.743e+03, 9.000e+00,
1.873e+03, 1.781e+03, 1.477e+03, 1.780e+03, 2.996e+03,
9.991e+03, 1.930e+03, 1.929e+03, 1.725e+03, 3.000e+01,
2.228e+03, 3.100e+01, 3.200e+01, 3.300e+01, 1.270e+02,
1.896e+03, 2.869e+03, 4.570e+02, 9.200e+01, 1.994e+03,
2.044e+03, 2.292e+03, 1.210e+02, 1.240e+02, 1.170e+02,
2.078e+03, 1.985e+03, 1.300e+02, 2.105e+03, 1.190e+02,
1.160e+02, 1.828e+03, 2.015e+03, 2.016e+03, 2.045e+03,
2.035e+03, 1.230e+02, 2.169e+03, 1.150e+02, 2.086e+03,
2.027e+03, 1.250e+02, 2.054e+03, 2.377e+03, 2.111e+03,
1.260e+02, 2.161e+03, 2.031e+03, 2.172e+03, 1.951e+03,
1.949e+03, 1.290e+02, 2.559e+03, 4.061e+03, 1.739e+03,
4.000e+00, 2.149e+03, 6.000e+00, 2.220e+03, 2.259e+03,
2.253e+03, 2.262e+03, 2.100e+01, 2.000e+01, 2.257e+03,
2.261e+03, 9.420e+02, 1.710e+02, 1.242e+03, 2.305e+03,
3.900e+01, 4.000e+01, 2.608e+03, 4.200e+01, 2.363e+03,
4.100e+01, 1.195e+03, 2.358e+03, 2.200e+01, 2.300e+01,
2.400e+01, 1.632e+03, 2.500e+01, 2.600e+01, 2.507e+03,
2.552e+03, 1.900e+01, 2.266e+03, 2.625e+03, 1.700e+01,
2.650e+03, 2.663e+03, 2.637e+03, 7.700e+01, 1.675e+03,
2.265e+03, 1.374e+03, 3.037e+03, 7.800e+01, 2.937e+03,
2.905e+03, 1.380e+02, 1.440e+02, 1.390e+02, 1.460e+02,
2.949e+03, 1.350e+02, 2.950e+03, 2.925e+03, 1.420e+02,
2.906e+03, 8.070e+02, 1.410e+02, 1.430e+02, 1.320e+02,
2.897e+03, 1.340e+02, 2.896e+03, 2.899e+03, 1.310e+02,
1.360e+02, 2.916e+03, 2.917e+03, 2.915e+03, 2.124e+03,
2.209e+03, 3.057e+03, 1.370e+02, 2.892e+03, 1.330e+02,
2.924e+03, 2.819e+03, 1.615e+03, 1.400e+02, 2.952e+03])
df['ear101c_code'].unique()
array([ nan, 1.700e+02, 7.900e+01, 1.770e+02, 5.100e+01, 5.200e+01,
1.930e+02, 1.990e+02, 4.800e+01, 5.000e+01, 1.564e+03, 4.600e+01,
2.010e+02, 2.610e+02, 1.445e+03, 2.772e+03, 4.140e+02, 8.297e+03,
8.415e+03, 6.900e+01, 3.840e+02, 7.100e+01, 7.400e+01, 7.200e+01,
1.840e+02, 3.740e+02, 7.500e+01, 7.300e+01, 2.288e+03, 1.854e+03,
1.530e+02, 1.600e+02, 6.080e+02, 6.090e+02, 6.550e+02, 1.470e+02,
1.485e+03, 6.560e+02, 1.550e+02, 6.520e+02, 1.480e+02, 6.100e+02,
1.610e+02, 6.610e+02, 1.560e+02, 1.920e+02, 3.066e+03, 1.690e+02,
1.270e+02, 1.500e+02, 6.750e+02, 6.800e+02, 6.260e+02, 1.520e+02,
6.350e+02, 6.730e+02, 1.620e+02, 6.840e+02, 1.570e+02, 6.690e+02,
5.390e+02, 1.540e+02, 6.190e+02, 1.590e+02, 6.720e+02, 1.510e+02,
2.558e+03, 7.370e+02, 6.100e+01, 7.710e+02, 6.000e+01, 6.715e+03,
5.900e+01, 5.500e+01, 5.550e+02, 8.300e+02, 5.800e+01, 5.400e+01,
9.010e+02, 8.150e+02, 8.170e+02, 9.200e+01, 5.700e+01, 8.040e+02,
8.780e+02, 1.475e+03, 5.600e+01, 2.109e+03, 2.005e+03, 8.300e+01,
8.900e+01, 8.400e+01, 8.500e+01, 8.700e+01, 8.800e+01, 9.000e+01,
9.977e+03, 9.100e+01, 9.470e+02, 1.199e+03, 9.300e+01, 9.400e+01,
9.500e+01, 9.700e+01, 9.978e+03, 1.200e+01, 9.530e+02, 1.100e+01,
9.380e+02, 1.000e+01, 3.600e+01, 1.035e+03, 3.500e+01, 1.061e+03,
2.500e+03, 8.450e+03, 1.000e+00, 1.114e+03, 3.000e+00, 2.000e+00,
2.007e+03, 2.700e+01, 3.075e+03, 1.255e+03, 2.293e+03, 1.079e+03,
2.800e+01, 1.234e+03, 1.400e+01, 1.986e+03, 1.345e+03, 2.006e+03,
1.023e+03, 2.022e+03, 1.500e+01, 1.302e+03, 9.984e+03, 2.020e+03,
2.291e+03, 1.300e+01, 6.700e+01, 1.432e+03, 1.457e+03, 6.600e+01,
1.525e+03, 6.200e+02, 1.652e+03, 1.745e+03, 7.498e+03, 6.500e+01,
1.476e+03, 8.467e+03, 6.300e+01, 6.400e+01, 9.900e+01, 1.583e+03,
1.584e+03, 1.589e+03, 1.000e+02, 1.651e+03, 1.010e+02, 1.650e+02,
2.770e+03, 1.637e+03, 1.030e+02, 9.930e+02, 9.840e+02, 1.040e+02,
1.050e+02, 1.018e+03, 1.090e+02, 1.100e+02, 2.230e+03, 1.120e+02,
1.140e+02, 1.673e+03, 1.731e+03, 1.130e+02, 1.742e+03, 6.382e+03,
1.236e+03, 1.070e+02, 1.053e+03, 1.110e+02, 1.020e+02, 1.150e+02,
1.080e+02, 1.060e+02, 1.102e+03, 7.000e+00, 1.811e+03, 9.000e+00,
8.000e+00, 1.855e+03, 2.900e+01, 3.000e+01, 3.100e+01, 1.509e+03,
3.200e+01, 1.902e+03, 3.300e+01, 1.160e+02, 1.220e+02, 4.061e+03,
1.240e+02, 2.004e+03, 1.170e+02, 2.058e+03, 1.250e+02, 1.300e+02,
2.093e+03, 1.190e+02, 2.013e+03, 1.987e+03, 1.180e+02, 2.156e+03,
1.260e+02, 2.084e+03, 1.280e+02, 1.988e+03, 1.985e+03, 1.210e+02,
2.010e+03, 1.290e+02, 1.230e+02, 4.000e+00, 2.149e+03, 6.000e+00,
2.056e+03, 2.160e+03, 2.100e+01, 1.311e+03, 2.257e+03, 2.000e+01,
2.683e+03, 3.800e+01, 3.900e+01, 2.290e+03, 4.000e+01, 2.295e+03,
4.300e+01, 1.179e+03, 4.400e+01, 4.200e+01, 1.055e+03, 4.100e+01,
8.000e+01, 2.200e+01, 1.903e+03, 2.300e+01, 2.400e+01, 2.500e+01,
2.600e+01, 2.552e+03, 1.282e+03, 1.900e+01, 2.569e+03, 1.600e+01,
2.588e+03, 1.700e+01, 1.800e+01, 7.800e+01, 7.700e+01, 7.600e+01,
2.777e+03, 1.380e+02, 3.110e+03, 1.360e+02, 1.320e+02, 2.886e+03,
6.543e+03, 1.440e+02, 1.390e+02, 2.915e+03, 2.939e+03, 3.127e+03,
1.460e+02, 8.266e+03, 1.450e+02, 2.789e+03, 1.420e+02, 1.330e+02,
2.884e+03, 2.601e+03, 1.410e+02, 1.430e+02, 3.125e+03, 2.913e+03,
1.340e+02, 1.350e+02, 1.310e+02, 2.919e+03, 5.660e+02, 1.324e+03,
3.057e+03, 1.370e+02, 2.890e+03, 1.400e+02, 2.018e+03, 7.040e+02,
2.930e+03])
df['ear101a'].value_counts()
-8.0 31876
44.0 165
62.0 144
31.0 111
51.0 93
41.0 90
21.0 65
32.0 57
14.0 56
13.0 47
37.0 46
23.0 42
36.0 40
35.0 33
61.0 27
53.0 27
43.0 25
33.0 24
52.0 23
45.0 20
22.0 17
42.0 17
65.0 11
34.0 11
12.0 10
50.0 8
11.0 7
46.0 2
15.0 2
54.0 1
Name: ear101a, dtype: int64
# Frequency table of qa401c_code, shown as a one-column DataFrame.
qa401c_counts = df['qa401c_code'].value_counts()
qa401c_counts.to_frame()
qa401c_code | |
---|---|
113.0 | 11 |
21.0 | 9 |
123.0 | 8 |
144.0 | 7 |
124.0 | 7 |
... | ... |
1053.0 | 1 |
8316.0 | 1 |
2363.0 | 1 |
1729.0 | 1 |
652.0 | 1 |
324 rows × 1 columns