{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 中国家庭追踪调查2018\n" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2021-11-22T09:45:27.274079Z", "start_time": "2021-11-22T09:45:26.845904Z" } }, "outputs": [], "source": [ "import os\n", "from pathlib import Path\n", "\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "\n", "# Font used so that Chinese labels display correctly in figures.\n", "plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']\n", "# Render minus signs correctly (original note: this setting does not work with ['SimHei']).\n", "plt.rcParams['axes.unicode_minus'] = False\n", "\n", "# Folder holding the CFPS 2018 files; set CFPS_DATA_DIR to override the default.\n", "DATA_DIR = Path(os.environ.get('CFPS_DATA_DIR', '/Users/datalab/bigdata/中国家庭追踪调查2018'))" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2021-11-22T10:01:22.068527Z", "start_time": "2021-11-22T10:01:15.399141Z" } }, "outputs": [], "source": [ "# Person-level file: skip value-label categoricals and load Stata missing values as NaN.\n", "df = pd.read_stata(\n", "    DATA_DIR / 'cfps2018person_202012.dta',\n", "    convert_categoricals=False,\n", "    convert_missing=False,\n", ")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2021-11-22T10:01:23.335798Z", "start_time": "2021-11-22T10:01:23.309995Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", " | pid | \n", "code | \n", "fid18 | \n", "fid16 | \n", "fid14 | \n", "fid12 | \n", "fid10 | \n", "pid_a_f | \n", "pid_a_m | \n", "selfrpt | \n", "... | \n", "cfps2018eduy_im | \n", "gdge | \n", "gdgeyear | \n", "gdgemonth | \n", "catipilot | \n", "pg02 | \n", "pg1201_min | \n", "pg1201_max | \n", "interviewerid18 | \n", "releaseversion | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "100051501.0 | \n", "501.0 | \n", "100051.0 | \n", "100051.0 | \n", "100051.0 | \n", "-8.0 | \n", "-8.0 | \n", "110043201.0 | \n", "110043105.0 | \n", "1.0 | \n", "... | \n", "12.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "761040.0 | \n", "2.1 | \n", "
1 | \n", "100051502.0 | \n", "502.0 | \n", "100051.0 | \n", "100051.0 | \n", "100051.0 | \n", "-8.0 | \n", "-8.0 | \n", "-8.0 | \n", "-8.0 | \n", "1.0 | \n", "... | \n", "12.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "761040.0 | \n", "2.1 | \n", "
2 | \n", "100160601.0 | \n", "601.0 | \n", "100160.0 | \n", "-8.0 | \n", "-8.0 | \n", "-8.0 | \n", "-8.0 | \n", "-8.0 | \n", "-8.0 | \n", "1.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "459505.0 | \n", "2.1 | \n", "
3 | \n", "100376551.0 | \n", "551.0 | \n", "100376.0 | \n", "100376.0 | \n", "-8.0 | \n", "-8.0 | \n", "-8.0 | \n", "-8.0 | \n", "-8.0 | \n", "1.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "526621.0 | \n", "2.1 | \n", "
4 | \n", "100551551.0 | \n", "551.0 | \n", "100551.0 | \n", "100551.0 | \n", "-8.0 | \n", "-8.0 | \n", "-8.0 | \n", "-8.0 | \n", "-8.0 | \n", "1.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "696822.0 | \n", "2.1 | \n", "
5 rows × 1371 columns
\n", "\n", " | qa401c_code | \n", "
---|---|
113.0 | \n", "11 | \n", "
21.0 | \n", "9 | \n", "
123.0 | \n", "8 | \n", "
144.0 | \n", "7 | \n", "
124.0 | \n", "7 | \n", "
... | \n", "... | \n", "
1053.0 | \n", "1 | \n", "
8316.0 | \n", "1 | \n", "
2363.0 | \n", "1 | \n", "
1729.0 | \n", "1 | \n", "
652.0 | \n", "1 | \n", "
324 rows × 1 columns
\n", "