pandas: powerful Python data analysis toolkit — pandas 0.23.0 documentation
# 型確認
df.dtypes
# 欠損値のカウント
df.isnull().sum()
# 欠損値確認
df[df.isnull().any(axis=1)]
# 文字除去
df['xxx'].str.strip('yyy')
# 型変換
df['xxx'].astype(np.int64)
# 抽出
df1 = df.iloc[:, 3:5]
# 置換
## 条件がTrueの時はそのまま、Falseの時はNaN
df['xxx'].where(df['xxx'] == 'hoge')
## 条件がTrueの時はNaN、Falseの時はそのまま
df['xxx'].mask(df['xxx'] == 'hoge')
# dfをそのまま適用(元のdfを直接更新)するには、引数に inplace=True を指定
# datetimeからtimeを分離
df['time'] = df.index.time.tolist()
df['time'] = df['datetime'].dt.time.tolist()
# ピボットテーブル
pv = df.pivot_table(values = 'xxx', index = 'yyy', columns='zzz', aggfunc = sum, fill_value = 0)
# 順位
df['xxx'].rank(ascending=False, method='min')
# ソート
df.sort_values(['xxx', 'yyy', 'zzz'], ascending=[True,True, True], inplace=True)
dfs = pd.read_html(url, header=0, index_col=0, match='xxxxx')
pandas.read_html(io, match='.+', flavor=None, header=None, index_col=None, skiprows=None, attrs=None, parse_dates=False, tupleize_cols=None, thousands=',', encoding=None, decimal='.', converters=None, na_values=None, keep_default_na=True)
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_html.html
df = pd.read_csv(url, encoding='shift_jis', skiprows=1, header=0, index_col=0)
pandas.read_csv(filepath_or_buffer, sep=',', delimiter=None, header='infer', names=None, index_col=None, usecols=None, squeeze=False, prefix=None, mangle_dupe_cols=True, dtype=None, engine=None, converters=None, true_values=None, false_values=None, skipinitialspace=False, skiprows=None, nrows=None, na_values=None, keep_default_na=True, na_filter=True, verbose=False, skip_blank_lines=True, parse_dates=False, infer_datetime_format=False, keep_date_col=False, date_parser=None, dayfirst=False, iterator=False, chunksize=None, compression='infer', thousands=None, decimal=b'.', lineterminator=None, quotechar='"', quoting=0, escapechar=None, comment=None, encoding=None, dialect=None, tupleize_cols=None, error_bad_lines=True, warn_bad_lines=True, skipfooter=0, skip_footer=0, doublequote=True, delim_whitespace=False, as_recarray=None, compact_ints=None, use_unsigned=None, low_memory=True, buffer_lines=None, memory_map=False, float_precision=None)
http://pandas.pydata.org/pandas-docs/stable/generated/pandas.read_csv.html