"""Exploratory pandas walkthrough of a rental-listing CSV.

Each numbered exercise states the question, shows an equivalent SQL query
in a comment, and then gives the pandas version.

NOTE(review): this looks like a notebook export. Bare expressions such as
``df.head()`` only display a value in an interactive session; run as a
plain script they are evaluated and discarded.
"""
import pandas as pd

df = pd.read_csv('D:/workspace/pandas/PandasProject/data/LJdata.csv')
df

# Replace the original (Chinese) column names with English ones.
df.columns = [
    'district',
    'address',
    'title',
    'house_type',
    'area',
    'price',
    'floor',
    'build_time',
    'direction',
    'update_time',
    'view_num',
    'extra_info',
    'link',
]
df

# ---------------------------------------------------------------------------
# Inspect the data
# ---------------------------------------------------------------------------
df.head()  # first 5 rows
df[:5]  # first 5 rows (slice form)
df.info()  # column dtypes / non-null counts
# By default only numeric columns are summarised (quartiles, min/max and
# mean are only meaningful for numbers).
df.describe()
df.describe(include='all')  # summarise every column
df.shape  # (number of rows, number of columns)

# ---------------------------------------------------------------------------
# 1. Find the listings with the highest and the lowest rent
# ---------------------------------------------------------------------------
# SQL:
#   select * from xx order by price desc/asc limit 0,1
#   (we want the listing itself, not just the price, so
#    "select min(price) ..." is not enough)
# SQL, returning every listing when several tie:
#   select * from xxx where price = (select max(price) from xxx)
#   select * from xxx where price = (select min(price) from xxx)
# pandas: sort first, then take one row.
df.sort_values('price', ascending=False).head(1)  # single most expensive
df.nlargest(1, 'price')
df.sort_values('price', ascending=True).head(1)  # single cheapest
df.nsmallest(1, 'price')
# Return every listing when several tie for the extreme price.
df[df['price'] == df['price'].max()]
df[df['price'] == df['price'].min()]

# ---------------------------------------------------------------------------
# 2. Find the 10 most recently updated listings
# ---------------------------------------------------------------------------
# SQL:
#   select * from xx order by update_time desc limit 0,10;
df.sort_values(['update_time'], ascending=False).head(10)
# Original author's note: df.nlargest(10, 'update_time') cannot be used
# here because the column holds strings.

# ---------------------------------------------------------------------------
# 3. List all distinct update times
# ---------------------------------------------------------------------------
# SQL:
#   select distinct update_time from xxx
df['update_time'].drop_duplicates()
# Alternative: df['update_time'].unique()

# ---------------------------------------------------------------------------
# 4. Mean, maximum and minimum number of viewings
# ---------------------------------------------------------------------------
# SQL:
#   select avg(view_num), max(view_num), min(view_num) from xxx
df['view_num'].mean()
df['view_num'].max()
df['view_num'].min()
df['view_num'].describe()

# ---------------------------------------------------------------------------
# 5. Number of listings for each viewing count
# ---------------------------------------------------------------------------
# SQL:
#   select view_num, count(*) as house_count from xxx group by view_num
temp = df.groupby('view_num').agg({'view_num': 'count'})
temp.columns = ['house_count']
temp
# Equivalent forms that count a different column per group.
df.groupby(['view_num']).address.count()
df.groupby(['view_num'])['address'].count()

# ---------------------------------------------------------------------------
# 6. Distribution of the rent: mean, standard deviation, median, ...
# ---------------------------------------------------------------------------
# SQL:
#   select avg(price), std(price), median(price) from xxx
df['price'].describe()
df['price'].mean()
df['price'].std()
df['price'].median()

# ---------------------------------------------------------------------------
# 7. Find the orientation with the most viewings
# ---------------------------------------------------------------------------
# Approach: group by orientation and sum the viewings.
# SQL:
#   with temp as (
#       select direction, sum(view_num) as sum_view from xx group by direction
#   )
#   select * from temp order by sum_view desc limit 0,1;
temp = df.groupby('direction').agg({'view_num': 'sum'})
type(temp)
# temp[boolean Series] returns a DataFrame with the same structure as temp
# that keeps only the rows whose mask value is True.
temp[temp['view_num'] == temp['view_num'].max()]

# ---------------------------------------------------------------------------
# 8. Find the most popular house type
# ---------------------------------------------------------------------------
# Approach: group by house type and sum the viewings.
# SQL:
#   select house_type, sum(view_num) from xxx group by house_type
# 'sum' (not 'count') is used so the code matches the stated approach and
# the SQL above: 'count' would rank house types by number of listings
# rather than by total viewings.
temp = df.groupby('house_type').agg({'view_num': 'sum'})
temp
temp[temp['view_num'] == temp['view_num'].max()]

# ---------------------------------------------------------------------------
# 9. Average rent per square metre
# ---------------------------------------------------------------------------
# SQL:
#   select avg(price / area) from xxx
df['price_per'] = df['price'] / df['area']
df['price_per'].mean()
# Alternative: total rent divided by total area. This is an area-weighted
# average and will generally differ from the mean of per-listing ratios.
df['price'].sum() / df['area'].sum()

# ---------------------------------------------------------------------------
# 10. Find the district with the most listings
# ---------------------------------------------------------------------------
# SQL:
#   select district, count(*) as house_count from xxx
#   group by district order by house_count desc limit 1
temp = df.groupby('district').agg({'district': 'count'})
temp
temp[temp['district'] == temp['district'].max()]