diff --git a/Extras/Omnivore/数据处理过程.md b/Extras/Omnivore/数据处理过程.md index 08d05aec..5f523f66 100644 --- a/Extras/Omnivore/数据处理过程.md +++ b/Extras/Omnivore/数据处理过程.md @@ -137,9 +137,45 @@ plt.show() ![image.png|600](https://image.kfdr.top/i/2024/03/16/65f55b0a3adbf.png) -## 业务时段分布 +## 支付方式统计 -查看一天二十四小时中的业务分布情况 +```python +import pandas as pd +import matplotlib.pyplot as plt +plt.rcParams['font.sans-serif'] = ['Microsoft YaHei'] + +# Load the Excel file +data = pd.read_excel('E:/Projects/analyse/pythonProject/merged_data.xlsx') + +# Ensure '日期' is in datetime format for grouping +data['日期'] = pd.to_datetime(data['日期']) + +# Add a 'YearMonth' column for easier analysis +data['YearMonth'] = data['日期'].dt.to_period('M') + +# Assuming 'data' is your DataFrame and '支付方式' is the column for Payment Methods +data['Payment Category'] = data['支付方式'].apply(lambda x: '挂账' if '挂账' in str(x).lower() else '现付') + +# Group by 'YearMonth' and 'Payment Category', then count the occurrences +monthly_payment_category_counts = data.groupby(['YearMonth', 'Payment Category']).size().unstack(fill_value=0) + +# Calculate the percentage of the '挂账' and '现付' categories for each month +monthly_payment_category_percentage = (monthly_payment_category_counts.div(monthly_payment_category_counts.sum(axis=1), axis=0) * 100) + +# Plotting the results - A stacked bar chart would be suitable to show percentages month-by-month +monthly_payment_category_percentage.plot(kind='bar', stacked=True, figsize=(14, 7), color=['tomato', 'lightblue']) +plt.title('支付方式占比') +plt.xlabel('月份') +plt.ylabel('比例') +plt.legend(title='支付方式') +plt.xticks(rotation=45) +plt.tight_layout() +plt.show() +``` + +![image.png|600](https://image.kfdr.top/i/2024/03/16/65f571d80fd2f.png) + +## 业务时段分布 ```python import pandas as pd @@ -211,6 +247,72 @@ plt.show() ![image.png|600](https://image.kfdr.top/i/2024/03/16/65f5625b9cac2.png) +考虑到表格中的预约类型分为马上出发和预约派车两种,其中预约派车订单的实际业务发生时间往往与系统记录的订单时间不匹配,因此将预约派车类型的订单全部剔除,只研究马上出发订单的时间分布 + +```python +# 
Filter out booked departures to focus on immediate departures only +immediate_departures = data[data['预约类型'] == '马上出发'] + +# Calculating the ratio of day and night shifts for immediate departures only +immediate_departures_shift_ratio = immediate_departures['班次'].value_counts() + +# Generate a pie chart to show the ratio of day and night shifts for immediate departures +plt.figure(figsize=(8, 8)) +plt.pie(immediate_departures_shift_ratio, labels=immediate_departures_shift_ratio.index, autopct='%1.1f%%', startangle=140, colors=['lightblue', 'lightgreen']) +plt.title('白班和夜班的比例(马上出发)') +plt.show() + +# Grouping immediate departures by 'YearMonth' and '班次' (Shift), then count the occurrences +immediate_departures_count_by_month_shift = immediate_departures.groupby(['YearMonth', '班次']).size().unstack(fill_value=0) + +# Plotting the distribution of immediate departures by month and shift +immediate_departures_count_by_month_shift.plot(kind='bar', stacked=True, figsize=(14, 7), color=['lightblue', 'lightgreen']) +plt.title('白班和夜班的业务量(马上出发)') +plt.xlabel('月份') +plt.ylabel('业务频数') +plt.legend(title='班次', loc='upper right') +plt.xticks(rotation=45) +plt.tight_layout() +plt.show() +``` + +![image.png|600](https://image.kfdr.top/i/2024/03/16/65f5a296cdea9.png) + +![image.png|600](https://image.kfdr.top/i/2024/03/16/65f5a29dc8dd5.png) + +更进一步,剔除掉疫情期间(2022年12月及以前)的所有业务,能够观察2023年1月起常态运营下白班和夜班的比例与业务量 + +```python +# Filter for immediate departures after December 2022 +immediate_departures_after_dec2022 = immediate_departures[immediate_departures['YearMonth'] > '2022-12'] + +# Calculate the ratio of day and night shifts for this filtered data +shift_ratio_after_dec2022 = immediate_departures_after_dec2022['班次'].value_counts() + +# Generate a pie chart to show the ratio of day and night shifts for immediate departures after December 2022 +plt.figure(figsize=(8, 8)) +plt.pie(shift_ratio_after_dec2022, labels=shift_ratio_after_dec2022.index, autopct='%1.1f%%', startangle=140, colors=['lightblue', 'lightgreen']) 
+plt.title('2023年1月起白班和夜班的比例(马上出发)') +plt.show() + +# Grouping immediate departures by 'YearMonth' and '班次' (Shift), then count the occurrences +immediate_departures_count_by_month_shift = immediate_departures_after_dec2022.groupby(['YearMonth', '班次']).size().unstack(fill_value=0) + +# Plotting the distribution of immediate departures by month and shift +immediate_departures_count_by_month_shift.plot(kind='bar', stacked=True, figsize=(14, 7), color=['lightblue', 'lightgreen']) +plt.title('2023年1月起白班和夜班的业务量(马上出发)') +plt.xlabel('月份') +plt.ylabel('业务频数') +plt.legend(title='班次', loc='upper right') +plt.xticks(rotation=45) +plt.tight_layout() +plt.show() +``` + +![image.png|600](https://image.kfdr.top/i/2024/03/16/65f5a30c1d2c8.png) + +![image.png|600](https://image.kfdr.top/i/2024/03/16/65f5a315764a6.png) + ## 业务区域分布 ```python @@ -250,40 +352,3 @@ plt.show() ![image.png|600](https://image.kfdr.top/i/2024/03/16/65f569fe39e54.png) -## 支付方式统计 - -```python -import pandas as pd -import matplotlib.pyplot as plt -plt.rcParams['font.sans-serif'] = ['Microsoft YaHei'] - -# Load the Excel file -data = pd.read_excel('E:/Projects/analyse/pythonProject/merged_data.xlsx') - -# Ensure '日期' is in datetime format for grouping -data['日期'] = pd.to_datetime(data['日期']) - -# Add a 'YearMonth' column for easier analysis -data['YearMonth'] = data['日期'].dt.to_period('M') - -# Assuming 'data' is your DataFrame and '支付方式' is the column for Payment Methods -data['Payment Category'] = data['支付方式'].apply(lambda x: '挂账' if '挂账' in str(x).lower() else '现付') - -# Group by 'YearMonth' and 'Payment Category', then count the occurrences -monthly_payment_category_counts = data.groupby(['YearMonth', 'Payment Category']).size().unstack(fill_value=0) - -# Calculate the percentage of 'Pending' and 'Other' categories for each month -monthly_payment_category_percentage = (monthly_payment_category_counts.div(monthly_payment_category_counts.sum(axis=1), axis=0) * 100) - -# Plotting the results - A stacked bar 
chart would be suitable to show percentages month-by-month -monthly_payment_category_percentage.plot(kind='bar', stacked=True, figsize=(14, 7), color=['tomato', 'lightblue']) -plt.title('支付方式占比') -plt.xlabel('月份') -plt.ylabel('比例') -plt.legend(title='支付方式') -plt.xticks(rotation=45) -plt.tight_layout() -plt.show() -``` - -![image.png|600](https://image.kfdr.top/i/2024/03/16/65f571d80fd2f.png)