vault backup: 2024-03-16 17:17:45
This commit is contained in:
@@ -39,11 +39,174 @@ print("Files have been merged and saved as 'merged_data.xlsx'")
|
||||
|
||||
删去了以下列:序号、服务单号、调度单号、联系人、联系电话、患者信息、销售、介绍人、客服、调度、来源、承包组、车牌、出车成员、医护出车和任务备注
|
||||
|
||||
经过确认,所有调度单状态不为已返回的订单均未产生收入,故将其全部筛选出来后将总成交价一列的数值改为 0 以免影响计算结果,按月营收额如下所示:
|
||||
经过确认,所有调度单状态不为已返回的订单均未产生收入,故将其全部筛选出来后将总成交价一列的数值改为 0 以免影响计算结果,统计后月营收额如下所示:
|
||||
|
||||
| 日期 | 2022-04 | 2022-05 | 2022-06 | 2022-07 | 2022-08 | 2022-09 | 2022-10 | 2022-11 | 2022-12 | 2023-01 | 2023-02 | 2023-03 | 2023-04 | 2023-05 | 2023-06 | 2023-07 | 2023-08 | 2023-09 | 2023-10 | 2023-11 | 2023-12 | 2024-01 | 2024-02 |
|
||||
| --- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- | ---------- |
|
||||
| 营收额 | 3328917.00 | 3362286.00 | 3973152.00 | 3462363.00 | 4250864.00 | 4144810.76 | 4360712.00 | 4587020.00 | 4880988.50 | 4197830.00 | 3309294.00 | 3338335.00 | 4069565.00 | 4292058.60 | 3101339.20 | 3834394.40 | 3114722.80 | 2750602.00 | 4161377.40 | 3465051.00 | 2898861.00 | 3426260.50 | 3559553.15 |
|
||||
|
||||

|
||||
# 数据分析
|
||||
|
||||
## 月营收趋势
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
# Use a font with CJK glyphs so the Chinese title and axis labels render.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

# Read the merged order export.
data = pd.read_excel('E:/Projects/analyse/pythonProject/merged_data.xlsx')

# Normalise the two columns this analysis depends on. Prices that cannot be
# parsed become NaN (errors='coerce') and are therefore skipped by sum().
data['日期'] = pd.to_datetime(data['日期'])
data['总成交价'] = pd.to_numeric(data['总成交价'], errors='coerce')

# Bucket every order into its calendar month.
data['YearMonth'] = data['日期'].dt.to_period('M')

# One row per month: total transaction value.
monthly_revenue = data.groupby('YearMonth', as_index=False)['总成交价'].sum()

# Periods are converted to strings so matplotlib treats them as categories.
month_labels = monthly_revenue['YearMonth'].astype(str)
revenue_totals = monthly_revenue['总成交价']

# Line chart of revenue per month.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(month_labels, revenue_totals, marker='o')
ax.set_title('月营收趋势')
ax.set_xlabel('月份')
ax.set_ylabel('收入')
ax.tick_params(axis='x', labelrotation=45)
ax.grid(visible=True)
fig.tight_layout()
plt.show()
|
||||
```
|
||||
|
||||

|
||||
|
||||
## 平均客单价
|
||||
|
||||
为避免极端值影响,先按月份对所有数据分组,在每个月内剔除总成交价最高的 1% 和最低的 1% 的订单,再计算平均客单价。
|
||||
|
||||
```python
|
||||
# Attempting the analysis again with additional checks
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
# Use a font with CJK glyphs so the Chinese title and axis labels render.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

# Load the Excel file
data = pd.read_excel('E:/Projects/analyse/pythonProject/merged_data.xlsx')

# Ensure '日期' is in datetime format for grouping
data['日期'] = pd.to_datetime(data['日期'])

# Coerce prices to numbers, exactly as the monthly-revenue analysis does, so
# the quantile and mean calculations below never see text cells; values that
# cannot be parsed become NaN.
data['总成交价'] = pd.to_numeric(data['总成交价'], errors='coerce')

# Add a 'YearMonth' column for easier analysis
data['YearMonth'] = data['日期'].dt.to_period('M')

# Group data by 'YearMonth'; each group holds one calendar month of orders.
grouped = data.groupby('YearMonth')
|
||||
|
||||
|
||||
# Function to remove the top 1% and bottom 1% within each group
|
||||
def remove_outliers(group):
    """Return the rows of *group* whose price lies strictly inside its 1%-99% band.

    Args:
        group: DataFrame with a numeric '总成交价' column (one month of orders
            when used with the monthly grouping).

    Returns:
        The subset of ``group`` whose '总成交价' is strictly greater than the
        1st percentile and strictly less than the 99th percentile of that
        column. All other columns are carried through unchanged.

    Note:
        Both comparisons are strict, so rows whose price equals a cut-off are
        dropped as well. If many rows share the boundary value, more than 1%
        of the rows can be removed at that end, and a group whose prices are
        all identical comes back empty. Rows with a NaN price are dropped too,
        because comparisons with NaN are False.
    """
    lower = group['总成交价'].quantile(0.01)
    upper = group['总成交价'].quantile(0.99)
    return group[(group['总成交价'] > lower) & (group['总成交价'] < upper)]
|
||||
|
||||
|
||||
# Trim every month independently, then stitch the surviving rows back together.
# Iterating over the GroupBy hands remove_outliers the complete monthly frame,
# including the 'YearMonth' column that is grouped on again below.
# GroupBy.apply no longer guarantees that: pandas 2.2 deprecated passing the
# grouping columns to the applied function, and without them the second
# groupby would fail on a missing 'YearMonth' column.
monthly_frames = [remove_outliers(month_orders) for _, month_orders in grouped]

# ignore_index=True gives the combined frame a fresh 0..n-1 index.
filtered_groups = pd.concat(monthly_frames, ignore_index=True)

# Group by 'YearMonth' again after filtering and calculate the average price
average_price_filtered = filtered_groups.groupby('YearMonth')['总成交价'].mean().reset_index()

# Convert 'YearMonth' to string so matplotlib plots the months as categories
average_price_filtered['YearMonth'] = average_price_filtered['YearMonth'].astype(str)

# Plotting the result
plt.figure(figsize=(14, 7))
plt.plot(
    average_price_filtered['YearMonth'],
    average_price_filtered['总成交价'],
    marker='o',
    linestyle='-',
    color='red',
)
plt.title('平均客单价(每月数据去除前1%和后1%)')
plt.xlabel('月份')
plt.ylabel('价格')
plt.xticks(rotation=45)
plt.grid(visible=True)
plt.tight_layout()
plt.show()
|
||||
```
|
||||
|
||||

|
||||
|
||||
## 业务时段分布
|
||||
|
||||
查看一天二十四小时中的业务分布情况
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
# Use a font with CJK glyphs so the Chinese title and axis labels render.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

# Load the Excel file
data = pd.read_excel('E:/Projects/analyse/pythonProject/merged_data.xlsx')

# Extract the hour (the digits immediately before the first ':') from the
# '时间' column to analyse service demand by time of day.
# The pattern is a raw string: '\d' inside a normal string literal is an
# invalid escape sequence and triggers a SyntaxWarning on recent Python.
# expand=False returns a Series rather than a one-column DataFrame.
# NOTE(review): assumes every '时间' cell is text such as 'HH:MM'; a cell that
# does not match yields NaN, which astype(int) rejects - confirm the column
# has no blanks.
data['Hour'] = data['时间'].str.extract(r'(\d+):', expand=False).astype(int)

# Count orders per hour ('日期' is only used as the column to count).
service_demand_by_hour = data.groupby('Hour')['日期'].count().reset_index()

# Plotting service demand by hour
plt.figure(figsize=(12, 6))
plt.bar(service_demand_by_hour['Hour'], service_demand_by_hour['日期'], color='orange')
plt.title('业务时段分布')
plt.xlabel('24 小时')
plt.ylabel('业务频次')
# Show a tick for every hour of the day, including hours without any orders.
plt.xticks(range(0, 24))
plt.grid(axis='y')
plt.show()
|
||||
```
|
||||
|
||||

|
||||
|
||||
## 白班夜班比例
|
||||
|
||||
```python
|
||||
import pandas as pd
|
||||
import matplotlib.pyplot as plt
|
||||
# Use a font with CJK glyphs so the Chinese title and shift labels render.
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

# Read the merged order export and tag every order with its calendar month.
data = pd.read_excel('E:/Projects/analyse/pythonProject/merged_data.xlsx')
data['日期'] = pd.to_datetime(data['日期'])
data['YearMonth'] = data['日期'].dt.to_period('M')

# Both charts share the same two-colour palette.
shift_colors = ['lightblue', 'lightgreen']

# --- Overall share of each shift -------------------------------------------
# value_counts() gives the number of orders per distinct '班次' value.
shift_ratio = data['班次'].value_counts()

plt.figure(figsize=(8, 8))
plt.pie(
    shift_ratio,
    labels=shift_ratio.index,
    autopct='%1.1f%%',
    startangle=140,
    colors=shift_colors,
)
plt.title('白班和夜班的比例')
plt.show()

# --- Orders per shift, month by month --------------------------------------
# Count orders for each (month, shift) pair, then pivot the shifts into
# columns; month/shift combinations with no orders are filled with 0.
orders_per_month_and_shift = data.groupby(['YearMonth', '班次'])['日期'].count()
shift_volume_by_month = orders_per_month_and_shift.unstack(fill_value=0)

# Stacked bars: one bar per month, one segment per shift.
shift_volume_by_month.plot.bar(stacked=True, figsize=(14, 7), color=shift_colors)
plt.title('Volume of Day and Night Shifts by Month')
plt.xlabel('Year-Month')
plt.ylabel('Number of Shifts')
plt.xticks(rotation=45)
plt.legend(title='Shift')
plt.tight_layout()
plt.show()
|
||||
```
|
||||
|
||||

|
||||
|
||||

|
||||
|
||||
Reference in New Issue
Block a user