WITH CustomerSales AS (
SELECT
s.CustomerKey,
c.Name AS CustomerName,
p.Category AS ProductCategory,
SUM(s.SalesAmount) AS TotalSales
FROM Sales s
JOIN Customer c ON s.CustomerKey = c.CustomerKey
JOIN Product p ON s.ProductKey = p.ProductKey
GROUP BY s.CustomerKey, c.Name, p.Category
),
TopCustomerSales AS (
SELECT
*,
ROW_NUMBER() OVER (PARTITION BY ProductCategory ORDER BY TotalSales DESC) AS Ranking
FROM CustomerSales
WHERE (SELECT COUNT(*) FROM CustomerSales cs WHERE cs.ProductCategory =
CustomerSales.ProductCategory AND cs.TotalSales > CustomerSales.TotalSales) < 10
)
SELECT *
FROM TopCustomerSales
ORDER BY ProductCategory, TotalSales DESC;
EVALUATE
VAR CustomerSales =
SUMMARIZE (
Sales,
Sales[CustomerKey],
Customer[Name],
Product[Category],
"TotalSales", Sales[Sales Amount]
)
VAR TopCustomerSales =
TOPN (
10,
CustomerSales,
[TotalSales], DESC
)
VAR RankedCustomerSales =
ADDCOLUMNS (
TopCustomerSales,
"Ranking",
RANK (
DENSE,
TopCustomerSales,
ORDERBY ([TotalSales],DESC),
PARTITIONBY (Product[Category])
)
)
RETURN
RankedCustomerSales
ORDER BY Product[Category], [TotalSales] DESC
import pandas as pd
# Assuming df_sales, df_customer, and df_product are your DataFrames
df = pd.merge(df_sales, df_customer, on='CustomerKey')
df = pd.merge(df, df_product, on='ProductKey')
# Group by CustomerKey, Customer Name, and Product Category and sum Sales Amount
customer_sales = df.groupby(['CustomerKey', 'Name', 'Category'])['SalesAmount'].sum().reset_index()
customer_sales.rename(columns={'Name': 'CustomerName', 'Category': 'ProductCategory', 'SalesAmount':\
'TotalSales'}, inplace=True)
# Get top 10 customers by TotalSales for each ProductCategory
top_customer_sales = customer_sales.groupby('ProductCategory').apply(lambda x: \
x.nlargest(10, 'TotalSales')).reset_index(drop=True)
# Add ranking column
top_customer_sales['Ranking'] = top_customer_sales.groupby('ProductCategory')\
['TotalSales'].rank(method='dense', ascending=False).astype(int)
# Sort by ProductCategory and TotalSales
ranked_customer_sales = top_customer_sales.sort_values(by=['ProductCategory', 'TotalSales'],\
ascending=[True, False])
print(ranked_customer_sales)
| Sales[CustomerKey] | Product[Category] | TotalSales | Ranking |
| 19091 | Cameras and camcorders | 114005.75 | 1 |
| 19087 | Computers | 215519.65 | 1 |
| 19084 | Computers | 153326.32 | 2 |
| 19133 | Computers | 138538.32 | 3 |
| 19088 | Computers | 137523.27 | 4 |
| 19089 | Computers | 130626.16 | 5 |
| 19037 | Computers | 127071.62 | 6 |
| 19099 | Home Appliances | 157751.08 | 1 |
| 19102 | Home Appliances | 119491.59 | 2 |
| 19052 | Home Appliances | 117302.11 | 3 |
SUMMARIZE is used to create a table with distinct combinations of CustomerKey, Customer[Name], and Product[Category]. It also calculates the total sales (TotalSales) for each combination. TOPN is used to get the top 10 rows from the CustomerSales table based on TotalSales in descending order. ADDCOLUMNS is used to add a new column Ranking to the TopCustomerSales table. RANK assigns a rank to each row within its Product[Category] partition based on TotalSales in descending order. DENSE ranking ensures that ranks are sequential without gaps.
Leave a Comment