I've recently been down the data science path and integrating R and some measures into our analysis stack.
Moving from R Studio to SQL added some interesting problems to be able to get my scripts to work. The first few problems I ran into were structure, security, and then field types.
I overcame all those problems and came up with a script to calculate the slope of our sales over the previous x days and then use SQL 2017 R to write data to a database table.
I did not use the "WITH RESULT SETS" for the output and rather inserted my results into a staging table. The staging table was necessary because of type float and varchar coming from the R script that I was unable to coerce into the correct format.
One thing to remember as you move to SQL R is that the data source is imported separately and the default name is "InputDataSet" when you use it in your script.
And here is my script to loop through all products and calculate the slope of sale quantity.
INSERT INTO [stgR_QtySlope] ([Key]
,[elasticity_RedRetail]
,[elasticity_DemandMerchSalePrice]
,[ttl_sales]
,[max_price]
,[min_price]
,[mean_price]
,[price_ct]
,[start_dt]
,[end_dt]
,[slope]
,[orderdays_ct]
,[brandkey]
,[Link]
,[AnalysisType]
,[AnalysisDate]
,[ReportType]
,[ReportDate])
execute sp_execute_external_script
@language = N'R'
,@script = N'library(RODBC)
library(doParallel)
library(stats)
dataFetch2 <-InputDataSet
b <- dataFetch2
b$ProductKey <- trimws(b$ProductKey)
b$PerUnitPrice = b$RedRetailPrice #used to calculate elasticity
sku_list <- unique(trimws(b$ProductKey))
b$FreeShipping <- NULL
b$OrderDays <- NULL
b$RedRetailPrice <- NULL
b$SaleQtyPerDay <- NULL
b$BrandKey <- NULL
elasticity_values <- data.frame()
for (i in 1:length(sku_list)){
test_sku2 <- b[which(b$ProductKey==sku_list[i]),]
#elasticity
m2 <- lm(formula=SaleQty~PerUnitPrice, data= test_sku2)
coeffs2 <- as.data.frame(m2[[1]])
gradient2<- coeffs2[2,1]
gradient_final2 <- ifelse(is.na(gradient2), 0, gradient2)
mean_price <- sum(test_sku2$SaleQty * test_sku2$PerUnitPrice)/sum(test_sku2$SaleQty) #mean(test_sku2$PerUnitPrice)
max.price <- max(test_sku2$PerUnitPrice)
min.price <- min(test_sku2$PerUnitPrice)
price.ct <- length(unique(test_sku2$PerUnitPrice))
#elasticity DemandMerch sale price
m3 <- lm(formula=SaleQty~DemandMerchSellingPrice, data= test_sku2)
coeffs3 <- as.data.frame(m3[[1]])
gradient3<- coeffs3[2,1]
gradient_final3 <- ifelse(is.na(gradient2), 0, gradient3)
mean_price2 <- sum(test_sku2$SaleQty * test_sku2$DemandMerchSellingPrice)/sum(test_sku2$SaleQty) #mean(test_sku2$PerUnitPrice)
max.price2 <- max(test_sku2$DemandMerchSellingPrice)
min.price2 <- min(test_sku2$DemandMerchSellingPrice)
price.ct2 <- length(unique(test_sku2$DemandMerchSellingPrice))
#combine to single day ignoring price for sales slope
test_sku2$DemandMerchSellingPrice <- NULL
test_sku2 <- aggregate(test_sku2$SaleQty, by=list(test_sku2$ProductKey,test_sku2$OrderReceivedDateKey),FUN=sum)
names(test_sku2) <- c("ProductKey", "OrderReceivedDateKey","SaleQty")
attach(test_sku2)
test_sku2 <- test_sku2[order(OrderReceivedDateKey),]
detach(test_sku2)
test_sku2$ID <- seq.int(nrow(test_sku2))
#sales slope
m1 <- lm(formula=SaleQty~ID, data= test_sku2)
coeffs <- as.data.frame(m1[[1]])
gradient<- coeffs[2,1]
gradient_final <- ifelse(is.na(gradient), 0, gradient)
mean_sales <- mean(test_sku2$SaleQty)
ttl.sales <- sum(test_sku2$SaleQty)
orderdays.ct <- length(unique(test_sku2$OrderReceivedDateKey))
brandkey <- 100
end_dt <- max(test_sku2$OrderReceivedDateKey)
start_dt <- min(test_sku2$OrderReceivedDateKey)
slope <- gradient_final
elasticity <- gradient_final2*mean_price/mean_sales
elasticity2 <- gradient_final3*mean_price2/mean_sales
sku_elasticity <- cbind(sku_list[i],as.numeric(elasticity),as.numeric(elasticity2),as.numeric(ttl.sales),as.numeric(max.price),as.numeric(min.price),as.numeric(mean_price),price.ct,start_dt,end_dt,as.numeric(slope),orderdays.ct,brandkey)
elasticity_values <- rbind(elasticity_values,sku_elasticity)
}
colnames(elasticity_values)[colnames(elasticity_values)=="V1"] <- "ProductKey"
elasticity_values$Link <- "ProductKey"
elasticity_values$AnalysisType <- "Slope-7"
elasticity_values$AnalysisDate <- as.Date(Sys.Date())
elasticity_values$ReportType <- "Slope-7"
elasticity_values$ReportDate <- as.Date(Sys.Date())
OutputDataSet <- elasticity_values;'
,@input_data_1 = N'With PriceElasticty (ProductKey,OrderReceivedDateKey, BrandKey, SaleQty,BlackRetailPrice,RedRetailPrice,DemandItemSoldPrice,
DemandMerchSalesTtl, DemandMerchSalesWFeeAndShipTtl, FreeShipping)
as
(
SELECT D.[ProductKey], D.[OrderReceivedDateKey], D.[BrandKey], D.[SaleQty], D.BlackRetailPrice
,case when D.RedRetailPrice = 0 then D.BlackRetailPrice else D.RedRetailPrice end RedRetailPrice
,round(D.SaleAmt/D.SaleQty,2) DemandItemSoldPrice
,round((D.SaleAmt - D.DiscountAmt + D.MiscAmt),2) DemandMerchSalesTtl
,round((D.SaleAmt - D.DiscountAmt + D.MiscAmt+ProcessingFeeAmt+ShippingAmt),2) DemandMerchSalesWFeeAndShipTtl
,case when ShippingAmt = 0 then 1 else 0 end FreeShipping
FROM [Demand] D
inner join [Product] P on D.ProductKey = P.ProductKey
where D.OrderReceivedDateKey between (SELECT YEAR(GETDATE()-8) * 10000 + MONTH(GETDATE()-8) * 100 + DAY(GETDATE()-8))
and (SELECT YEAR(GETDATE()-1) * 10000 + MONTH(GETDATE()-1) * 100 + DAY(GETDATE()-1))
and D.IsCanceled = 0)
select ProductKey,FreeShipping, RedRetailPrice, BrandKey, OrderReceivedDateKey
,count(distinct OrderReceivedDateKey) OrderDays
,sum(SaleQty) SaleQty
,cast(sum(SaleQty) as decimal(8,2))/cast(count(distinct OrderReceivedDateKey) as decimal(8,2)) SaleQtyPerDay
,sum(DemandMerchSalesTtl)/sum(SaleQty) DemandMerchSellingPrice
from PriceElasticty
group by ProductKey, FreeShipping, RedRetailPrice, BrandKey,OrderReceivedDateKey
order by FreeShipping, ProductKey, RedRetailPrice, BrandKey;'
--move from STG to Permenant table
INSERT [R_QtySlope]
([Key]
,[elasticity_RedRetail]
,[elasticity_DemandMerchSalePrice]
,[ttl_sales]
,[max_price]
,[min_price]
,[mean_price]
,[price_ct]
,[start_dt]
,[end_dt]
,[slope]
,[orderdays_ct]
,[brandkey]
,[Link]
,[AnalysisType]
,[AnalysisDate]
,[ReportType]
,[ReportDate])
SELECT [Key]
,cast(left([elasticity_RedRetail],8) as [decimal](8, 2))
,cast(left([elasticity_DemandMerchSalePrice],8) as [decimal](8, 2))
,cast(left([ttl_sales],9) as [decimal](9, 2))
,cast(left([max_price],7) as [decimal](7, 2))
,cast(left([min_price],7) as [decimal](7, 2))
,cast(left([mean_price],7) as [decimal](7, 2))
,[price_ct]
,[start_dt]
,[end_dt]
,cast(left([slope],7) as [decimal](7, 4))
,[orderdays_ct]
,[brandkey]
,[Link]
,[AnalysisType]
,getdate()
,[ReportType]
,getdate()
FROM [stgR_QtySlope]