# Need to have R 3.4.3 or above installed
# IMF
# For any question contact vguerreiro@imf.org or RPPI@imf.org
# Ignore "warnings" in the console

#################### Characteristics - Production ####################
# (1 period/quarter)
# Read "Box 4. Prepare data for processing" in the Guide
# This code is to be used for all periods of index compilation except the
# very first year.
# The following quarters are compiled with the "production" code.
#
# Inputs (CSV):
#   <Output_folder>/All_Indexes<Previous.Period>.csv  index series so far
#   <Data_folder>/<Crrt.Period>.csv                   current-period data
#   <Data_folder>/weights<WeightsYear>.csv            strata weights
#   <Output_folder>/Coef<BasePeriod>s<k>.csv          base coefficients
#   <Output_folder>/Intcpt<BasePeriod>s<k>.csv        base intercept
# Output (CSV):
#   <Output_folder>/All_Indexes<Crrt.Period>.csv

# Install packages ----
packages1 <- c("magrittr", "dplyr", "gridExtra", "rio", "EnvStats",
               "micEconIndex", "readr", "dummies", "broom", "openxlsx",
               "XLConnect", "xlsx")
pkgs2inst <- !(packages1 %in% .packages(all.available = TRUE))
if (any(pkgs2inst)) install.packages(packages1[pkgs2inst])

# The full list is loaded on a best-effort basis (a failure only warns), but
# the packages this script actually calls must be present, so fail early with
# a clear message instead of a "could not find function" error later on.
pkgs_loaded <- vapply(packages1, require, logical(1), character.only = TRUE)
pkgs_needed <- c("readr", "dummies")
if (!all(pkgs_loaded[pkgs_needed])) {
  stop("Required package(s) could not be loaded: ",
       paste(pkgs_needed[!pkgs_loaded[pkgs_needed]], collapse = ", "),
       call. = FALSE)
}
rm(packages1, pkgs2inst, pkgs_loaded, pkgs_needed)

#### !!! COMPILER NEEDS TO UPDATE THE FOLLOWING: ------------------------------

# !!! Define the periods -------------------------
Crrt.Period <- "4Q2015"
Previous.Period <- "3Q2015"
BasePeriod <- "4Q2014"
WeightsYear <- "2014"

# !!!! Period used for chaining: change to the last quarter of the previous
# year
Chain.Period <- "4Q2014"

# !!! Insert address of the folder where the data is
Data_folder <- "C:/Users/vguerreiro/My Local Documents/CSO_Synthetic_data/data set/"

# !!! Insert address of the folder where the outputs should be stored
Output_folder <- "C:/Users/vguerreiro/My Local Documents/CSO_Synthetic_data/Exercise 4_characteristics/"

# !!! Define the Strata as c("a","b","c",....."z")
Strat1 <- c("New")
Strat2 <- c("Existing")

# !!! Define the variable to be used for stratification (the "" are needed)
strata.tag.name <- "Status"

# !!! Choose the name of your strata (the "" are needed)
strata.given.name <- "Status"

#### !!! Complete with your number of strata
strata <- list(Strat1, Strat2)

# !!! Define the names of your strata (the "" are needed)
Strata.names <- c("Total", "New", "Existing")

### !!! Select variables to be excluded from the regression (the "" are needed)
exl_var <- c("id", "Year", "Month", "Status", "Period", "Region",
             "Year_Built", "County")

### !!! Select categorical variables to be used on the regression
### (the "" are needed).
# These variables will be transformed into dummies (binary) -------------------
catg_var <- c("Dwelling_Type", "BER", "Year_Built_Agg", "County_Agg",
              "Neighborhood_Type", "Building_Levels", "Central_Heating")

### !!! End of Compiler fundamental interaction ------------------------------
#-----------------------------------------------------------------------------#

num.strata <- length(strata)
# The stratification variable is never a regressor.
exl_var <- unique(c(strata.tag.name, exl_var))

# Importing indices
All_Indexes <- as.data.frame(
  read_csv(paste0(Output_folder, "All_Indexes", Previous.Period, ".csv"),
           col_types = cols())
)
if (!Chain.Period %in% names(All_Indexes)) {
  stop("Chaining period '", Chain.Period, "' is not a column of the ",
       "All_Indexes file for ", Previous.Period, call. = FALSE)
}
chaining <- All_Indexes[[Chain.Period]]

# Importing data file
DF <- as.data.frame(
  read_csv(paste0(Data_folder, Crrt.Period, ".csv"), col_types = cols())
)

# Importing weights
weights <- as.data.frame(
  read_csv(paste0(Data_folder, "weights", WeightsYear, ".csv"),
           col_types = cols())
)

# Calculating OLS for each stratum of the current period.
#
# BaseCalcul: the period's data; must contain a `Price` column, the column
#             named by `strata.tag.name` and the columns listed in `catg_var`.
# Reads the script-level settings `catg_var`, `exl_var`, `strata` and
# `strata.tag.name`.
# Returns a list with one data frame per element of `strata`; column 1 holds
# the coefficient name, column 2 the estimate, followed by the remaining
# columns of summary(lm)$coefficients. Row 1 is the intercept.
BasecalFunction <- function(BaseCalcul) {
  BaseCalcul <- as.data.frame(BaseCalcul)

  ## Dummies and categorical variables
  Base_ch <- dummy.data.frame(BaseCalcul,
                              names = catg_var,
                              omit.constants = TRUE,
                              dummy.classes = getOption("dummy.classes"),
                              all = TRUE)

  if (!strata.tag.name %in% names(Base_ch)) {
    stop("Stratification variable '", strata.tag.name,
         "' not found in the data", call. = FALSE)
  }

  # Selecting by name keeps every regressor even if nothing is excluded
  # (a negative index built from an empty match would drop all columns).
  keep_cols <- setdiff(names(Base_ch), exl_var)

  lapply(strata, function(stratum_values) {
    ## Strata
    # %in% never yields NA, so strata with fewer labels than others and
    # records with a missing tag do not add all-NA rows to the sample.
    in_stratum <- Base_ch[[strata.tag.name]] %in% stratum_values
    if (!any(in_stratum)) {
      stop("No observations found for stratum: ",
           paste(stratum_values, collapse = ", "), call. = FALSE)
    }
    stratum_data <- Base_ch[in_stratum, keep_cols, drop = FALSE]

    ## OLS
    fit <- lm(log(Price) ~ ., data = stratum_data)
    coefs <- summary(fit)$coefficients
    data.frame(term = rownames(coefs), coefs,
               row.names = NULL, check.names = FALSE,
               stringsAsFactors = FALSE)
  })
}

# Index of one stratum relative to the base period (base = 100).
#
# crrt_est:    coefficient table of the current period for this stratum, as
#              returned by BasecalFunction (row 1 = intercept).
# coef_base:   base-period table with columns CHR, Coef_P1s<k> and Average.
# intcpt_base: base-period intercept; expected to hold a single numeric value
#              (assumption about the Intcpt file layout - confirm).
# k:           position of the stratum in `strata`.
StratumIndex <- function(crrt_est, coef_base, intcpt_base, k) {
  crrt_col <- paste0("crrt.coef.s", k)
  base_col <- paste0("Coef_P1s", k)

  missing_cols <- setdiff(c("CHR", base_col, "Average"), names(coef_base))
  if (length(missing_cols) > 0) {
    stop("Base coefficient file for stratum ", k, " lacks column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }
  base_intercept <- unlist(intcpt_base, use.names = FALSE)
  if (length(base_intercept) != 1 || !is.numeric(base_intercept)) {
    stop("Base intercept file for stratum ", k,
         " must contain exactly one numeric value", call. = FALSE)
  }

  crrt_intercept <- crrt_est[[1, 2]]
  crrt_coef <- crrt_est[-1, 1:2]
  colnames(crrt_coef) <- c("CHR", crrt_col)

  # Only characteristics estimated in both periods enter the index.
  coef_both <- merge(x = crrt_coef, y = coef_base, by = "CHR", all.y = TRUE)
  coef_both <- coef_both[complete.cases(coef_both), , drop = FALSE]

  coef_change <- coef_both[[crrt_col]] - coef_both[[base_col]]
  exp(sum(coef_change * coef_both[["Average"]]) +
        (crrt_intercept - base_intercept)) * 100
}

Param <- BasecalFunction(BaseCalcul = DF)

# Index by stratum, using the coefficients and intercept of the base period
crrt.Index <- vapply(seq_len(num.strata), function(k) {
  coef_base <- as.data.frame(
    read_csv(paste0(Output_folder, "Coef", BasePeriod, "s", k, ".csv"),
             col_types = cols())
  )
  intcpt_base <- as.data.frame(
    read_csv(paste0(Output_folder, "Intcpt", BasePeriod, "s", k, ".csv"),
             col_types = cols())
  )
  StratumIndex(Param[[k]], coef_base, intcpt_base, k)
}, numeric(1))
print(crrt.Index)

# Strata aggregation
# Column 2 of the weights file holds the weights; its first row is skipped
# (presumably the total - confirm against the weights file) and the remaining
# rows are matched to the strata in order.
strata_weights <- weights[-1, 2]
if (length(strata_weights) != num.strata) {
  stop("The weights file has ", length(strata_weights),
       " strata weight(s) but ", num.strata, " strata are defined",
       call. = FALSE)
}
Agg_Idx_St <- sum(strata_weights * crrt.Index)
crrt.Index <- c(Agg_Idx_St, crrt.Index)
print(crrt.Index)

# chain
if (length(chaining) != length(crrt.Index)) {
  stop("All_Indexes has ", length(chaining), " row(s) but ",
       length(crrt.Index), " index values were compiled", call. = FALSE)
}
crrt.Index <- setNames(data.frame(crrt.Index * chaining / 100), Crrt.Period)
All_Indexes <- cbind(All_Indexes, crrt.Index)
rownames(All_Indexes) <- Strata.names
print(All_Indexes)

# Exporting result to csv (written to Output_folder explicitly so the
# working directory of the session is left untouched)
outfile <- paste0(Output_folder, "All_Indexes", Crrt.Period, ".csv")
write.csv(All_Indexes, file = outfile, row.names = FALSE)