diff --git a/scenarioProcessing/aggregateShocks.R b/scenarioProcessing/aggregateShocks.R index 95b273d8ebad505d599b6e909769db386f4f9f5d..2c45a128abf709c6fe7630c78e6cc01d5b2b8ff7 100644 --- a/scenarioProcessing/aggregateShocks.R +++ b/scenarioProcessing/aggregateShocks.R @@ -4,6 +4,13 @@ # Benjamin Blanz 2024 # +library(nuts) +rlang::local_options(nuts.verbose = "quiet") +library(readxl) +nutsSheet <- read_excel("helperData/NUTS2021.xlsx", + sheet = "NUTS & SR 2021", range = "A1:H2125") +sectorColPattern <- 'ALL|TOTAL|^[A-Z]$|AGR|MIN|MFG|EGW|CNS|TRD|OTP|WTP|CMN|OFI|OBS|REA|PUB|OSG|agr|coa-oil-gas|coa\\.oil\\.gas|pro|ely-elc|ely\\.elc|ser|air-wtp-tran|air\\.wtp\\.tran' + source('funAggregateNuts2CNT.R') codes <- read.csv("helperData/nuts3fid4Codes.csv") codes <- codes[,c('fid4','CNTR_CODE','CNTR_NAME','CNTR_CODE_iso2','CNTR_CODE_iso3','CNTR_CODE_Eurostat')] @@ -12,8 +19,23 @@ codes <- codes[!duplicated(codes),] # files to aggregate files <- list.files('scenarios',pattern = 'csv',recursive = T) -files <- paste0('scenarios/',files[grep('.csv$(?<!aggCNT.csv)',files,perl=T)]) -files <- files[grep('.csv$(?<!rel.csv)',files,perl=T)] +files <- paste0('scenarios/',files[grep('\\.csv$',files,perl=T)]) +files <- files[!grepl('(agg)|(rel)',files)] + +cat('Aggregating NUTS3 to NUTS2 for...\n') +for(f.i in 1:length(files)){ + file <- files[f.i] + cat(sprintf('%i of %i %s\n',f.i, length(files),file)) + data <- read.csv(file,row.names=NULL) + if(!('fid4'%in%names(data))){ + cat(' fid4 col missing probably already country level data\n') + } else { + data.NUTS2 <- aggregateNUTS3ToNUTS2(data,codes,nutsSheet,sectorColPattern) + write.csv(data.NUTS2,gsub('.csv','-aggNUTS2.csv',file),row.names = F) + } +} + + cat('Aggregating NUTS3 to CNT for...\n') for(f.i in 1:length(files)){ file <- files[f.i] @@ -22,7 +44,7 @@ for(f.i in 1:length(files)){ if(!('fid4'%in%names(data))){ cat(' fid4 col missing probably already country level data\n') } else { - data.CNT <- aggregateNUTS3ToCountry(data,codes) + data.CNT <- aggregateNUTS3ToCountry(data,codes,sectorColPattern) write.csv(data.CNT,gsub('.csv','-aggCNT.csv',file),row.names = F) } } diff --git a/scenarioProcessing/funAggregateNuts2CNT.R b/scenarioProcessing/funAggregateNuts2CNT.R index 4e6f158736f5832ddc4a9f5b8d34e8233d9a5a61..993cb4ccc3d2a0e389a03b239f2add7b85bd4f91 100644 --- a/scenarioProcessing/funAggregateNuts2CNT.R +++ b/scenarioProcessing/funAggregateNuts2CNT.R @@ -3,13 +3,13 @@ # # Parameters # data data to aggregate -# codes country codes +# codes country +# sectorColPattern the matching pattern for sectorl column names # # Returns # aggregated data # -aggregateNUTS3ToCountry <- function(data,codes){ - sectorColPattern <- 'ALL|TOTAL|^[A-Z]$|AGR|MIN|MFG|EGW|CNS|TRD|OTP|WTP|CMN|OFI|OBS|REA|PUB|OSG|agr|coa.oil.gas|pro|ely.elc|ser|air.wtp.tran' +aggregateNUTS3ToCountry <- function(data,codes,sectorColPattern){ # identify columns with data rather than identifiers sectorCols <- grep(sectorColPattern,names(data),perl = T) # convert data to numeric (deals with in import error) @@ -31,3 +31,66 @@ aggregateNUTS3ToCountry <- function(data,codes){ data <- merge(codesCNT,data) return(data) } + +# +# Function to aggregate stocks or stock data to the NUTS2 level from the NUTS3 level +# +# Parameters +# data data to aggregate +# codes country codes +# nutsSheet nuts2021 sheet of region names from eurostat +# sectorColPattern the matching pattern for sectorl column names +# +# Returns +# aggregated data +# +aggregateNUTS3ToNUTS2 <- function(data,codes,nutsSheet,sectorColPattern){ + # identify rows that contain semantically valid nuts ids + data.NUTSsubset <- data[grepl('^[a-zA-Z]{2}[a-zA-Z0-9]{1,3}$',data$fid4,perl=T),] + # use the nuts classifiy script to determine the nuts versions + data.NUTSsubset.classified <- nuts_classify(data.NUTSsubset,'fid4') + # identify rows that have now valid nuts version these can be candidate countries that are not in the nuts lib database + # or malformed + data.NUTSsubset.naVersion <- data.NUTSsubset.classified$data[is.na(data.NUTSsubset.classified$data$from_version),] + # remove the invalid rows + data.NUTSsubset <- data.NUTSsubset[!(data.NUTSsubset$fid4 %in% data.NUTSsubset.naVersion$from_code),] + # reclassify and convert to uniform nuts version 2021 + data.NUTSsubset.classified <- nuts_classify(data.NUTSsubset,'fid4') + dataColsNames <- names(data.NUTSsubset)[grep(sectorColPattern,names(data.NUTSsubset),perl = T)] + variablesVector <- rep('absolute',length(dataColsNames)) + names(variablesVector) <- dataColsNames + data.NUTSsubset.2021 <- nuts_convert_version(data.NUTSsubset.classified,2021,variablesVector) + data.NUTSsubset.2021 <- data.NUTSsubset.2021[,-which(names(data.NUTSsubset.2021)=='country')]#remove column "country" to work around a bug in the next line + data.NUTSsubset.2021.classified <- nuts_classify(data.NUTSsubset.2021,'to_code') + # aggregate the valid rows to nuts2 + data.NUTSsubset.NUTS2 <- nuts_aggregate(data.NUTSsubset.2021.classified,2,variablesVector) + data.NUTSsubset.NUTS2 <- data.NUTSsubset.NUTS2[,-which(names(data.NUTSsubset.NUTS2)=='country')] + # drop empty rows, this should be just the FRY, france's overseas territories + if(sum(complete.cases(data.NUTSsubset.NUTS2))==nrow(data.NUTSsubset.NUTS2)){ + stop('too many incomplete cases\n') + } else { + data.NUTSsubset.NUTS2 <- data.NUTSsubset.NUTS2[complete.cases(data.NUTSsubset.NUTS2),] + } + names(data.NUTSsubset.NUTS2)[1] <- 'NUTS2' + + # deal with the rows that where omitted above + data.NUTSsubset.naVersion$nuts2 <- substring(data.NUTSsubset.naVersion$from_code,1,4) + sectorCols <- grep(sectorColPattern,names(data.NUTSsubset.naVersion),perl = T) + data.NUTSsubset.naVersion.NUTS2 <- aggregate(data.NUTSsubset.naVersion[,c(sectorCols)], + by = list(Category=data.NUTSsubset.naVersion$nuts2),FUN=sum) + names(data.NUTSsubset.naVersion.NUTS2)[1] <- 'NUTS2' + + # merge the two + data.NUTSsubset.NUTS2 <- rbind(data.NUTSsubset.naVersion.NUTS2,data.NUTSsubset.NUTS2) + + # add the country codes and region names + data.NUTSsubset.NUTS2$CNTR_CODE <- substring(data.NUTSsubset.NUTS2$NUTS2,1,2) + data.NUTSsubset.NUTS2 <- merge(data.NUTSsubset.NUTS2,nutsSheet[,c('Code 2021','NUTS level 2')],by.x='NUTS2',by.y='Code 2021',all.x=T) + names(data.NUTSsubset.NUTS2)[names(data.NUTSsubset.NUTS2)=='NUTS level 2'] <- 'NAME' + numCol <- ncol(data.NUTSsubset.NUTS2) + data.NUTSsubset.NUTS2 <- data.NUTSsubset.NUTS2[,c(1,numCol-1,numCol, 2:(numCol-2))] + codesCNT <- codes[,-1] + codesCNT <- codesCNT[!duplicated(codesCNT),] + data.NUTS2 <- merge(codesCNT,data.NUTSsubset.NUTS2) + return(data.NUTS2) +} diff --git a/scenarioProcessing/funRelData.R b/scenarioProcessing/funRelData.R index 2e4fad7eb6ea1448d8ce25176d824dd0a41854eb..605b28f2e1b922fc08ab7df83c6149b859b49d6e 100644 --- a/scenarioProcessing/funRelData.R +++ b/scenarioProcessing/funRelData.R @@ -8,20 +8,20 @@ # # Parameters # data The shock impacts specifying the capital destroyed -# stocksNUTS3 The sectoral stocks of capital at NUTS3 level -# stocksCNT The sectoral stocks of capital at country level +# stocks The sectoral stocks of capital at NUTS3 level, NUTS2 level, or at country +# level depending on aggregation level +# aggregationLevel one of 'CNT','NUTS2','NUTS3' +# sectorColPattern the matching string for the sector names # # Returns # shock data relative to the stocks # # Benjamin Blanz 2024 # -relData <- function(data,stocksNUTS3=NULL,stocksCNT=NULL){ - sectorColPattern <- 'ALL|TOTAL|^[A-Z]$|AGR|MIN|MFG|EGW|CNS|TRD|OTP|WTP|CMN|OFI|OBS|REA|PUB|OSG|agr|coa.oil.gas|pro|ely.elc|ser|air.wtp.tran' +relData <- function(data,stocks,aggregationLevel,sectorColPattern){ # Ensure the Total clumn is called TOTAL, not ALL as in some scenarios names(data)[names(data)=='ALL'] <- 'TOTAL' - names(stocksNUTS3)[names(stocksNUTS3)=='ALL'] <- 'TOTAL' - names(stocksCNT)[names(stocksCNT)=='ALL'] <- 'TOTAL' + names(stocks)[names(stocks)=='ALL'] <- 'TOTAL' # Identify the columns with data (not the columns with country or idx). sectorCols <- grep(sectorColPattern,names(data),perl = T) # prepare an empty data set for the relative data, keeping the index columns @@ -29,17 +29,16 @@ relData <- function(data,stocksNUTS3=NULL,stocksCNT=NULL){ data.rel[,sectorCols] <- NA for(i in 1:nrow(data)){ # identify the correct row in the stocks - if('fid4' %in% names(data)){ - stocks <- stocksNUTS3 + if(aggregationLevel=='NUTS3'){ rowInStocks <- which(stocks$fid4 == data$fid4[i]) + } else if(aggregationLevel=='NUTS2'){ + rowInStocks <- which(stocks$NUTS2 == data$NUTS2[i]) } else if (nchar(data$CNTR_CODE[i])==2){ - stocks <- stocksCNT rowInStocks <- which(stocks$CNTR_CODE_Eurostat == data$CNTR_CODE[i]) if(length(rowInStocks)==0){ rowInStocks <- which(stocks$CNTR_CODE_iso2== data$CNTR_CODE[i]) } } else if (nchar(data$CNTR_CODE[i])==3){ - stocks <- stocksCNT rowInStocks <- which(stocks$CNTR_CODE_iso3 == data$CNTR_CODE[i]) } if(length(rowInStocks)==0){ diff --git a/scenarioProcessing/relativizeShocks.R b/scenarioProcessing/relativizeShocks.R index d587eed336acf1ff52c57bac3aaa03cfdb573489..54e14d0f6bdc7c6ba66276223dfae4a2075b4c45 100644 --- a/scenarioProcessing/relativizeShocks.R +++ b/scenarioProcessing/relativizeShocks.R @@ -7,12 +7,18 @@ source('funRelData.R') -nuts3LevelStocksNACE <- read.csv("helperData/nuts3LevelStocksNACE.csv", row.names=NULL) -countryLevelStocksNACE <- read.csv("helperData/countryLevelStocksNACE.csv", row.names=NULL) -nuts3LevelStocksGTAP <- read.csv("helperData/nuts3LevelStocksGTAP.csv", row.names=NULL) -countryLevelStocksGTAP <- read.csv("helperData/countryLevelStocksGTAP.csv", row.names=NULL) -nuts3LevelStocksGRACE <- read.csv("helperData/nuts3LevelStocksGRACE.csv", row.names=NULL) -countryLevelStocksGRACE <- read.csv("helperData/countryLevelStocksGRACE.csv", row.names=NULL) +sectorColPattern <- 'ALL|TOTAL|^[A-Z]$|AGR|MIN|MFG|EGW|CNS|TRD|OTP|WTP|CMN|OFI|OBS|REA|PUB|OSG|agr|coa-oil-gas|coa\\.oil\\.gas|pro|ely-elc|ely\\.elc|ser|air-wtp-tran|air\\.wtp\\.tran' + +stocks <- list() +stocks$NACE <- list(NUTS3 = read.csv("helperData/nuts3LevelStocksNACE.csv", row.names=NULL), + NUTS2 = read.csv("helperData/nuts2LevelStocksNACE.csv", row.names=NULL), + CNT = read.csv("helperData/countryLevelStocksNACE.csv", row.names=NULL)) +stocks$GTAP <- list(NUTS3 = read.csv("helperData/nuts3LevelStocksGTAP.csv", row.names=NULL), + NUTS2 = read.csv("helperData/nuts2LevelStocksGTAP.csv", row.names=NULL), + CNT = read.csv("helperData/countryLevelStocksGTAP.csv", row.names=NULL)) +stocks$GRACE <- list(NUTS3 = read.csv("helperData/nuts3LevelStocksGRACE.csv", row.names=NULL), + NUTS2 = read.csv("helperData/nuts2LevelStocksGRACE.csv", row.names=NULL), + CNT = read.csv("helperData/countryLevelStocksGRACE.csv", row.names=NULL)) files <- list.files('scenarios',pattern = 'csv',recursive = T) @@ -22,16 +28,20 @@ for(f.i in 1:length(files)){ file <- files[f.i] cat(sprintf('%i of %i %s\n',f.i, length(files),file)) data <- read.csv(file,row.names=NULL) + sectorType <- 'NACE' if(grepl('GTAP',file)){ - data.rel <- relData(data,nuts3LevelStocksGTAP,countryLevelStocksGTAP) - write.csv(data.rel,gsub('.csv','-rel.csv',file),row.names = F) + sectorType <- 'GTAP' } else if (grepl('GRACE',file)) { - data.rel <- relData(data,nuts3LevelStocksGRACE,countryLevelStocksGRACE) - write.csv(data.rel,gsub('.csv','-rel.csv',file),row.names = F) - } else { - data.rel <- relData(data,nuts3LevelStocksNACE,countryLevelStocksNACE) - write.csv(data.rel,gsub('.csv','-rel.csv',file),row.names = F) + sectorType <- 'GRACE' + } + aggType <- 'NUTS3' + if(grepl('aggCNT',file) | (!'fid4' %in% names(data) & !'NUTS2' %in% names(data))){ + aggType <- 'CNT' + } else if(grepl('aggNUTS2',file)){ + aggType <- 'NUTS2' } + data.rel <- relData(data,stocks[[sectorType]][[aggType]],aggType,sectorColPattern) + write.csv(data.rel,gsub('.csv','-rel.csv',file),row.names = F) } diff --git a/scenarioProcessing/sectorMappingNACE2GRACE.R b/scenarioProcessing/sectorMappingNACE2GRACE.R index dc4b022eb5f2a923886b4380a0d144641ca79ad0..55f5a4e3452dacbc1cc0bd36c28e045bff5da07a 100644 --- a/scenarioProcessing/sectorMappingNACE2GRACE.R +++ b/scenarioProcessing/sectorMappingNACE2GRACE.R @@ -15,6 +15,7 @@ files <- list.files('scenarios',pattern = 'NACE.csv',recursive = T) files <- paste0('scenarios/',files) files[length(files)+1] <- 'helperData/countryLevelStocksNACE.csv' files[length(files)+1] <- 'helperData/nuts3LevelStocksNACE.csv' +files[length(files)+1] <- 'helperData/nuts2LevelStocksNACE.csv' # Mapping of sectors for (file in files){ @@ -31,8 +32,8 @@ for (file in files){ GRACEcodes <- unique(sector_mapping$`GRACE Code`) for(GRACEcode in GRACEcodes){ dataGRACE[[GRACEcode]] <- NA - rows <- which(sector_mapping$`GRACE Code` == GRACEcode) - NACEcodes <- strsplit(paste(sector_mapping$`NACE Code`[rows],collapse = '.'),'\\.')[[1]] + col <- which(sector_mapping$`GRACE Code` == GRACEcode) + NACEcodes <- strsplit(paste(sector_mapping$`NACE Code`[col],collapse = '.'),'\\.')[[1]] if(sum(NACEcodes %in% colnames(data))==length(NACEcodes)){ if(length(NACEcodes)>1){ dataGRACE[[GRACEcode]] <- rowSums(data[,NACEcodes]) diff --git a/scenarioProcessing/sectorMappingNACE2GTAP.R b/scenarioProcessing/sectorMappingNACE2GTAP.R index b8c24a23bebb7b8e65ada7596a8b6d3da65f2a88..353fd130761b68e0ef013e1cbbc0e770ec11fc01 100644 --- a/scenarioProcessing/sectorMappingNACE2GTAP.R +++ b/scenarioProcessing/sectorMappingNACE2GTAP.R @@ -13,6 +13,7 @@ files <- list.files('scenarios',pattern = 'NACE.csv',recursive = T) files <- paste0('scenarios/',files) files[length(files)+1] <- 'helperData/countryLevelStocksNACE.csv' files[length(files)+1] <- 'helperData/nuts3LevelStocksNACE.csv' +files[length(files)+1] <- 'helperData/nuts2LevelStocksNACE.csv' # Mapping of sectors for (file in files){ diff --git a/scenarioProcessing/stockAggregation.R b/scenarioProcessing/stockAggregation.R index 5d17b7bf5a959d5c6f7716819d627bd07abd21d2..8f8c20fb0a512d8635774dff477ce8eb84980ca8 100644 --- a/scenarioProcessing/stockAggregation.R +++ b/scenarioProcessing/stockAggregation.R @@ -7,7 +7,7 @@ # Benjamin Blanz 2024 # -sectorColPattern <- 'ALL|TOTAL|^[A-Z]$|AGR|MIN|MFG|EGW|CNS|TRD|OTP|WTP|CMN|OFI|OBS|REA|PUB|OSG|agr|coa-oil-gas|pro|ely-elc|ser|air-wtp-tran' +sectorColPattern <- 'ALL|TOTAL|^[A-Z]$|AGR|MIN|MFG|EGW|CNS|TRD|OTP|WTP|CMN|OFI|OBS|REA|PUB|OSG|agr|coa-oil-gas|coa\\.oil\\.gas|pro|ely-elc|ely\\.elc|ser|air-wtp-tran|air\\.wtp\\.tran' # read scenario file with NUTS3 data #### library(readxl) @@ -35,7 +35,7 @@ stocksNUTS3$CNTR_NAME <- countrycode(stocksNUTS3$CNTR_CODE_iso3,'iso3c','iso.nam stocksNUTS3$CNTR_CODE_Eurostat <- countrycode(stocksNUTS3$CNTR_CODE_iso3,'iso3c','eurostat') stocksNUTS3$CNTR_CODE_iso2 <- countrycode(stocksNUTS3$CNTR_CODE_iso3,'iso3c','iso2c') stocksNUTS3 <- stocksNUTS3[,c(1:5,29,30,31,28,6:27)] -write.csv(stocksNUTS3,file = 'helperData/nuts3LevelStocks.csv',row.names=F) +write.csv(stocksNUTS3,file = 'helperData/nuts3LevelStocksNACE.csv',row.names=F) codes <- stocksNUTS3[,1:9] write.csv(codes,file = 'helperData/nuts3fid4Codes.csv',row.names=F) sink('helperData/nuts3LevelStocksMetadata.csv') @@ -65,7 +65,7 @@ stocksCNT$CNTR_NAME <- countrycode(stocksCNT$CNTR_CODE_iso3,'iso3c','iso.name.en stocksCNT$CNTR_CODE_Eurostat <- countrycode(stocksCNT$CNTR_CODE_iso3,'iso3c','eurostat') stocksCNT$CNTR_CODE_iso2 <- countrycode(stocksCNT$CNTR_CODE_iso3,'iso3c','iso2c') stocksCNT <- stocksCNT[,c(25,26,27,24,2:23)] -write.csv(stocksCNT,file = 'helperData/countryLevelStocks.csv',row.names = F) +write.csv(stocksCNT,file = 'helperData/countryLevelStocksNACE.csv',row.names = F) sink('helperData/countryLevelStocksMetadata.csv') cat(sprintf('Unit %s,,\n',stockUnit)) cat(sprintf('Labels,, \n')) @@ -75,3 +75,22 @@ for(n in colnames(stocksNUTS3)[sectorCols]){ cat(sprintf('"%s", "%s", "%s"\n',n,stockLabels[n],stockTypes[n])) } sink() + +# aggregate to NUTS2 level #### +library(nuts) +rlang::local_options(nuts.verbose = "quiet") +nutsSheet <- read_excel("helperData/NUTS2021.xlsx", + sheet = "NUTS & SR 2021", range = "A1:H2125") +stocksNUTS2 <- aggregateNUTS3ToNUTS2(stocksNUTS3, + codes[,c('fid4','CNTR_CODE','CNTR_NAME','CNTR_CODE_iso2','CNTR_CODE_iso3','CNTR_CODE_Eurostat')], + nutsSheet,sectorColPattern) +write.csv(stocksNUTS2,file = 'helperData/nuts2LevelStocksNACE.csv',row.names = F) +sink('helperData/countryLevelStocksMetadata.csv') +cat(sprintf('Unit %s,,\n',stockUnit)) +cat(sprintf('Labels,, \n')) +cat(sprintf('Sector,Label,Type\n')) +sectorCols <- grep(sectorColPattern,names(stocksNUTS2),perl = T) +for(n in colnames(stocksNUTS2)[sectorCols]){ + cat(sprintf('"%s", "%s", "%s"\n',n,stockLabels[n],stockTypes[n])) +} +sink()