Skip to content
Snippets Groups Projects
Commit 7ace5356 authored by Benjamin Blanz's avatar Benjamin Blanz
Browse files

added NUTS2 aggregation

parent fbf697f6
No related branches found
No related tags found
No related merge requests found
......@@ -4,6 +4,13 @@
# Benjamin Blanz 2024
#
library(nuts)
rlang::local_options(nuts.verbose = "quiet")
library(readxl)
nutsSheet <- read_excel("helperData/NUTS2021.xlsx",
sheet = "NUTS & SR 2021", range = "A1:H2125")
sectorColPattern <- 'ALL|TOTAL|^[A-Z]$|AGR|MIN|MFG|EGW|CNS|TRD|OTP|WTP|CMN|OFI|OBS|REA|PUB|OSG|agr|coa-oil-gas|coa\\.oil\\.gas|pro|ely-elc|ely\\.elc|ser|air-wtp-tran|air\\.wtp\\.tran'
source('funAggregateNuts2CNT.R')
codes <- read.csv("helperData/nuts3fid4Codes.csv")
codes <- codes[,c('fid4','CNTR_CODE','CNTR_NAME','CNTR_CODE_iso2','CNTR_CODE_iso3','CNTR_CODE_Eurostat')]
......@@ -12,8 +19,23 @@ codes <- codes[!duplicated(codes),]
# files to aggregate
files <- list.files('scenarios',pattern = 'csv',recursive = T)
files <- paste0('scenarios/',files[grep('.csv$(?<!aggCNT.csv)',files,perl=T)])
files <- files[grep('.csv$(?<!rel.csv)',files,perl=T)]
files <- paste0('scenarios/',files[grep('\\.csv$',files,perl=T)])
files <- files[!grepl('(agg)|(rel)',files)]
cat('Aggregating NUTS3 to NUTS2 for...\n')
for(f.i in 1:length(files)){
file <- files[f.i]
cat(sprintf('%i of %i %s\n',f.i, length(files),file))
data <- read.csv(file,row.names=NULL)
if(!('fid4'%in%names(data))){
cat(' fid4 col missing probably already country level data\n')
} else {
data.NUTS2 <- aggregateNUTS3ToNUTS2(data,codes,nutsSheet,sectorColPattern)
write.csv(data.NUTS2,gsub('.csv','-aggNUTS2.csv',file),row.names = F)
}
}
cat('Aggregating NUTS3 to CNT for...\n')
for(f.i in 1:length(files)){
file <- files[f.i]
......@@ -22,7 +44,7 @@ for(f.i in 1:length(files)){
if(!('fid4'%in%names(data))){
cat(' fid4 col missing probably already country level data\n')
} else {
data.CNT <- aggregateNUTS3ToCountry(data,codes)
data.CNT <- aggregateNUTS3ToCountry(data,codes,sectorColPattern)
write.csv(data.CNT,gsub('.csv','-aggCNT.csv',file),row.names = F)
}
}
......@@ -3,13 +3,13 @@
#
# Parameters
# data data to aggregate
# codes country codes
# codes country
# sectorColPattern the matching pattern for sectorl column names
#
# Returns
# aggregated data
#
aggregateNUTS3ToCountry <- function(data,codes){
sectorColPattern <- 'ALL|TOTAL|^[A-Z]$|AGR|MIN|MFG|EGW|CNS|TRD|OTP|WTP|CMN|OFI|OBS|REA|PUB|OSG|agr|coa.oil.gas|pro|ely.elc|ser|air.wtp.tran'
aggregateNUTS3ToCountry <- function(data,codes,sectorColPattern){
# identify columns with data rather than identifiers
sectorCols <- grep(sectorColPattern,names(data),perl = T)
# convert data to numeric (deals with in import error)
......@@ -31,3 +31,66 @@ aggregateNUTS3ToCountry <- function(data,codes){
data <- merge(codesCNT,data)
return(data)
}
#
# Function to aggregate stocks or stock data to the NUTS2 level from the NUTS3 level
#
# Parameters
# data data to aggregate
# codes country codes
# nutsSheet nuts2021 sheet of region names from eurostat
# sectorColPattern the matching pattern for sectorl column names
#
# Returns
# aggregated data
#
aggregateNUTS3ToNUTS2 <- function(data,codes,nutsSheet,sectorColPattern){
# identify rows that contain semantically valid nuts ids
data.NUTSsubset <- data[grepl('^[a-zA-Z]{2}[a-zA-Z0-9]{1,3}$',data$fid4,perl=T),]
# use the nuts classifiy script to determine the nuts versions
data.NUTSsubset.classified <- nuts_classify(data.NUTSsubset,'fid4')
# identify rows that have now valid nuts version these can be candidate countries that are not in the nuts lib database
# or malformed
data.NUTSsubset.naVersion <- data.NUTSsubset.classified$data[is.na(data.NUTSsubset.classified$data$from_version),]
# remove the invalid rows
data.NUTSsubset <- data.NUTSsubset[!(data.NUTSsubset$fid4 %in% data.NUTSsubset.naVersion$from_code),]
# reclassify and convert to uniform nuts version 2021
data.NUTSsubset.classified <- nuts_classify(data.NUTSsubset,'fid4')
dataColsNames <- names(data.NUTSsubset)[grep(sectorColPattern,names(data.NUTSsubset),perl = T)]
variablesVector <- rep('absolute',length(dataColsNames))
names(variablesVector) <- dataColsNames
data.NUTSsubset.2021 <- nuts_convert_version(data.NUTSsubset.classified,2021,variablesVector)
data.NUTSsubset.2021 <- data.NUTSsubset.2021[,-which(names(data.NUTSsubset.2021)=='country')]#remove column "country" to work around a bug in the next line
data.NUTSsubset.2021.classified <- nuts_classify(data.NUTSsubset.2021,'to_code')
# aggregate the valid rows to nuts2
data.NUTSsubset.NUTS2 <- nuts_aggregate(data.NUTSsubset.2021.classified,2,variablesVector)
data.NUTSsubset.NUTS2 <- data.NUTSsubset.NUTS2[,-which(names(data.NUTSsubset.NUTS2)=='country')]
# drop empty rows, this should be just the FRY, france's overseas territories
if(sum(complete.cases(data.NUTSsubset.NUTS2))==nrow(data.NUTSsubset.NUTS2)){
stop('too many incomplete cases\n')
} else {
data.NUTSsubset.NUTS2 <- data.NUTSsubset.NUTS2[complete.cases(data.NUTSsubset.NUTS2),]
}
names(data.NUTSsubset.NUTS2)[1] <- 'NUTS2'
# deal with the rows that where omitted above
data.NUTSsubset.naVersion$nuts2 <- substring(data.NUTSsubset.naVersion$from_code,1,4)
sectorCols <- grep(sectorColPattern,names(data.NUTSsubset.naVersion),perl = T)
data.NUTSsubset.naVersion.NUTS2 <- aggregate(data.NUTSsubset.naVersion[,c(sectorCols)],
by = list(Category=data.NUTSsubset.naVersion$nuts2),FUN=sum)
names(data.NUTSsubset.naVersion.NUTS2)[1] <- 'NUTS2'
# merge the two
data.NUTSsubset.NUTS2 <- rbind(data.NUTSsubset.naVersion.NUTS2,data.NUTSsubset.NUTS2)
# add the country codes and region names
data.NUTSsubset.NUTS2$CNTR_CODE <- substring(data.NUTSsubset.NUTS2$NUTS2,1,2)
data.NUTSsubset.NUTS2 <- merge(data.NUTSsubset.NUTS2,nutsSheet[,c('Code 2021','NUTS level 2')],by.x='NUTS2',by.y='Code 2021',all.x=T)
names(data.NUTSsubset.NUTS2)[names(data.NUTSsubset.NUTS2)=='NUTS level 2'] <- 'NAME'
numCol <- ncol(data.NUTSsubset.NUTS2)
data.NUTSsubset.NUTS2 <- data.NUTSsubset.NUTS2[,c(1,numCol-1,numCol, 2:(numCol-2))]
codesCNT <- codes[,-1]
codesCNT <- codesCNT[!duplicated(codesCNT),]
data.NUTS2 <- merge(codesCNT,data.NUTSsubset.NUTS2)
return(data.NUTS2)
}
......@@ -8,20 +8,20 @@
#
# Parameters
# data The shock impacts specifying the capital destroyed
# stocksNUTS3 The sectoral stocks of capital at NUTS3 level
# stocksCNT The sectoral stocks of capital at country level
# stocks The sectoral stocks of capital at NUTS3 level, NUTS2 level, or at country
# level depending on aggregation level
# aggregationLevel one of 'CNT','NUTS2','NUTS3'
# sectorColPattern the matching string for the sector names
#
# Returns
# shock data relative to the stocks
#
# Benjamin Blanz 2024
#
relData <- function(data,stocksNUTS3=NULL,stocksCNT=NULL){
sectorColPattern <- 'ALL|TOTAL|^[A-Z]$|AGR|MIN|MFG|EGW|CNS|TRD|OTP|WTP|CMN|OFI|OBS|REA|PUB|OSG|agr|coa.oil.gas|pro|ely.elc|ser|air.wtp.tran'
relData <- function(data,stocks,aggregationLevel,sectorColPattern){
# Ensure the Total clumn is called TOTAL, not ALL as in some scenarios
names(data)[names(data)=='ALL'] <- 'TOTAL'
names(stocksNUTS3)[names(stocksNUTS3)=='ALL'] <- 'TOTAL'
names(stocksCNT)[names(stocksCNT)=='ALL'] <- 'TOTAL'
names(stocks)[names(stocks)=='ALL'] <- 'TOTAL'
# Identify the columns with data (not the columns with country or idx).
sectorCols <- grep(sectorColPattern,names(data),perl = T)
# prepare an empty data set for the relative data, keeping the index columns
......@@ -29,17 +29,16 @@ relData <- function(data,stocksNUTS3=NULL,stocksCNT=NULL){
data.rel[,sectorCols] <- NA
for(i in 1:nrow(data)){
# identify the correct row in the stocks
if('fid4' %in% names(data)){
stocks <- stocksNUTS3
if(aggregationLevel=='NUTS3'){
rowInStocks <- which(stocks$fid4 == data$fid4[i])
} else if(aggregationLevel=='NUTS2'){
rowInStocks <- which(stocks$NUTS2 == data$NUTS2[i])
} else if (nchar(data$CNTR_CODE[i])==2){
stocks <- stocksCNT
rowInStocks <- which(stocks$CNTR_CODE_Eurostat == data$CNTR_CODE[i])
if(length(rowInStocks)==0){
rowInStocks <- which(stocks$CNTR_CODE_iso2== data$CNTR_CODE[i])
}
} else if (nchar(data$CNTR_CODE[i])==3){
stocks <- stocksCNT
rowInStocks <- which(stocks$CNTR_CODE_iso3 == data$CNTR_CODE[i])
}
if(length(rowInStocks)==0){
......
......@@ -7,12 +7,18 @@
source('funRelData.R')
nuts3LevelStocksNACE <- read.csv("helperData/nuts3LevelStocksNACE.csv", row.names=NULL)
countryLevelStocksNACE <- read.csv("helperData/countryLevelStocksNACE.csv", row.names=NULL)
nuts3LevelStocksGTAP <- read.csv("helperData/nuts3LevelStocksGTAP.csv", row.names=NULL)
countryLevelStocksGTAP <- read.csv("helperData/countryLevelStocksGTAP.csv", row.names=NULL)
nuts3LevelStocksGRACE <- read.csv("helperData/nuts3LevelStocksGRACE.csv", row.names=NULL)
countryLevelStocksGRACE <- read.csv("helperData/countryLevelStocksGRACE.csv", row.names=NULL)
sectorColPattern <- 'ALL|TOTAL|^[A-Z]$|AGR|MIN|MFG|EGW|CNS|TRD|OTP|WTP|CMN|OFI|OBS|REA|PUB|OSG|agr|coa-oil-gas|coa\\.oil\\.gas|pro|ely-elc|ely\\.elc|ser|air-wtp-tran|air\\.wtp\\.tran'
stocks <- list()
stocks$NACE <- list(NUTS3 = read.csv("helperData/nuts3LevelStocksNACE.csv", row.names=NULL),
NUTS2 = read.csv("helperData/nuts2LevelStocksNACE.csv", row.names=NULL),
CNT = read.csv("helperData/countryLevelStocksNACE.csv", row.names=NULL))
stocks$GTAP <- list(NUTS3 = read.csv("helperData/nuts3LevelStocksGTAP.csv", row.names=NULL),
NUTS2 = read.csv("helperData/nuts2LevelStocksGTAP.csv", row.names=NULL),
CNT = read.csv("helperData/countryLevelStocksGTAP.csv", row.names=NULL))
stocks$GRACE <- list(NUTS3 = read.csv("helperData/nuts3LevelStocksGRACE.csv", row.names=NULL),
NUTS2 = read.csv("helperData/nuts2LevelStocksGRACE.csv", row.names=NULL),
CNT = read.csv("helperData/countryLevelStocksGRACE.csv", row.names=NULL))
files <- list.files('scenarios',pattern = 'csv',recursive = T)
......@@ -22,16 +28,20 @@ for(f.i in 1:length(files)){
file <- files[f.i]
cat(sprintf('%i of %i %s\n',f.i, length(files),file))
data <- read.csv(file,row.names=NULL)
sectorType <- 'NACE'
if(grepl('GTAP',file)){
data.rel <- relData(data,nuts3LevelStocksGTAP,countryLevelStocksGTAP)
write.csv(data.rel,gsub('.csv','-rel.csv',file),row.names = F)
sectorType <- 'GTAP'
} else if (grepl('GRACE',file)) {
data.rel <- relData(data,nuts3LevelStocksGRACE,countryLevelStocksGRACE)
write.csv(data.rel,gsub('.csv','-rel.csv',file),row.names = F)
} else {
data.rel <- relData(data,nuts3LevelStocksNACE,countryLevelStocksNACE)
write.csv(data.rel,gsub('.csv','-rel.csv',file),row.names = F)
sectorType <- 'GRACE'
}
aggType <- 'NUTS3'
if(grepl('aggCNT',file) | (!'fid4' %in% names(data) & !'NUTS2' %in% names(data))){
aggType <- 'CNT'
} else if(grepl('aggNUTS2',file)){
aggType <- 'NUTS2'
}
data.rel <- relData(data,stocks[[sectorType]][[aggType]],aggType,sectorColPattern)
write.csv(data.rel,gsub('.csv','-rel.csv',file),row.names = F)
}
......@@ -15,6 +15,7 @@ files <- list.files('scenarios',pattern = 'NACE.csv',recursive = T)
files <- paste0('scenarios/',files)
files[length(files)+1] <- 'helperData/countryLevelStocksNACE.csv'
files[length(files)+1] <- 'helperData/nuts3LevelStocksNACE.csv'
files[length(files)+1] <- 'helperData/nuts2LevelStocksNACE.csv'
# Mapping of sectors
for (file in files){
......@@ -31,8 +32,8 @@ for (file in files){
GRACEcodes <- unique(sector_mapping$`GRACE Code`)
for(GRACEcode in GRACEcodes){
dataGRACE[[GRACEcode]] <- NA
rows <- which(sector_mapping$`GRACE Code` == GRACEcode)
NACEcodes <- strsplit(paste(sector_mapping$`NACE Code`[rows],collapse = '.'),'\\.')[[1]]
col <- which(sector_mapping$`GRACE Code` == GRACEcode)
NACEcodes <- strsplit(paste(sector_mapping$`NACE Code`[col],collapse = '.'),'\\.')[[1]]
if(sum(NACEcodes %in% colnames(data))==length(NACEcodes)){
if(length(NACEcodes)>1){
dataGRACE[[GRACEcode]] <- rowSums(data[,NACEcodes])
......
......@@ -13,6 +13,7 @@ files <- list.files('scenarios',pattern = 'NACE.csv',recursive = T)
files <- paste0('scenarios/',files)
files[length(files)+1] <- 'helperData/countryLevelStocksNACE.csv'
files[length(files)+1] <- 'helperData/nuts3LevelStocksNACE.csv'
files[length(files)+1] <- 'helperData/nuts2LevelStocksNACE.csv'
# Mapping of sectors
for (file in files){
......
......@@ -7,7 +7,7 @@
# Benjamin Blanz 2024
#
sectorColPattern <- 'ALL|TOTAL|^[A-Z]$|AGR|MIN|MFG|EGW|CNS|TRD|OTP|WTP|CMN|OFI|OBS|REA|PUB|OSG|agr|coa-oil-gas|pro|ely-elc|ser|air-wtp-tran'
sectorColPattern <- 'ALL|TOTAL|^[A-Z]$|AGR|MIN|MFG|EGW|CNS|TRD|OTP|WTP|CMN|OFI|OBS|REA|PUB|OSG|agr|coa-oil-gas|coa\\.oil\\.gas|pro|ely-elc|ely\\.elc|ser|air-wtp-tran|air\\.wtp\\.tran'
# read scenario file with NUTS3 data ####
library(readxl)
......@@ -35,7 +35,7 @@ stocksNUTS3$CNTR_NAME <- countrycode(stocksNUTS3$CNTR_CODE_iso3,'iso3c','iso.nam
stocksNUTS3$CNTR_CODE_Eurostat <- countrycode(stocksNUTS3$CNTR_CODE_iso3,'iso3c','eurostat')
stocksNUTS3$CNTR_CODE_iso2 <- countrycode(stocksNUTS3$CNTR_CODE_iso3,'iso3c','iso2c')
stocksNUTS3 <- stocksNUTS3[,c(1:5,29,30,31,28,6:27)]
write.csv(stocksNUTS3,file = 'helperData/nuts3LevelStocks.csv',row.names=F)
write.csv(stocksNUTS3,file = 'helperData/nuts3LevelStocksNACE.csv',row.names=F)
codes <- stocksNUTS3[,1:9]
write.csv(codes,file = 'helperData/nuts3fid4Codes.csv',row.names=F)
sink('helperData/nuts3LevelStocksMetadata.csv')
......@@ -65,7 +65,7 @@ stocksCNT$CNTR_NAME <- countrycode(stocksCNT$CNTR_CODE_iso3,'iso3c','iso.name.en
stocksCNT$CNTR_CODE_Eurostat <- countrycode(stocksCNT$CNTR_CODE_iso3,'iso3c','eurostat')
stocksCNT$CNTR_CODE_iso2 <- countrycode(stocksCNT$CNTR_CODE_iso3,'iso3c','iso2c')
stocksCNT <- stocksCNT[,c(25,26,27,24,2:23)]
write.csv(stocksCNT,file = 'helperData/countryLevelStocks.csv',row.names = F)
write.csv(stocksCNT,file = 'helperData/countryLevelStocksNACE.csv',row.names = F)
sink('helperData/countryLevelStocksMetadata.csv')
cat(sprintf('Unit %s,,\n',stockUnit))
cat(sprintf('Labels,, \n'))
......@@ -75,3 +75,22 @@ for(n in colnames(stocksNUTS3)[sectorCols]){
cat(sprintf('"%s", "%s", "%s"\n',n,stockLabels[n],stockTypes[n]))
}
sink()
# aggregate to NUTS2 level ####
library(nuts)
rlang::local_options(nuts.verbose = "quiet")
nutsSheet <- read_excel("helperData/NUTS2021.xlsx",
sheet = "NUTS & SR 2021", range = "A1:H2125")
stocksNUTS2 <- aggregateNUTS3ToNUTS2(stocksNUTS3,
codes[,c('fid4','CNTR_CODE','CNTR_NAME','CNTR_CODE_iso2','CNTR_CODE_iso3','CNTR_CODE_Eurostat')],
nutsSheet,sectorColPattern)
write.csv(stocksNUTS2,file = 'helperData/nuts2LevelStocksNACE.csv',row.names = F)
sink('helperData/countryLevelStocksMetadata.csv')
cat(sprintf('Unit %s,,\n',stockUnit))
cat(sprintf('Labels,, \n'))
cat(sprintf('Sector,Label,Type\n'))
sectorCols <- grep(sectorColPattern,names(stocksNUTS2),perl = T)
for(n in colnames(stocksNUTS2)[sectorCols]){
cat(sprintf('"%s", "%s", "%s"\n',n,stockLabels[n],stockTypes[n]))
}
sink()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment