I have two tables. table1 looks like this
date hour data
2010-05-01 3 5
2010-05-02 7 7
2010-05-02 10 8
2010-07-03 18 3
2011-12-09 22 1
2012-05-01 3 0
This is stored as a data.table with key set on date and hour.
I have another table, that looks like this. It's my outages table.
resource date_out date_back
joey 2010-04-30 4:00:00 2010-05-02 8:30:00
billy 2009-04-20 7:00:00 2009-02-02 5:30:00
bob 2011-11-15 12:20:00 2010-12-09 23:00:00
joey 2012-04-28 1:00:00 2012-05-02 17:00:00
I want to add columns to table1 where those columns are the resource from the outages table. I want the values in those columns to be 0 for whenever there isn't an outage and 1 for when there is.
The result for this example should be.
date hour data joey billy bob
2010-05-01 3 5 1 0 0
2010-05-02 7 7 1 0 0
2010-05-02 10 8 0 0 0
2010-07-03 18 3 0 0 0
2011-12-09 22 1 0 0 1
2012-05-01 3 0 1 0 0
In actuality my table1 has about 2500 rows and my outages table has 19000. The only way I could think to do this is to loop through each row of the outages table and then insert 1s into table1 in the correct places. My code relies on table1 being in order so at least it doesn't have to scan 100% of that table for every row of outages. However the below takes over 4 hours for my data.
for (out in 1:length(outages$resource)) {
a<-as.character(outages[out]$resource)
#if column doesn't exist then create it
if (a %in% colnames(table1)==FALSE) {
table1$new<-0
setnames(table1, "new", a)
}
midpoint<-round(length(table1$date)/2,0)
if (table1$date[midpoint]+table1$hour[midpoint]*60*60>=outages[out]$due_out && table1$date[midpoint]+table1$hour[midpoint]*60*60<=outages [out]$due_back)
{
while(table1$date[midpoint]+table1$hour[midpoint]*60*60>=outages[out]$due_out && midpoint>=1 && midpoint<=length(table1$date)) {
table1[midpoint,a:=1,with=FALSE]
midpoint<-midpoint-1
}
midpoint<-round(length(table1$date)/2,0)
while(table1$date[midpoint]+table1$hour[midpoint]*60*60<=outages[out]$due_back && midpoint>=1 && midpoint<=length(table1$date)) {
table1[midpoint,a:=1,with=FALSE]
midpoint<-midpoint+1
}
} else {
if (table1$date[midpoint]+table1$hour[midpoint]*60*60>outages[out]$due_back) {
while(table1$date[midpoint]+table1$hour[midpoint]*60*60>outages[out]$due_back && midpoint>=1 && midpoint<=length(table1$date)) {
midpoint<-midpoint-1
}
while(table1$date[midpoint]+table1$hour[midpoint]*60*60>=outages[out]$due_out && midpoint>=1 && midpoint<=length(table1$date)) {
table1[midpoint,a:=1,with=FALSE]
midpoint<-midpoint-1
}
}
midpoint<-round(length(table1$date)/2,0)
if (table1$date[midpoint]+table1$hour[midpoint]*60*60<outages[out]$due_out) {
while(table1$date[midpoint]+table1$hour[midpoint]*60*60<outages[out]$due_out && midpoint>=1 && midpoint<=length(table1$date)) {
midpoint<-midpoint+1
}
while(table1$date[midpoint]+table1$hour[midpoint]*60*60<=outages[out]$due_back && midpoint>=1 && midpoint<=length(table1$date)) {
table1[midpoint,a:=1,with=FALSE]
midpoint<-midpoint+1
}
}
}
if (sum(table1[,a,with=FALSE])==0) {
table1[,a:=NULL,with=FALSE]
}
}
To quote everybody's favorite infomercial line "There's got to be a better way".