We could use base R to do this - get the index of 'Freq' and 'SQR' columns ('i1', 'i2'), then get the max of the 'Freq' columns with pmax for each row, replace the 'SQR' corresponding columns where the value for 'Freq' columns is not max to NA, loop over the rows with apply (MARGIN = 1), remove the NA and paste the 'SQR' words. Create two new columns in 'df1'
i1 <- startsWith(names(df1), 'Freq')
i2 <- startsWith(names(df1), "SQR")
f1 <- function(x) {
if(all(is.na(x))) {
NA_character_
} else paste(na.omit(x), collapse = ",")
}
mx <- do.call(pmax, c(df1[i1], na.rm = TRUE))
wrd <- apply(replace(df1[i2], df1[i1] != mx, NA), 1, FUN = f1)
df1[c("MaxFreq", "SQR_word")] <- list(mx, wrd)
-output
> df1
GUID SESSION_SKEY SEQNUM parent_uid SQR_01 SQR_02 SQR_03 SQR_04 SQR_05 SQR_06 SQR_07 SQR_08 Freq1 Freq2 Freq3 Freq4
1 004ce66617739f9705a73dd001dd5ff7 3.84e+13 56 2216028557 volkite culverin <NA> <NA> NA NA NA NA 3 2 NA NA
2 004ce66617739f9705a73dd001dd5ff7 3.84e+13 153 2216028557 contemptor dreadnought volkite <NA> NA NA NA NA 3 2 3 NA
3 004ce66617739f9705a73dd001dd5ff7 3.84e+13 217 2216028557 land raider prometheus <NA> NA NA NA NA 9 1 1 NA
4 004ce66617739f9705a73dd001dd5ff7 3.84e+13 12 2216028557 contemptor pattern volkite culverin NA NA NA NA 3 1 3 2
5 05f7cdbb17a0a45e3fcb79bfffc8817c 3.84e+13 250 1297482930 fake london genius <NA> NA NA NA NA 2 2 2 NA
6 0827fedf17611f9bede0fab001e6dcad 3.84e+13 62 72778457 teapot for one set NA NA NA NA 1 26 4 21
Freq5 Freq6 Freq7 Freq8 MaxFreq SQR_word
1 NA NA NA NA 3 volkite
2 NA NA NA NA 3 contemptor,volkite
3 NA NA NA NA 9 land
4 NA NA NA NA 3 contemptor,volkite
5 NA NA NA NA 2 fake,london,genius
6 NA NA NA NA 26 for
Or may use tidyverse to create the columns - reshape to 'long' format with pivot_longer and do a group by summarise to create the columns and then bind the columns with original data
library(dplyr)
library(tidyr)
library(stringr)
df1 %>%
mutate(rn = row_number()) %>%
dplyr::select(rn, starts_with("SQR"), starts_with("Freq")) %>%
rename_with(~ str_remove(., "_0?")) %>%
pivot_longer(cols = -rn, names_to = c(".value", "grp"),
names_sep = "(?<=\\D)(?=\\d)", values_drop_na = TRUE) %>%
group_by(rn) %>%
summarise(MaxFreq = max(Freq),
SQR_word = str_c(SQR[Freq == MaxFreq], collapse=",")) %>%
select(-rn) %>%
bind_cols(df1, .)
-output
GUID SESSION_SKEY SEQNUM parent_uid SQR_01 SQR_02 SQR_03 SQR_04 SQR_05 SQR_06 SQR_07 SQR_08 Freq1 Freq2 Freq3 Freq4
1 004ce66617739f9705a73dd001dd5ff7 3.84e+13 56 2216028557 volkite culverin <NA> <NA> NA NA NA NA 3 2 NA NA
2 004ce66617739f9705a73dd001dd5ff7 3.84e+13 153 2216028557 contemptor dreadnought volkite <NA> NA NA NA NA 3 2 3 NA
3 004ce66617739f9705a73dd001dd5ff7 3.84e+13 217 2216028557 land raider prometheus <NA> NA NA NA NA 9 1 1 NA
4 004ce66617739f9705a73dd001dd5ff7 3.84e+13 12 2216028557 contemptor pattern volkite culverin NA NA NA NA 3 1 3 2
5 05f7cdbb17a0a45e3fcb79bfffc8817c 3.84e+13 250 1297482930 fake london genius <NA> NA NA NA NA 2 2 2 NA
6 0827fedf17611f9bede0fab001e6dcad 3.84e+13 62 72778457 teapot for one set NA NA NA NA 1 26 4 21
Freq5 Freq6 Freq7 Freq8 MaxFreq SQR_word
1 NA NA NA NA 3 volkite
2 NA NA NA NA 3 contemptor,volkite
3 NA NA NA NA 9 land
4 NA NA NA NA 3 contemptor,volkite
5 NA NA NA NA 2 fake,london,genius
6 NA NA NA NA 26 for
data
df1 <- structure(list(GUID = c("004ce66617739f9705a73dd001dd5ff7", "004ce66617739f9705a73dd001dd5ff7",
"004ce66617739f9705a73dd001dd5ff7", "004ce66617739f9705a73dd001dd5ff7",
"05f7cdbb17a0a45e3fcb79bfffc8817c", "0827fedf17611f9bede0fab001e6dcad"
), SESSION_SKEY = c(3.84e+13, 3.84e+13, 3.84e+13, 3.84e+13, 3.84e+13,
3.84e+13), SEQNUM = c(56L, 153L, 217L, 12L, 250L, 62L), parent_uid = c(2216028557,
2216028557, 2216028557, 2216028557, 1297482930, 72778457), SQR_01 = c("volkite",
"contemptor", "land", "contemptor", "fake", "teapot"), SQR_02 = c("culverin",
"dreadnought", "raider", "pattern", "london", "for"), SQR_03 = c(NA,
"volkite", "prometheus", "volkite", "genius", "one"), SQR_04 = c(NA,
NA, NA, "culverin", NA, "set"), SQR_05 = c(NA, NA, NA, NA, NA,
NA), SQR_06 = c(NA, NA, NA, NA, NA, NA), SQR_07 = c(NA, NA, NA,
NA, NA, NA), SQR_08 = c(NA, NA, NA, NA, NA, NA), Freq1 = c(3L,
3L, 9L, 3L, 2L, 1L), Freq2 = c(2L, 2L, 1L, 1L, 2L, 26L), Freq3 = c(NA,
3L, 1L, 3L, 2L, 4L), Freq4 = c(NA, NA, NA, 2L, NA, 21L), Freq5 = c(NA,
NA, NA, NA, NA, NA), Freq6 = c(NA, NA, NA, NA, NA, NA), Freq7 = c(NA,
NA, NA, NA, NA, NA), Freq8 = c(NA, NA, NA, NA, NA, NA)), class = "data.frame", row.names = c(NA,
-6L))