本文介绍了根据定义的 i 创建具有动态列名和重复值的循环的处理方法,对大家解决问题具有一定的参考价值,需要的朋友们下面随着小编来一起学习吧!
问题描述
我有以下数据框:
```r
id <- c("A", "B", "C")
col1 <- c(1, 3, 5)
col2 <- c(6, 12, 9)
col3 <- c(2, 4, 30)
df <- data.frame(id, col1, col2, col3)
```
从本质上讲,我希望 i 依次取 20、25、30、35、40 这几个值。下面这个循环可以运行,但是非常非常慢。
Essentially, I want every i to be replaced by 20, 25, 30, 35, 40. This loop works but it works very, very slowly.
```r
library(dplyr)
library(tibble)
library(foreach)
library(tidyverse)
library(purrr)

id <- c("A", "B", "C")
col1 <- c(1, 3, 5)
col2 <- c(6, 12, 9)
col3 <- c(2, 4, 30)
df <- data.frame(id, col1, col2, col3)

vals <- c(seq(from=20, to=40, by=5))

final <- foreach(i = vals, .combine='cbind') %do% {
  # if cell is greater than i, then code 0
  df_2 <- df %>% mutate(across(starts_with("col"), ~ +(. < i)))

  # transpose the dataset
  rownames(df_2) <- df_2$id
  df_2$id <- NULL
  df_2_t <- as.data.frame(t(df_2))

  # sum the rows
  df_2_t <- cbind(id = rownames(df_2_t), df_2_t)
  rownames(df_2_t) <- 1:nrow(df_2_t)
  df_2_t <- df_2_t %>% mutate(sum = rowSums(.[2:ncol(.)]))

  # merge a new column
  id2 <- c("col1", "col2", "col3")
  D <- c(3, 4, 5)
  id_d <- data.frame(id2, D)
  df_2_t_d <- left_join(df_2_t, id_d, by = c("id" = "id2"))

  # divide D by the number of letters (there are 3 letter columns -- A, B, C)
  df_2_t_d$letters <- rep(3)
  df_2_t_d <- df_2_t_d %>% mutate(frac = D/letters)

  # recode all 1s to the frac
  letters <- grep("^A|^B|^C", names(df_2_t_d))
  df_2_t_d[letters] <- apply(df_2_t_d[letters], 2, function(x) ifelse(x == 1, df_2_t_d$frac, 0))

  # drop two columns
  df_2_t_d <- select(df_2_t_d, -c(D, letters))

  # transpose again
  rownames(df_2_t_d) <- df_2_t_d$id
  df_2_t_d$id <- NULL
  df_2_t_d2 <- as.data.frame(t(df_2_t_d))

  df_2_t_d2_sum <- df_2_t_d2 %>%
    mutate(rowSums(.[1:3])) %>%
    transmute(!!paste0('sum_', i) := rowSums(select(., starts_with('col'))))
}

df_2_t_d2 <- cbind(list_name = rownames(df_2_t_d2), df_2_t_d2)
rownames(df_2_t_d2) <- 1:nrow(df_2_t_d2)
df_2_t_d2 <- select(df_2_t_d2, list_name)
abc <- cbind(df_2_t_d2, df_2_t_d2_sum)
View(abc)
```
如果有任何方法可以加快速度,欢迎提出建议!
If there's any way to speed it up, suggestions are welcome!
推荐答案
下面是使用 map_dfc 完成此操作的一种方法:
Here's a way to do this with map_dfc:
```r
library(dplyr)
library(purrr)

vals <- seq(from=20, to=40, by=5)

bind_cols(
  df,
  map_dfc(vals, function(x) df %>%
    mutate(across(starts_with("col"), ~ +(. < x))) %>%
    transmute(!!paste0('sum_', x) := rowSums(select(., starts_with('col'))))))
```
或者使用基础 R(base R):
```r
cols <- grep('col', names(df))
df[paste0('sum_', vals)] <- lapply(vals, function(x) rowSums(+(df[cols] < x)))
df

#  id col1 col2 col3 sum_20 sum_25 sum_30 sum_35 sum_40
#1  A    1    6    2      3      3      3      3      3
#2  B    3   12    4      3      3      3      3      3
#3  C    5    9   30      2      2      2      3      3
```
更多推荐
根据定义的 i 创建具有动态列名和重复值的循环
发布评论