R - ggmap - 通过地理编码计算城市之间的最短距离

标签 r list dataframe geocoding ggmap

我有一个城市和相关信息的列表,我已经放在一个数据框中,如下所示:

library(plyr)
library(dplyr)
library(ggmap)
library(Imap)

# Sample input: ten US cities, each paired with a record id, a contact
# category and a numeric month stamp (values look like YYYYMM).
cities <- c(
  "washington, dc", "wilmington, de", "amarillo, tx", "denver, co",
  "needham, ma", "philadelphia, pa", "doylestown, pa", "galveston, tx",
  "tuscaloosa, al", "hollywood, fl"
)

id <- c(
  156952, 154222, 785695, 154423, 971453,
  149888, 1356987, 178946, 169944, 136421
)

# Two rows each for the first two months, three rows each for the last two.
month <- rep(c(201811, 201912, 202005, 202106), times = c(2, 2, 3, 3))

category <- c(
  "home", "work", "home", "home", "home",
  "work", "cell", "home", "work", "cell"
)

# One row per city; column names are spelled out explicitly.
places <- data.frame(
  cities = cities,
  id = id,
  category = category,
  month = month
)
使用 ggmap 包,我可以检索每个城市的经度和纬度:
# Geocode every city once and reuse the result for both coordinates; calling
# geocode() separately for lat and lon doubles the number of API requests.
# NOTE(review): recent ggmap versions need an API key registered with
# register_google() before source = "google" works -- confirm for your setup.
coords <- geocode(location = as.character(places$cities), source = "google")
lat <- coords$lat
lon <- coords$lon

# Append the coordinates as the columns `lat` and `lon`.
places <- cbind(places, lat, lon)
我想做的是以下内容:
  • 按月和类别计算每个城市之间的距离
  • 在 places 的不同列中返回第二短的距离以及对应的城市和 id

    我写了一个 for 循环来计算距离:
    # Pre-allocate the result list: assigning into dist_list[[i]] fails with
    # "object 'dist_list' not found" unless the list exists beforehand.
    dist_list <- vector("list", nrow(places))

    # For each city i, compute the distance (in miles) from city i to every
    # city in `places`, including itself. seq_len() keeps the loop from
    # running when `places` has zero rows (1:0 would iterate over c(1, 0)).
    for (i in seq_len(nrow(places))) {
      dist_list[[i]] <- gdist(lon.1 = places$lon[i],
                              lat.1 = places$lat[i],
                              lon.2 = places$lon,
                              lat.2 = places$lat,
                              units = "miles")
    }
    
    产生以下数据:
    dput(dist_list)
    list(c(0, 98.3464717885451, 1386.25425677199, 1489.87718040776, 
    383.083760289456, 123.232894969413, 140.284537078237, 1209.23510542932, 
    706.670452283757, 906.79542720295), c(98.4762434610638, 0, 1472.06660056474, 
    1560.93398322985, 285.23618862797, 24.9195071209828, 44.8853561530985, 
    1308.60741637919, 805.755084908157, 983.102810248198), c(1389.07354011351, 
    1472.06660056474, 0, 356.573530670257, 1712.29111612461, 1493.39302974566, 
    1497.2125164277, 579.329313217289, 827.577713357261, 1434.82691622332
    ), c(1492.80130415651, 1560.93398322985, 356.573530670257, 0, 
    1761.3773163288, 1578.71125031146, 1576.80713231756, 923.725006795209, 
    1067.04809350934, 1717.32991551111), c(383.551997010915, 285.23618862797, 
    1712.29111612461, 1761.3773163288, 0, 260.382178510916, 243.947043197789, 
    1588.85470703957, 1088.38640303169, 1230.47219244291), c(123.395655314093, 
    24.9195071209827, 1493.39302974566, 1578.71125031146, 260.382178510916, 
    0, 24.7382114555287, 1333.29925285915, 830.581742827321, 1002.94777739349
    ), c(140.431447025301, 44.8853561530986, 1497.2125164277, 1576.80713231756, 
    243.947043197789, 24.7382114555285, 0, 1346.44527983873, 844.827513981938, 
    1026.98263808807), c(1211.16392416136, 1308.60741637919, 579.329313217289, 
    923.725006795209, 1588.85470703957, 1333.29925285915, 1346.44527983873, 
    0, 505.292529136012, 925.512554201542), c(707.73957320737, 805.755084908157, 
    827.577713357261, 1067.04809350934, 1088.38640303169, 830.581742827321, 
    844.827513981938, 505.292529136012, 0, 666.837848781548), c(906.880841903584, 
    983.102810248198, 1434.82691622332, 1717.32991551111, 1230.47219244291, 
    1002.94777739349, 1026.98263808807, 925.512554201542, 666.837848781548, 
    0))
    
    所需的结果如下所示(第一行):
    cities          id         category  month      lat        lon   min.dist  closest city  closest city id  
    washington, dc  156952     home      201811 38.90719  -77.03687   98.34647  wilmington, de  154222 
    
    通过 Rfast 包中的 nth 函数,我可以获得第二小的距离:nth(dist_list[[1]], 2)。我遇到的问题是,我不知道如何将列表中的信息连接到数据框 places。任何帮助或建议将不胜感激。

    最佳答案

    # get min distance: the second-smallest entry of each distance vector
    # (the smallest one is the zero distance from a city to itself).
    min_d <- vapply(dist_list, function(x) sort(x)[2], numeric(1))
    places$min_dist <- min_d
    # index: position of that second-smallest distance. order() always yields
    # exactly one index per city, whereas which(sort(x)[2] == x) returns
    # several indices when distances tie and then breaks the lookup below.
    i <- vapply(dist_list, function(x) order(x)[2], integer(1))
    # add name:
    places$min_name <- places$cities[i]
    
    分组:
    # prepare dist matrix outside loop: row r holds dist_list[[r]], i.e. the
    # distances from city r to every city.
    m <- do.call(rbind, dist_list)
    diag(m) <- NA  # a city must never be its own nearest neighbour

    # create grouping variable:
    gv <- as.integer(factor(places$month)) # or:
    # gv <- as.integer(factor(paste(places$month, places$category)))

    # set distance to NA if not in relevant group:
    # same_group[r, c] is TRUE when cities r and c share a group.
    same_group <- outer(gv, gv, "==")
    m[!same_group] <- NA

    # Per row, the column index of the smallest remaining distance, or NA when
    # the city has no other member in its group. which.min() returns a single
    # index even when distances tie (the first one), so the result always has
    # exactly one entry per city.
    min_nr <- vapply(seq_len(nrow(m)), function(r) {
      x <- m[r, ]
      if (all(is.na(x))) NA_integer_ else unname(which.min(x))
    }, integer(1))

    # Look up the matching distance; an NA index yields an NA distance.
    # Assigning columns directly (instead of cbind) overwrites an existing
    # min_dist column rather than creating a duplicate.
    places$min_dist <- m[cbind(seq_len(nrow(m)), min_nr)]
    places$min_nr <- min_nr

    places$min_name <- places$cities[places$min_nr] # add name
    places$min_id <- places$id[places$min_nr] # add id
    places
    
    结果:
                  cities      id category  month  min_dist min_nr         min_name  min_id
    V1    washington, dc  156952     home 201811  98.34647      2   wilmington, de  154222
    V2    wilmington, de  154222     work 201811  98.47624      1   washington, dc  156952
    V3      amarillo, tx  785695     home 201912 356.57353      4       denver, co  154423
    V4        denver, co  154423     home 201912 356.57353      3     amarillo, tx  785695
    V5       needham, ma  971453     home 202005 243.94704      7   doylestown, pa 1356987
    V6  philadelphia, pa  149888     work 202005  24.73821      7   doylestown, pa 1356987
    V7    doylestown, pa 1356987     cell 202005  24.73821      6 philadelphia, pa  149888
    V8     galveston, tx  178946     home 202106 505.29253      9   tuscaloosa, al  169944
    V9    tuscaloosa, al  169944     work 202106 505.29253      8    galveston, tx  178946
    V10    hollywood, fl  136421     cell 202106 666.83785      9   tuscaloosa, al  169944
    

    关于R - ggmap - 通过地理编码计算城市之间的最短距离,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/68037239/

    相关文章:

    r - 如何在 R 中创建相关矩阵?

    python - 将列表中长度不等的值追加到字典中

    python-3.x - 如何使用IQR从DataFrame中删除异常值?

    python - 如何从具有相同 x 值的多类标签生成二元向量

    python - 如何在Python的条件下增加数据帧列?

    r - ggplot2 geom_line 图表输出质量很差。时间序列线看起来很模糊

    r - 将基本 r 图保存为可以在多图中绘制的对象

    r - 用聚合解决 ddply 任务的优雅方法(希望有更好的性能)

    list - Scala 中嵌套列表的深度反转

    python - Python 中的列表索引