r - 将预测的时间序列与 R 中的原始序列重叠

标签 r dataframe ggplot2 time-series forecasting

我进行预测

w=read.csv("C:/Users/admin/Documents/aggrmonth.csv", sep=";",dec=",")
w
#create time series object
w=ts(w$new,frequency = 12,start=c(2015,1)) 
w
#timeplot
plot.ts(w)

#forecast for the next months
library("forecast")
m <- stats::HoltWinters(w)
test=forecast:::forecast.HoltWinters(m,h=4) #h is how much month do you want to predict
test

现在我想提前 4 个月得到预测。
从 01.2017-04.2017。我这知道原始值。
1-Jan-17    1020
1-Feb-17    800
1-Mar-17    1130
1-Apr-17    600

但我需要得到图,其中显示的 CI 预测值与原始值重叠。
当然,如果我没有清楚地解释,我附上了情节。
needed plot
绿色曲线是系列的初始值(我的4个月)
绿色虚线是预测值与原始值重叠。
预测虚线上的虚线是置信区间。

如何创建这样的情节
w=

structure(list(yearMon = structure(c(9L, 7L, 15L, 1L, 17L, 13L, 
11L, 3L, 23L, 21L, 19L, 5L, 10L, 8L, 16L, 2L, 18L, 14L, 12L, 
4L, 24L, 22L, 20L, 6L), .Label = c("1-Apr-15", "1-Apr-16", "1-Aug-15", 
"1-Aug-16", "1-Dec-15", "1-Dec-16", "1-Feb-15", "1-Feb-16", "1-Jan-15", 
"1-Jan-16", "1-Jul-15", "1-Jul-16", "1-Jun-15", "1-Jun-16", "1-Mar-15", 
"1-Mar-16", "1-May-15", "1-May-16", "1-Nov-15", "1-Nov-16", "1-Oct-15", 
"1-Oct-16", "1-Sep-15", "1-Sep-16"), class = "factor"), new = c(8575L, 
8215L, 16399L, 16415L, 15704L, 19805L, 17484L, 18116L, 19977L, 
14439L, 9258L, 12259L, 4909L, 9539L, 8802L, 11253L, 11971L, 7838L, 
2095L, 4157L, 3910L, 1306L, 3429L, 1390L)), .Names = c("yearMon", 
"new"), class = "data.frame", row.names = c(NA, -24L))

最佳答案

我们可以使用 ggfortify创建一个数据框,然后用 ggplot2 绘制两个时间序列

# Load required libraries
library(lubridate)
library(magrittr)
library(tidyverse)
library(scales)
library(forecast)
library(ggfortify)

w <- structure(list(yearMon = structure(c(9L, 7L, 15L, 1L, 17L, 13L, 
  11L, 3L, 23L, 21L, 19L, 5L, 10L, 8L, 16L, 2L, 18L, 14L, 12L, 
  4L, 24L, 22L, 20L, 6L), .Label = c("1-Apr-15", "1-Apr-16", "1-Aug-15", 
  "1-Aug-16", "1-Dec-15", "1-Dec-16", "1-Feb-15", "1-Feb-16", "1-Jan-15", 
  "1-Jan-16", "1-Jul-15", "1-Jul-16", "1-Jun-15", "1-Jun-16", "1-Mar-15", 
  "1-Mar-16", "1-May-15", "1-May-16", "1-Nov-15", "1-Nov-16", "1-Oct-15", 
  "1-Oct-16", "1-Sep-15", "1-Sep-16"), class = "factor"), new = c(8575L, 
  8215L, 16399L, 16415L, 15704L, 19805L, 17484L, 18116L, 19977L, 
  14439L, 9258L, 12259L, 4909L, 9539L, 8802L, 11253L, 11971L, 7838L, 
  2095L, 4157L, 3910L, 1306L, 3429L, 1390L)), .Names = c("yearMon", 
  "new"), class = "data.frame", row.names = c(NA, -24L))

# create time series object
w = ts(w$new, frequency = 12, start=c(2015, 1)) 
w

#>        Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov
#> 2015  8575  8215 16399 16415 15704 19805 17484 18116 19977 14439  9258
#> 2016  4909  9539  8802 11253 11971  7838  2095  4157  3910  1306  3429
#>        Dec
#> 2015 12259
#> 2016  1390

# forecast for the next months
m <- stats::HoltWinters(w)

# h is how much month do you want to predict
pred = forecast:::forecast.HoltWinters(m, h=4) 
pred

#>          Point Forecast     Lo 80     Hi 80      Lo 95     Hi 95
#> Jan 2017    -5049.00381 -9644.003 -454.0045 -12076.449  1978.441
#> Feb 2017       37.44605 -5599.592 5674.4843  -8583.660  8658.552
#> Mar 2017     -256.41474 -6770.890 6258.0601 -10219.444  9706.615
#> Apr 2017     2593.09445 -4693.919 9880.1079  -8551.431 13737.620

# plot
plot(pred, include = 24, showgap = FALSE)



# Convert pred from list to data frame object
df1 <- fortify(pred) %>% as_tibble()

# Create Date column, remove Index column and rename other columns 
df1 %<>% 
  mutate(Date = as.Date(Index, "%Y-%m-%d")) %>% 
  select(-Index) %>% 
  rename("Low95" = "Lo 95",
         "Low80" = "Lo 80",
         "High95" = "Hi 95",
         "High80" = "Hi 80",
         "Forecast" = "Point Forecast")
df1

#> # A tibble: 28 x 8
#>     Data Fitted Forecast Low80 High80 Low95 High95 Date      
#>    <int>  <dbl>    <dbl> <dbl>  <dbl> <dbl>  <dbl> <date>    
#>  1  8575     NA       NA    NA     NA    NA     NA 2015-01-01
#>  2  8215     NA       NA    NA     NA    NA     NA 2015-02-01
#>  3 16399     NA       NA    NA     NA    NA     NA 2015-03-01
#>  4 16415     NA       NA    NA     NA    NA     NA 2015-04-01
#>  5 15704     NA       NA    NA     NA    NA     NA 2015-05-01
#>  6 19805     NA       NA    NA     NA    NA     NA 2015-06-01
#>  7 17484     NA       NA    NA     NA    NA     NA 2015-07-01
#>  8 18116     NA       NA    NA     NA    NA     NA 2015-08-01
#>  9 19977     NA       NA    NA     NA    NA     NA 2015-09-01
#> 10 14439     NA       NA    NA     NA    NA     NA 2015-10-01
#> # ... with 18 more rows

### Avoid the gap between data and forcast
# Find the last non missing NA values in obs then use that
# one to initialize all forecast columns
lastNonNAinData <- max(which(complete.cases(df1$Data)))
df1[lastNonNAinData, 
    !(colnames(df1) %in% c("Data", "Fitted", "Date"))] <- df1$Data[lastNonNAinData]

ggplot(df1, aes(x = Date)) + 
  geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
  geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
  geom_point(aes(y = Data, colour = "Data"), size = 4) +
  geom_line(aes(y = Data, group = 1, colour = "Data"), 
            linetype = "dotted", size = 0.75) +
  geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
  geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
  scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
  scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
  scale_fill_brewer(name = "Intervals") +
  guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
  theme_bw(base_size = 14)



编辑:包含从“2017-01-01”到“2017-04-01”的已知值

# Create new column which has known values
df1$Obs <- NA
df1$Obs[(nrow(df1)-3):(nrow(df1))] <- c(1020, 800, 1130, 600)

ggplot(df1, aes(x = Date)) + 
  geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
  geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
  geom_point(aes(y = Data, colour = "Data"), size = 4) +
  geom_line(aes(y = Data, group = 1, colour = "Data"), 
            linetype = "dotted", size = 0.75) +
  geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
  geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
  scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
  scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
  scale_fill_brewer(name = "Intervals") +
  guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
  theme_bw(base_size = 14) +
  geom_line(aes(y = Obs, group = 4, colour = "Obs"), linetype = "dotted", size = 0.75)



或者将这些值直接放入 Data柱子

df1$Data[(nrow(df1)-3):(nrow(df1))] <- c(1020, 800, 1130, 600)

ggplot(df1, aes(x = Date)) + 
  geom_ribbon(aes(ymin = Low95, ymax = High95, fill = "95%")) +
  geom_ribbon(aes(ymin = Low80, ymax = High80, fill = "80%")) +
  geom_point(aes(y = Data, colour = "Data"), size = 3) +
  geom_line(aes(y = Data, group = 1, colour = "Data"), 
            linetype = "dotted", size = 0.75) +
  geom_line(aes(y = Fitted, group = 2, colour = "Fitted"), size = 0.75) +
  geom_line(aes(y = Forecast, group = 3, colour = "Forecast"), size = 0.75) +
  scale_x_date(breaks = scales::pretty_breaks(), date_labels = "%b %y") +
  scale_colour_brewer(name = "Legend", type = "qual", palette = "Dark2") +
  scale_fill_brewer(name = "Intervals") +
  guides(colour = guide_legend(order = 1), fill = guide_legend(order = 2)) +
  theme_bw(base_size = 14)



创建于 2018-04-21 由 reprex package (v0.2.0)。

关于r - 将预测的时间序列与 R 中的原始序列重叠,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/49957203/

相关文章:

r - 无法在 WSL Ubuntu 18.04 上安装 R Shiny Server

r - 在没有标签的约束排序中绘制向量

r - 向 ggplot 图形添加文件名或其他注释

python - 如何根据多列的值拆分数据框

r - 在数据框中使用 Ifelse

pandas - Pandas 数据框中 groupby.agg 的 N 最大/idxmax

r - 差异图

r - 更改 geom_ribbon() 默认路径顺序

r - 在 x 轴上的日期之间提供阴影

r - 如何根据数量更改ggplot中每个方面的xlim或比例?