R/partition.R


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60

#' Subset an idframe object
#' 
#' Subsetting method for objects of class \code{idframe}. The selected rows
#' of the \code{output} and \code{input} matrices are kept. For
#' frequency-domain data (\code{type == "freq"}) the \code{frequencies}
#' vector is subsetted with the same indices. Otherwise \code{t.start} and
#' \code{t.end} are recomputed from the first index, the number of indices
#' and the sampling time \code{Ts}, so the indices are expected to form a
#' contiguous, increasing range.
#' 
#' @param data an object of class \code{idframe}
#' @param indices the row (sample) indices to keep
#' @return an object of class \code{idframe} restricted to the selected
#'   samples
#' @export
dataSlice <- function(data,indices){
  # inherits() also accepts objects that carry additional classes
  if(!inherits(data,"idframe"))
    stop("Not an idframe data")
  
  # seq_len() yields an empty set for zero rows instead of failing
  nSamples <- nrow(data$output)
  if(!all(indices %in% seq_len(nSamples)))
    stop("Invalid indices")
  
  trim <- data
  trim$output <- trim$output[indices,,drop=FALSE]
  trim$input <- trim$input[indices,,drop=FALSE]
  
  if(trim$type=="freq"){
    # keep only the frequency points that match the retained rows
    trim$frequencies <- trim$frequencies[indices]
  } else {
    # shift the start time to the first retained sample, then derive the
    # end time from the number of retained samples
    trim$t.start <- trim$t.start + trim$Ts*(indices[1]-1)
    trim$t.end <- trim$t.start + trim$Ts*(length(indices)-1)
  }
  
  return(trim)
}

#' Split data into training and validation sets
#' 
#' The function splits the data into training and validation sets and returns
#' them bundled as a list. The first \code{round(p*n)} samples (where \code{n}
#' is the number of rows of the output) form the training set and the
#' remaining samples form the validation set.
#' 
#' @param data an object of class \code{idframe}
#' @param p the fraction of the data, between 0 and 1, that goes to training
#'   (Default : \code{0.6})
#' @return list with elements \code{estimation} and \code{validation}, each
#'   an \code{idframe} object
#' 
#' @examples
#' data(cstr)
#' splitList <- dataPartition(cstr,p=0.6)
#' train <- splitList$estimation # training set 
#' test <- splitList$validation # testing set
#' 
#' @export
dataPartition <- function(data,p=0.6){
  # inherits() also accepts objects that carry additional classes
  if(!inherits(data,"idframe"))
    stop("Not an idframe data")
  
  if(!is.numeric(p) || length(p)!=1 || is.na(p) || p<0 || p>1)
    stop("p must be a single number between 0 and 1")
  
  index <- seq_len(nrow(data$output))
  
  # seq_len() avoids the 1:0 pitfall, which would otherwise put one sample
  # in the training set when the rounded training size is zero
  nTrain <- round(p*length(index))
  trainIndex <- index[seq_len(nTrain)]
  testIndex <- index[index > nTrain]
  
  train <- dataSlice(data,trainIndex)
  test <- dataSlice(data,testIndex)
  
  return(list(estimation=train,validation=test))
}