-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy patheval_util.R
311 lines (264 loc) · 12.6 KB
/
eval_util.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
# Utility routines for evaluation
source('setting.R')
if (!exists('UTIL_R')) {
source('util.R')
}
# Note:
# For 'as.Date', the 'original' verson need 'origin' specified when date input is a integer
# 'zoo' overwrite the original version by providing a default origin
# In conclusion, it is expected a RStudio start-up, without source file and then loading packages,
# can have the following error happens
# "Error in as.Date.numeric(end_date) : 'origin' must be supplied"
# Or, add "origin='1970-01-01'" for every calling to 'as.Date' (which seems too annoying...)
# ref.: https://stat.ethz.ch/pipermail/r-help/2010-March/233159.html
require(quantmod) # for 'ROC': rate of change
require(PerformanceAnalytics)
START_EVAL_DATE = as.Date('2005-01-03')
stock_id_closure <- function(id)
{
function(dummy_input) id
}
get_market_dates <- function(start_date)
{
stopifnot(class(start_date) == 'Date')
GOLDEN_STOCK_ID = '0050'
# TODO: only use 'DATE' will cause error in 'get_csv_data'
csv_data = get_csv_data(get_csv_file(GOLDEN_STOCK_ID), cared_value=c(DATE, CLOSE))
date_values = as.vector(csv_data$date)
cvt_dates = list() # converted (to unclass of 'Date') dates
for (i in 1:length(date_values))
{
iter_date = to_date(date_values[i])
if (unclass(FINISH_EVAL_DATE) < iter_date) {
stopifnot(FALSE)
next
}
if (unclass(start_date) > iter_date) { # we expect the date_values is from newest to oldest
break
}
cvt_dates[[length(cvt_dates) + 1]] = iter_date
}
return (rev(cvt_dates)) # reverse to 'oldest to newest' for adopted package (zoo, xts) expects such fashsion
}
get_newest_market_dates <- function()
{
newest_date = tail(get_market_dates(START_EVAL_DATE), n=1)
return (gsub('-', '/', as.Date(newest_date[[1]])))
}
# TODO: the 'StockPosDate' related function shall be better to be placed in a class
# The StockPosDate functionality --- begin
# Ref.: http://stackoverflow.com/documentation/r/5179/hashmaps#t=201611191447485497121
StockPosDate = new.env() # use environment as a hash table
clear_stock_pos_hash <- function()
{
rm(list=names(StockPosDate), envir=StockPosDate)
}
add_date_entry_into_hash <- function(stock_id, pos_date)
{
entry = StockPosDate[[stock_id]]
if (FALSE == is.null(entry)) {
entry[[length(entry) + 1]] = pos_date
}
else {
entry = list()
entry[[1]] = pos_date
}
# TODO: check if this causes a totally overwritten... we actually expect a write on pointer values
StockPosDate[[stock_id]] = entry
}
get_stocks_from_hash <- function()
{
return (names(StockPosDate))
}
get_stock_pos_date <- function(stock_id)
{
entry = StockPosDate[[stock_id]]
stopifnot(FALSE == is.null(entry))
return (entry)
}
# The StockPosDate functionality --- end
get_initial_summary <- function(start_date, end_date)
{
dprint(paste("從", as.Date(start_date), "到", as.Date(end_date))) # actually, the input 'end_date' is only for print (debug) purpose...
dates = get_market_dates(as.Date(start_date))
return (data.frame(date=sapply(dates, unclass), pos_cnt=0, ret_cumu=0, ret_avg=0))
}
add_summary <- function(res_pool, stock_id, buy_dates)
{
dprint(paste(c('標的:', stock_id, '=> 日期:', sapply(buy_dates, function(x){as.character.Date(as.Date(x))})), collapse = ' '))
csv_data = get_csv_data(get_csv_file(stock_id), cared_date_start=as.Date(buy_dates[[1]]),
cared_value=c(DATE, CLOSE, ADJCLOSE))
zoo_data = zoo(csv_data[,ADJCLOSE], sapply(csv_data$date, to_date)) # zoo transfers to 'old->new' format, which ROC need
roc = ROC(zoo_data, type='discrete')
abnomal_roc_idx = which(sapply(roc, function(x){(is.na(x) | x>0.10 | x< -0.10)}))
abnomal_roc_idx = abnomal_roc_idx[! abnomal_roc_idx %in% 1] # the 1st entry is always NA for ROC, needless consider it
if (length(abnomal_roc_idx) > 0) {
for (i in 1:length(abnomal_roc_idx)) {
idx = abnomal_roc_idx[i]
dprint(paste0("忽略ROC=", round(roc[idx], 2), ", 於", as.Date(index(zoo_data)[idx])))
roc[idx] = 0
}
}
buy_idx = length(buy_dates)
res_idx = nrow(res_pool)
buy_date_value = unclass(as.Date(buy_dates[[buy_idx]]))
for (i in length(zoo_data):3) # i=1: buy decision, i=2: place position => only has returns for '>2'
{ # from newest to oldest
if (index(zoo_data)[i - 2] < buy_date_value)
{ # Note: 1. zoo keeps unclass Date value as index, 2. as above comment says, there's offset '2' between 'buy' and 'return'
buy_idx = buy_idx - 1
if (buy_idx < 1) { break }
buy_date_value = unclass(as.Date(buy_dates[[buy_idx]]))
}
while (res_pool$date[res_idx] != index(zoo_data)[i]) { # stock value (zoo_data) might be loss in certain market dates
res_idx = res_idx - 1
stopifnot(res_idx > 0)
}
# 1. increment pos_cnt (by buy_idx = cumu. positions of stock_id)
# 2. add ROC (rate of change) as return in percentage
res_pool$pos_cnt[res_idx] = res_pool$pos_cnt[res_idx] + buy_idx
res_pool$ret_cumu[res_idx] = res_pool$ret_cumu[res_idx] + (buy_idx * roc[i])
}
return (res_pool)
}
get_positions <- function(get_position_func)
{
# get positions
dates = get_market_dates(START_EVAL_DATE) # vector of 'Date', from oldest to newest
positions = list()
for (i in seq(1, length(dates), by=EVAL_PERIOD_LEN)) # evaluate from oldest (START_EVAL_DATE) date
{
cared_date = as.Date(dates[[i]])
position = get_position_func(cared_date)
dprint(paste(c("策略日期:", as.character.Date(cared_date), "=> 標的:", position), collapse = ' '))
pos_with_colname = matrix(position) # convert to matrix, bcoz later colnames need matrix-like input
# a work-around (?, maybe not so bad) (put 'date' in colname) until we figure out
# how to place data of different type as a whole
colnames(pos_with_colname) = cared_date
positions[[length(positions) + 1]] = pos_with_colname
}
return (positions)
}
# summary format:
# observation -> market date, from oldest to newest
# column ->
# pos_cnt: cumulative position count
# ret_cumu: cumulative return (%)
# ret_avg: average return (%) <= equals to 'ret_cumu / pos_cnt'
get_evaluate_summary <- function(get_position_func, update_progress=NULL)
{
positions = get_positions(get_position_func)
clear_stock_pos_hash() # reset global data
if (!is.null(update_progress)) { update_progress(0.0, if (ENG == LANG) 'transfer to stock-oriented data' else '轉換為個股導向資料') }
# convert to 'stock major' entries from input 'date major' entries (to reduce csv file access)
for (i in 1:length(positions))
{
entry = positions[[i]]
pos_date = as.integer(colnames(entry))
stocks = as.vector(entry)
for (i in 1:length(stocks)) {
add_date_entry_into_hash(stocks[i], pos_date)
}
}
if (!is.null(update_progress)) { update_progress(0.2, if (ENG == LANG) 'initialize data' else '初始化資料') }
# prepare summary data pool
start_date = as.integer(colnames(positions[[1]]))
end_date = as.integer(colnames(positions[[length(positions)]])) # find out if we can use '-1' to retrieve last element
summary = get_initial_summary(start_date, end_date)
if (!is.null(update_progress)) { update_progress(0.4, if (ENG == LANG) 'iterate data' else '累加資料') }
# for each stock_id, fill in its 'contribution' to data pool
for (stock_id in get_stocks_from_hash()) {
buy_dates = get_stock_pos_date(stock_id)
summary = add_summary(summary, stock_id, buy_dates)
}
if (!is.null(update_progress)) { update_progress(0.6, if (ENG == LANG) 'summarize data' else '總結資料') }
for (i in 1:nrow(summary)) {
if (summary$pos_cnt[i] > 0) {
summary$ret_avg[i] = summary$ret_cumu[i] / summary$pos_cnt[i]
}
}
if (!is.null(update_progress)) { update_progress(0.8) }
return (summary)
}
get_return_from_summary <- function(summary, title)
{
ret_values = matrix(summary$ret_avg)
colnames(ret_values) = title
return (xts(ret_values, as.Date(summary$date)))
}
get_return <- function(get_position_func, title, in_summary=NA)
{
summary = if (class(in_summary) == 'logical' && is.na(in_summary)) # need '&&' to have short-curcuit evaluation
get_evaluate_summary(get_position_func) else in_summary
return (get_return_from_summary(summary, title))
}
evaluate <- function(get_position_func, title, in_summary=NA, update_progress=NULL)
{
summary = if (class(in_summary) == 'logical' && is.na(in_summary)) # need '&&' to have short-curcuit evaluation
get_evaluate_summary(get_position_func, update_progress) else in_summary
ret = get_return_from_summary(summary, title)
table.Drawdowns(ret, top=10)
table.DownsideRisk(ret)
charts.PerformanceSummary(ret)
return (summary)
}
evaluate_pair_relative <- function(get_position_func1, title1, get_position_func2, title2,
in_summary1=NA, in_summary2=NA, update_progress=NULL)
{
if (!is.null(update_progress)) { update_progress(0, paste0(if (ENG == LANG) 'compute ' else '計算', title1)) }
ret1 = get_return(get_position_func1, title1, in_summary1)
if (!is.null(update_progress)) { update_progress(0.01, paste0(if (ENG == LANG) 'compute ' else '計算', title2)) }
ret2 = get_return(get_position_func2, title2, in_summary2)
if (!is.null(update_progress)) { update_progress(0.6, if (ENG == LANG) 'draw relative graph' else '繪製「相對報酬」圖') }
# Note: we've tried, but failed to find the 'PerformanceAnalytics' charts support Chinese
chart.RelativePerformance(ret1, ret2, main=paste("Relative Performance:", title1, "to", title2))
if (!is.null(update_progress)) { update_progress(0.9) }
}
evaluate_pair <- function(get_position_func1, title1, get_position_func2, title2,
in_summary1=NA, in_summary2=NA, update_progress=NULL)
{
if (!is.null(update_progress)) { update_progress(0, paste0(if (ENG == LANG) 'compute ' else '計算', title1)) }
ret1 = get_return(get_position_func1, title1, in_summary1)
if (!is.null(update_progress)) { update_progress(0.01, paste0(if (ENG == LANG) 'compute ' else '計算', title2)) }
ret2 = get_return(get_position_func2, title2, in_summary2)
if (!is.null(update_progress)) { update_progress(0.4, if (ENG == LANG) 'draw summary graph' else '繪製「報酬總結」圖') }
ret_combine = cbind(ret1, ret2)
colnames(ret_combine) = c(title1, title2)
table.Drawdowns(ret_combine, top=10)
table.DownsideRisk(ret_combine)
charts.PerformanceSummary(ret_combine)
if (!is.null(update_progress)) { update_progress(0.7, if (ENG == LANG) 'compute statistics' else '計算報酬相關數據') }
cumu_returns = Return.cumulative(ret_combine)
annual_returns = Return.annualized(ret_combine)
first_valid_entry = which(ret1 != 0)[1]
cumu_returns1 = Return.cumulative(ret1[first_valid_entry:length(ret1),])
annual_returns1 = Return.annualized(ret1[first_valid_entry:length(ret1),])
dd = findDrawdowns(ret1[first_valid_entry:length(ret1),])
ldd = max(dd$length)
ldd_entry = which(dd$length==ldd)[1] # only returns the result of 1st ldd entry
ldd_from_date = as.character(index(ret_combine)[dd$from[ldd_entry] + first_valid_entry - 1])
ldd_to_date = as.character(index(ret_combine)[dd$to[ldd_entry] + first_valid_entry - 1])
start_month = index(ret_combine)[first_valid_entry - 1]
start_month1 = paste0(format(start_month, '%Y'), '年', months(start_month))
first_valid_entry2 = which(ret2 != 0)[1]
cumu_returns2 = Return.cumulative(ret2[first_valid_entry2:length(ret2),])
annual_returns2 = Return.annualized(ret2[first_valid_entry2:length(ret2),])
dd2 = findDrawdowns(ret2[first_valid_entry2:length(ret2),])
ldd2 = max(dd2$length)
ldd_entry2 = which(dd2$length==ldd2)[1] # only returns the result of 1st ldd entry
ldd_from_date2 = as.character(index(ret_combine)[dd2$from[ldd_entry2] + first_valid_entry2 - 1])
ldd_to_date2 = as.character(index(ret_combine)[dd2$to[ldd_entry2] + first_valid_entry2 - 1])
start_month = index(ret_combine)[first_valid_entry2 - 1]
start_month2 = paste0(format(start_month, '%Y'),
if (ENG == LANG) '' else '年',
if (ENG == LANG) paste0('/', format(start_month, '%m')) else months(start_month))
if (!is.null(update_progress)) { update_progress(0.8, if (ENG == LANG) 'return results' else '傳回結果') }
return (data.frame(cumu_ret=c(cumu_returns1, cumu_returns2),
annual_ret=c(annual_returns1, annual_returns2),
start_month=c(start_month1, start_month2),
mdd=c(min(dd$return), min(dd2$return)),
ldd=c(ldd, ldd2),
from=c(ldd_from_date, ldd_from_date2),
to=c(if (is.na(ldd_to_date)) '' else ldd_to_date,
if (is.na(ldd_to_date2)) '' else ldd_to_date2)))
}