-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathxtimportu.ado
181 lines (180 loc) · 7.32 KB
/
xtimportu.ado
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
*! version 1.1.3 07oct2022 I I Bolotov
program def xtimportu, rclass
version 14.0
/*
This program imports monthly, quarterly, half-yearly, and yearly time
series and panel data from a supported file format, filtering and
encoding its cross-sectional units if required (use "|" as separator
in encode()), allowing the user to export and/or save the result.
Wide (pivoted) data must be imported in a way that the time values
are located in _n == 1 (use cellrange() for excel and preformat()
for other filetypes) and are transposed with (SSC) sxpose2.
Unicode characters are fully supported for Stata 14 and newer versions.
Author: Ilya Bolotov, MBA, Ph.D.
Date: 20 November 2020
*/
cap which sxpose2
if _rc {
ssc install sxpose2
}
// syntax
syntax ///
/* syntax for import */ anything(name=import id="import: subcommand"), ///
[/* ignore */ FIRSTrow VARNames(string) case(string) force replace] ///
[/* preformat */ PREformat(string asis)] ///
[PANELvar(string) REgex(string) ENcode(string)] ///
[TIMEvar(string) TFORmat(string)] TFREQuency(string) [drop TDEstring] ///
[GENerate(string) float Ignore(string asis) percent dpcomma TOstring] ///
[clear export(string asis) SAving(string asis) *]
// adjust and preprocess options
local regex = ustrregexra(`"`regex'"', "\s", "\\s")
local encode = ustrregexra(`"`encode'"', "\s", "_" )
local tfrequency = ///
cond(ustrregexm(strtrim(`"`tfrequency'"'), "y", 1), "year", "") + ///
cond(ustrregexm(strtrim(`"`tfrequency'"'), "h", 1), "halfyear", "") + ///
cond(ustrregexm(strtrim(`"`tfrequency'"'), "q", 1), "quarter", "") + ///
cond(ustrregexm(strtrim(`"`tfrequency'"'), "m", 1), "month", "")
local ignore = cond(`"`ignore'"' != "", `"ignore(`ignore')"', "")
// check options for errors
if trim(`"`tfrequency'"') == "" {
di as err "invalid syntax"
exit 198
}
if trim(`"`float'`ignore'`percent'`dpcomma'"') != "" & ///
trim(`"`tostring'"') != "" {
di as err "must specify either * (destring options) or tostring option"
exit 198
}
if trim(`"`clear'`export'`saving'"') == "" {
di as err "must specify one or more of clear, export or saving options"
exit 198
}
tempname j F f var
// check for third-party packages from SSC
qui which sxpose2
// import data from a supported file format
if trim(`"`clear'"') == "" { // preserve data (if required)
preserve
}
qui import `import', `options' clear
`preformat' // preformat data (if required)
// prepare the cross-sectional unit(s)
if trim(`"`regex'"') != "" { // filter rows (if required)
qui {
/* filter panelvar by matching regex() */
if trim(`"`panelvar'"') == "" { // get the first variable
ds
/* define panelvar */
local panelvar : word 1 of `r(varlist)'
}
drop if ! ustrregexm(`panelvar', `"`regex'"') & ///
("`timevar'" != "" | _n > 1) // preserve the first row
/* replace regex with the contents of encode() */
if trim(`"`encode'"') != "" {
local regex = usubinstr(`"`regex'"', "|", " ", .)
local i = usubinstr(`"`encode'"', "|", " ", .)
forvalues `j' = 1/`=wordcount(`"`regex'"')' {
replace `panelvar' = word(`"`encode'"', ``j'') if ///
ustrregexm(`panelvar', word(`"`regex'"', ``j''))
}
}
/* drop duplicates and missing values */
by `panelvar', sort: drop if _n > 1
}
}
// prepare the time variale
qui {
if trim("`timevar'") == "" { // transpose data (if required)
sxpose2, clear force
/* define timevar */
local timevar "_var1"
}
/* fill in eventual missing values in the time variable */
replace `timevar' = `timevar'[_n - 1] if mi(`timevar')
tostring `timevar', replace force
if trim(`"`tformat'"') == "" { // autoformat (if required)
/* drop yearly sums for frequency higher than yearly */
if trim("`tfrequency'") != "year" & ///
trim(`"`drop'"') != "" {
drop if ustrregexm(`timevar', `"^\s{0,}\d{4}\s{0,}$"')
}
/* strip frequency higher than yearly (if `tfrequency' != "year") */
replace `timevar' = ///
ustrregexrf(`timevar', `".{0,}(\d{4}).{0,}"', "$1", 1)
/* recreate stripped frequency (if `tfrequency' != "year") */
if trim("`tfrequency'") != "year" {
local `F' = cond("`tfrequency'" == "halfyear", 2, 0) + ///
cond("`tfrequency'" == "quarter", 4, 0) + ///
cond("`tfrequency'" == "month", 12, 0)
local `f' = ///
cond(``F'', "-" + upper(substr("`tfrequency'", 1, 1)), "")
replace `timevar' = `timevar' + "``f''" + cond(``F'', ///
string(cond(mod(_n - 1, ``F''), mod(_n - 1, ``F''), ``F'')), "")
}
/* drop duplicates and missing values */
drop if ! ustrregexm(`timevar', "\d{4}") & ///
_n > 1 // preserve the first row
by `timevar', sort: drop if _n > 1
/* define tformat() */
local tformat = cond("`tfrequency'" != "year", ///
"Y`=upper(substr("`tfrequency'", 1, 1))'", "Y")
}
if trim("`timevar'") == "_var1" | ///
trim(`"`tdestring'"') != "" {
tempvar dt
g `dt' = `tfrequency'ly(`timevar', `"`tformat'"')
drop `timevar'
rename `dt' `timevar'
order `timevar'
destring `timevar', replace force
}
if trim("`timevar'") == "_var1" { // transpose data (if required)
foreach `var' of varlist * {
cap rename ``var'' `=subinstr("``var''", "_var", "v", .)'
}
sxpose2, clear force
ds _var1, not // reshape data
foreach `var' of varlist `r(varlist)' {
cap rename ``var'' value`=``var''[1]'
}
drop if _n == 1
rename _var1 `panelvar'
reshape long value, string i(`panelvar') j(`timevar')
destring `timevar', replace force
}
/* format the time variable as str# or %tX */
if trim(`"`tdestring'"') != "" {
format `timevar' %t`=lower(substr("`tfrequency'", 1, 1))'
}
if trim("`timevar'") == "_var1" & ///
trim(`"`tdestring'"') == "" {
g `dt' = string(yofd(dof`=lower(substr("`tfrequency'", 1, 1))'( ///
`timevar'))) + cond("`tfrequency'" != "year", "``f''" + ///
string(`tfrequency'(dof`=lower(substr("`tfrequency'", 1, 1))'( ///
`timevar'))), "")
drop `timevar'
rename `dt' `timevar'
}
}
// prepare the variable
if trim(`"`tostring'"') == "" {
qui destring value, replace force `float' `ignore' `percent' `dpcomma'
}
// rename dimensions and the variable, order and sort
rename `panelvar' unit
rename `timevar' `tfrequency'
if trim(`"`generate'"') != "" {
confirm new var `generate'
rename value `generate'
}
order unit `tfrequency'
sort unit `tfrequency'
// export data to a supported file format
if trim(`"`export'"') != "" {
export `export'
}
// save data to a DTA file
if trim(`"`saving'"') != "" {
save `saving'
}
end