Navigate back to the homepage

TensorFlow HLO文本处理和结构化

LER0ever
February 17th, 2020 · 1 min read

HLO Text的文件结构

来源: HLO的C++数据结构序列化之后的文本文件

文件全部由一个个函数构成,没有任何其他结构。 每个函数都由函数名、输入参数列表和类型、输出参数类型和函数体构成。 其中的函数体结构类似SSA(Static Single Assignment),每个变量都只会被赋值一次,并且名称唯一。做数据流图DFG的时候可以非常单纯的直接查找变量名找到这个变量被赋值和使用的地方。

每一条SSA指令结构大概如下:

1%fusion.8228 = f32[4,32,48,32]{3,2,1,0} fusion(f32[192,1024]{1,0} %dot.2067, f32[] %arg217.0), kind=kLoop, calls=%fused_computation.4684.clone, metadata={op_type="Mul" op_name="transformer/parallel_0_5/transformer/transformer/body/encoder/layer_11/self_attention/multihead_attention/mul"}

大体就是 %var = $type $fn($params), {$metadata…}

结构化处理 attempt 1 (2020/02/11)

Observation

可以观察到每一条指令的数据流动都是从等号从右往左,所以可以尝试直接使用Python对文本做字符串处理,大概思路就是 1. 按照等号split每一条指令 2. 等号左边处理%var1,作为左操作数 3. 等号右边处理%var2, %var3, 作为右操作数 4. 数据流关系就是左操作数依赖于右操作数

Implementation

代码实现如下

1class block:
2 def __init__(self):
3 self.name = ""
4 self.firstline = ""
5 self.params = []
6 self.body = []
7 self.calls = []
8
9class node:
10 def __init__(self):
11 self.id = ""
12 self.label = ""
13class edge:
14 def __init__(self):
15 self.source = ""
16 self.target = ""
17class graph:
18 def __init__(self):
19 self.nodes = []
20 self.edges = []
21
22result = []
23
24def process_body_line(s):
25 # into calls
26 ret = []
27 call_fn = ["calls=", "to_apply="]
28 for w in call_fn:
29 while w in s:
30 start = s.find(w) + len(w)
31 end = s.find(",", start)
32 ret.append(s[start:end])
33 s = s.replace(w, ''.join(reversed(w)))
34 return ret
35
36def process_first_line_into_args(s):
37 # into params
38 ret = []
39 param_end = s.find("->")
40 if param_end != -1:
41 s = s[0 : param_end - 2]
42 param_start = s.find("(") + 1
43 s = s[param_start : param_end + 1]
44 params = s.split(", ")
45 for x in params:
46 ret.append(x[0:x.find(":")])
47 # param_end = s.find(")", param_end) - 1
48 return ret
49
50def process_first_line_into_name(s):
51 name_end = s.find(" (")
52 name = s[0:name_end]
53 name = name.replace("ENTRY ", "")
54 return name
55
56l = 0
57while l < len(lines):
58 # print(l)
59 line = lines[l]
60 if len(line) < 2:
61 l = l + 1
62 continue
63 if line[:2] == "  ":
64 print("Unhandled Situation, printing surround lines…")
65 print(lines[l - 1], lines[l + 1])
66 exit
67 if line[0] != " ":
68 f = block()
69 f.firstline = line.replace("\n", "")
70 f.params = process_first_line_into_args(f.firstline)
71 f.name = process_first_line_into_name(f.firstline)
72 # print(process_first_line_into_name(f.firstline))
73 l = l + 1
74 line = lines[l]
75 while line[0] != "}":
76 f.calls = f.calls + process_body_line(line)
77 f.body.append(line.replace("\n", ""))
78 l = l + 1
79 line = lines[l]
80 result.append(f)
81 l = l + 1
82 # if l % 10 == 0:
83 # print("Currently l = ", l)
84
85def parse_fn_line(s):
86 i = 0
87 ret = []
88 while i < len(s):
89 if s[i] == '%':
90 new_var = "%"
91 i = i + 1
92 while s[i] != ' ' and s[i] != ')' and s[i] != ',':
93 new_var += s[i]
94 i = i + 1
95 ret.append(new_var)
96 i = i + 1
97 return ret
98
99def parse_fn_dfg(blk):
100 # print("parsing fn", blk.name)
101 variables = []
102 for x in blk.body:
103 variables.append(parse_fn_line(x))
104 # print(variables)
105 g = graph()
106 created = set()
107 for l in variables:
108 for x in l:
109 if x in created:
110 continue
111 n = node()
112 n.id = x
113 n.label = x#shorten_name(x)
114 g.nodes.append(n)
115 created.add(x)
116 for l in variables:
117 if len(l) <= 1:
118 continue
119 for x in l[1:]:
120 e = edge()
121 e.source = x
122 e.target = l[0]
123 g.edges.append(e)
124 def dumper(obj):
125 try:
126 return obj.toJSON()
127 except:
128 return obj.__dict__
129 return json.dumps(g, default=dumper, indent=2)

Limitation

马上问题就来了,HLO指令应该还是偏灵活, 光生成DFG碰到的bad case就有

  • 左操作数不止一个
  • 有可能没有右操作数

今天(134:23)在尝试做Variable Propagation的时候碰到了更多的问题,如

  • 需要读取variable的类型,类似f32[4,32,48,32]{3,2,1,0}这种信息在尝试切分矩阵的时候是必要的
  • 需要识别右边函数的名称,以及那些右操作数会被传入该函数,对不同函数切分矩阵的处理不同
  • 个别操作会在metadata里写重要信息…比如Slice会将slice的dimension放在后面

结构化处理 attempt 2 (2020/02/12)

Observation

尝试了一早上使用Python字符串匹配处理HLO文本,发现会触发各种Corner cases,比如

  • 右操作数可以直接是一个number
  • metadata可以是一个dict
  • metadata可以是一个[a:b] 的数组
  • metadata可以是一个字符串
  • 函数返回值的类型可以是一个数组:返回多个变量的函数

Implementation

下午改用另一种思路,使用词法和语法分析把这个SSA form当成 LL(k) 语法抽象生成语法树 词法比较简单,语法也能套LL1的大多数结构

考虑AST结构如下,内嵌EBNF和语法定义

1var HLOLexer = lexer.Must(ebnf.New(`
2Comment = ("#" | "//") { "\u0000"…"\uffff"-"\n" } .
3Ident = (alpha | "_") { "." | "_" | "-" | alpha | digit } .
4String = "\"" {Ident | "/"} "\"" .
5VarName = "%" Ident .
6Number = { "-" } ("." | digit | "inf") {"." | digit} .
7Whitespace = " " | "\t" | "\n" | "\r" .
8Rightarrow = "->" .
9Assign = "=" .
10Punct = "!"…"/" | ":"…"@" | "["…"_" | "{"…"~" .
11alpha = "a"…"z" | "A"…"Z" .
12digit = "0"…"9" .
13`))
14
15type HLO struct {
16 Functions []*HLOFunction `@@*`
17}
18
19type HLOFunction struct {
20 Name string `("ENTRY")? @VarName`
21 Params []*Param `"(" [ @@ { "," @@ } ] ")"`
22 ReturnTypes []*Type `"->" ( "(" [ @@ { "," @@ } ] ")" | @@)`
23 Body []*Instruction `"{" @@ {@@} "}"`
24}
25
26type Instruction struct {
27 VarName string `("ROOT")? @VarName "="`
28 Fn *FunctionCall `@@`
29 Meta []*Meta `{ "," @@ }`
30}
31
32type FunctionCall struct {
33 ReturnTypes []*RichType `(@@ | "(" @@ { "," @@ } ")" )`
34 Name string `@Ident`
35 Params []*RichParam `"(" [ @@ { "," @@ } ] ")"`
36}
37
38type Meta struct {
39 Key string `@Ident "="`
40 Value string `(@Ident|@VarName|@Number)?`
41 DictValue []*Dict `("{" { @@ } "}")?`
42 ListNums []int `("{" @Number {"," @Number } "}")?`
43 ListSlices []Slice `("{" @@ {"," @@ } "}")?`
44}
45
46type Dict struct {
47 Key string `@Ident "="`
48 Value string `@String | @Ident`
49}
50
51type Slice struct {
52 Start int `"[" @Number ":"`
53 End int `@Number "]"`
54}
55
56type Param struct {
57 Name string `@Ident ":"`
58 Type *Type `@@`
59}
60
61type Type struct {
62 DataType string `@Ident`
63 Dimensions []int `"[" [ @Number { "," @Number } ] "]"`
64}
65
66type RichParam struct {
67 Type *RichType `(@@)?`
68 Name string `@VarName | @Number | @Ident`
69}
70
71type RichType struct {
72 VarType string `@Ident`
73 VarDim []int `"[" [ @Number { "," @Number } ] "]" ("{" [ @Number { "," @Number } ] "}")?`
74}

Result

解析事例函数如下

1%fused_computation.19.clone (param_0.16672: f32[4,49,1024], param_1.23221: f32[196,1024]) -> f32[1024] {
2 %param_1.23221 = f32[196,1024]{1,0} parameter(1)
3 %reshape.13330 = f32[4,49,1024]{2,1,0} reshape(f32[196,1024]{1,0} %param_1.23221), metadata={op_type="Reshape" op_name="training/gradients/transformer/parallel_0_5/transformer/transformer/body/decoder/layer_23_1/ffn/conv1/Tensordot/Reshape_grad/Reshape"}
4 %param_0.16672 = f32[4,49,1024]{2,1,0} parameter(0)
5 %multiply.14985 = f32[4,49,1024]{2,1,0} multiply(f32[4,49,1024]{2,1,0} %reshape.13330, f32[4,49,1024]{2,1,0} %param_0.16672), metadata={op_type="Mul" op_name="training/gradients/transformer/parallel_0_5/transformer/transformer/body/decoder/layer_23_1/ffn/layer_prepostprocess/layer_norm/mul_1_grad/Mul_1"}
6 %constant.11228 = f32[] constant(0), metadata={op_type="RandomUniform" op_name="transformer/parallel_0_5/transformer/transformer/body/dropout/random_uniform/RandomUniform"}
7 ROOT %reduce.1954 = f32[1024]{0} reduce(f32[4,49,1024]{2,1,0} %multiply.14985, f32[] %constant.11228), dimensions={0,1}, to_apply=%training_gradients_transformer_parallel_0_5_transformer_transformer_body_decoder_layer_23_1_ffn_layer_prepostprocess_layer_norm_mul_1_grad_Sum_1-reduction.48850, metadata={op_type="Sum" op_name="training/gradients/transformer/parallel_0_5/transformer/transformer/body/decoder/layer_23_1/ffn/layer_prepostprocess/layer_norm/mul_1_grad/Sum_1"}
8}

返回Tokens和AST结果为:

1[%fused_computation.19.clone ( param_0.16672 : f32 [ 4 , 49 , 1024 ] , param_1.23221 : f32 [ 196 , 1024 ] ) -> f32 [ 1024 ] {
2 %param_1.23221 = f32 [ 196 , 1024 ] { 1 , 0 } parameter ( 1 )
3 %reshape.13330 = f32 [ 4 , 49 , 1024 ] { 2 , 1 , 0 } reshape ( f32 [ 196 , 1024 ] { 1 , 0 } %param_1.23221 ) , metadata = { op_type = "Reshape" op_name = "training/gradients/transformer/parallel_0_5/transformer/transformer/body/decoder/layer_23_1/ffn/conv1/Tensordot/Reshape_grad/Reshape" }
4 %param_0.16672 = f32 [ 4 , 49 , 1024 ] { 2 , 1 , 0 } parameter ( 0 )
5 %multiply.14985 = f32 [ 4 , 49 , 1024 ] { 2 , 1 , 0 } multiply ( f32 [ 4 , 49 , 1024 ] { 2 , 1 , 0 } %reshape.13330 , f32 [ 4 , 49 , 1024 ] { 2 , 1 , 0 } %param_0.16672 ) , metadata = { op_type = "Mul" op_name = "training/gradients/transformer/parallel_0_5/transformer/transformer/body/decoder/layer_23_1/ffn/layer_prepostprocess/layer_norm/mul_1_grad/Mul_1" }
6 %constant.11228 = f32 [ ] constant ( 0 ) , metadata = { op_type = "RandomUniform" op_name = "transformer/parallel_0_5/transformer/transformer/body/dropout/random_uniform/RandomUniform" }
7 ROOT %reduce.1954 = f32 [ 1024 ] { 0 } reduce ( f32 [ 4 , 49 , 1024 ] { 2 , 1 , 0 } %multiply.14985 , f32 [ ] %constant.11228 ) , dimensions = { 0 , 1 } , to_apply = %training_gradients_transformer_parallel_0_5_transformer_transformer_body_decoder_layer_23_1_ffn_layer_prepostprocess_layer_norm_mul_1_grad_Sum_1-reduction.48850 , metadata = { op_type = "Sum" op_name = "training/gradients/transformer/parallel_0_5/transformer/transformer/body/decoder/layer_23_1/ffn/layer_prepostprocess/layer_norm/mul_1_grad/Sum_1" }
8 } <EOF>]
9&main.HLO{
10 Functions: []*main.HLOFunction{
11 &main.HLOFunction{
12 Name: "%fused_computation.19.clone",
13 Params: []*main.Param{
14 &main.Param{
15 Name: "param_0.16672",
16 Type: &main.Type{
17 DataType: "f32",
18 Dimensions: []int{
19 4,
20 49,
21 1024,
22 },
23 },
24 },
25 &main.Param{
26 Name: "param_1.23221",
27 Type: &main.Type{
28 DataType: "f32",
29 Dimensions: []int{
30 196,
31 1024,
32 },
33 },
34 },
35 },
36 ReturnTypes: []*main.Type{
37 &main.Type{
38 DataType: "f32",
39 Dimensions: []int{
40 1024,
41 },
42 },
43 },
44 Body: []*main.Instruction{
45 &main.Instruction{
46 VarName: "%param_1.23221",
47 Fn: &main.FunctionCall{
48 ReturnTypes: []*main.RichType{
49 &main.RichType{
50 VarType: "f32",
51 VarDim: []int{
52 196,
53 1024,
54 1,
55 0,
56 },
57 },
58 },
59 Name: "parameter",
60 Params: []*main.RichParam{
61 &main.RichParam{
62 Name: "1",
63 },
64 },
65 },
66 },
67 &main.Instruction{
68 VarName: "%reshape.13330",
69 Fn: &main.FunctionCall{
70 ReturnTypes: []*main.RichType{
71 &main.RichType{
72 VarType: "f32",
73 VarDim: []int{
74 4,
75 49,
76 1024,
77 2,
78 1,
79 0,
80 },
81 },
82 },
83 Name: "reshape",
84 Params: []*main.RichParam{
85 &main.RichParam{
86 Type: &main.RichType{
87 VarType: "f32",
88 VarDim: []int{
89 196,
90 1024,
91 1,
92 0,
93 },
94 },
95 Name: "%param_1.23221",
96 },
97 },
98 },
99 Meta: []*main.Meta{
100 &main.Meta{
101 Key: "metadata",
102 DictValue: []*main.Dict{
103 &main.Dict{
104 Key: "op_type",
105 Value: "\"Reshape\"",
106 },
107 &main.Dict{
108 Key: "op_name",
109 Value: "\"training/gradients/transformer/parallel_0_5/transformer/transformer/body/decoder/layer_23_1/ffn/conv1/Tensordot/Reshape_grad/Reshape\"",
110 },
111 },
112 },
113 },
114 },
115 &main.Instruction{
116 VarName: "%param_0.16672",
117 Fn: &main.FunctionCall{
118 ReturnTypes: []*main.RichType{
119 &main.RichType{
120 VarType: "f32",
121 VarDim: []int{
122 4,
123 49,
124 1024,
125 2,
126 1,
127 0,
128 },
129 },
130 },
131 Name: "parameter",
132 Params: []*main.RichParam{
133 &main.RichParam{
134 Name: "0",
135 },
136 },
137 },
138 },
139 &main.Instruction{
140 VarName: "%multiply.14985",
141 Fn: &main.FunctionCall{
142 ReturnTypes: []*main.RichType{
143 &main.RichType{
144 VarType: "f32",
145 VarDim: []int{
146 4,
147 49,
148 1024,
149 2,
150 1,
151 0,
152 },
153 },
154 },
155 Name: "multiply",
156 Params: []*main.RichParam{
157 &main.RichParam{
158 Type: &main.RichType{
159 VarType: "f32",
160 VarDim: []int{
161 4,
162 49,
163 1024,
164 2,
165 1,
166 0,
167 },
168 },
169 Name: "%reshape.13330",
170 },
171 &main.RichParam{
172 Type: &main.RichType{
173 VarType: "f32",
174 VarDim: []int{
175 4,
176 49,
177 1024,
178 2,
179 1,
180 0,
181 },
182 },
183 Name: "%param_0.16672",
184 },
185 },
186 },
187 Meta: []*main.Meta{
188 &main.Meta{
189 Key: "metadata",
190 DictValue: []*main.Dict{
191 &main.Dict{
192 Key: "op_type",
193 Value: "\"Mul\"",
194 },
195 &main.Dict{
196 Key: "op_name",
197 Value: "\"training/gradients/transformer/parallel_0_5/transformer/transformer/body/decoder/layer_23_1/ffn/layer_prepostprocess/layer_norm/mul_1_grad/Mul_1\"",
198 },
199 },
200 },
201 },
202 },
203 &main.Instruction{
204 VarName: "%constant.11228",
205 Fn: &main.FunctionCall{
206 ReturnTypes: []*main.RichType{
207 &main.RichType{
208 VarType: "f32",
209 },
210 },
211 Name: "constant",
212 Params: []*main.RichParam{
213 &main.RichParam{
214 Name: "0",
215 },
216 },
217 },
218 Meta: []*main.Meta{
219 &main.Meta{
220 Key: "metadata",
221 DictValue: []*main.Dict{
222 &main.Dict{
223 Key: "op_type",
224 Value: "\"RandomUniform\"",
225 },
226 &main.Dict{
227 Key: "op_name",
228 Value: "\"transformer/parallel_0_5/transformer/transformer/body/dropout/random_uniform/RandomUniform\"",
229 },
230 },
231 },
232 },
233 },
234 &main.Instruction{
235 VarName: "%reduce.1954",
236 Fn: &main.FunctionCall{
237 ReturnTypes: []*main.RichType{
238 &main.RichType{
239 VarType: "f32",
240 VarDim: []int{
241 1024,
242 0,
243 },
244 },
245 },
246 Name: "reduce",
247 Params: []*main.RichParam{
248 &main.RichParam{
249 Type: &main.RichType{
250 VarType: "f32",
251 VarDim: []int{
252 4,
253 49,
254 1024,
255 2,
256 1,
257 0,
258 },
259 },
260 Name: "%multiply.14985",
261 },
262 &main.RichParam{
263 Type: &main.RichType{
264 VarType: "f32",
265 },
266 Name: "%constant.11228",
267 },
268 },
269 },
270 Meta: []*main.Meta{
271 &main.Meta{
272 Key: "dimensions",
273 ListNums: []int{
274 0,
275 1,
276 },
277 },
278 &main.Meta{
279 Key: "to_apply",
280 Value: "%training_gradients_transformer_parallel_0_5_transformer_transformer_body_decoder_layer_23_1_ffn_layer_prepostprocess_layer_norm_mul_1_grad_Sum_1-reduction.48850",
281 },
282 &main.Meta{
283 Key: "metadata",
284 DictValue: []*main.Dict{
285 &main.Dict{
286 Key: "op_type",
287 Value: "\"Sum\"",
288 },
289 &main.Dict{
290 Key: "op_name",
291 Value: "\"training/gradients/transformer/parallel_0_5/transformer/transformer/body/decoder/layer_23_1/ffn/layer_prepostprocess/layer_norm/mul_1_grad/Sum_1\"",
292 },
293 },
294 },
295 },
296 },
297 },
298 },
299 },
300}
301{Functions:[0xc0000f03c0]}

More articles from L.E.R Space | Blog

最小化数组分段和问题,算法及其应用(1)

使用流水线的案例简介最小数组分段和问题,及其变种。

October 20th, 2019 · 1 min read

Build jiri for Windows

My experiences and work around for compiling and using jiri under Windows

August 25th, 2018 · 2 min read
© 2014–2020 L.E.R Space | Blog
Link to https://twitter.com/LER0ever · Link to https://github.com/LER0ever · Link to https://instagram.com/rongyi.io · Link to https://www.linkedin.com/in/LER0ever