机器学习识别XSS实践
[Machine Learning] [Data Science for Cyber Security]
前言
前日看到大佬发的机器学习识别XSS的项目代码
于是搞了一把,记录一下遇到的问题
思路
大佬代码中给出的流程:
- 数据集:GET/POST query
- 泛化、分词:将数字串、链接替换为固定词,然后用正则分词
- 特征(word2vec)
- 神经网络(MLP/RNN/CNN)
我实践的流程:
- 泛化、分词流程不变
- 特征(分别尝试了word2vec、doc2vec、统计特征)
- SVM
数据预处理
以下是在本地做数据预处理时的 jupyter notebook 代码。
import graphlab as gl
1 取数据
normal = gl.SFrame.read_csv('data/normal_examples.csv')
evil = gl.SFrame.read_csv('data/xssed.csv')
Parsing completed. Parsed 40637 lines in 0.091727 secs.
normal = gl.SFrame(normal)
evil = gl.SFrame(evil)
normal
param |
---|
_%3D1498591621808 |
code%3Dzs_000001%2Czs_399 001%2Czs_399006%26cb% ... |
_%3D1498591951848%26list% 3Dml_sh600030 ... |
6053%26ri%3Dzb6-00f%7E- 04gUry-01h- ... |
b1498592370545%3D1 |
v%3D13111002 |
COLLCC%3D3442798258%26 |
t%3Dcheck%26rec%3Dstratus %26etyp%3Dconnect%26z ... |
cn_600022%2Ccn_600516%2Cc n_000002%2Ccn_600519% ... |
_%3D1498179095094%26list% 3Dsh600030 ... |
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
2 画数据
normal.show()
Canvas is accessible via web browser at the URL: http://localhost:54528/index.html
Opening Canvas in default web browser.
evil.show()
Canvas is updated and available in a tab in the default browser.
evil.head()
param |
---|
Search%3D%3C/script%3E%3C img/%2A%00/src%3D%22w ... |
symbol%3D%3Ch1%3E%3Cscrip t%3Ealert%28/hacked/% ... |
query%3D%3CIMG%2B%22%22%2 2%3E%3CSCRIPT%3Ealert ... |
ReturnUrl%3Dhttp%3A//www. elle.fr/recherche ... |
_lang%3D%22%3E%3Cscript%3 Ealert%28document.coo ... |
language%3D%22%3E%3C/scri pt%3E%22%3E%27%3E%3Cs ... |
q%3Dbentley%26stylesheet% 3D%22%3E%3Cscript%3Ea ... |
option%3Dcom_wdshop%26vie w%3Duserinfo%26ajax_j ... |
CT_ORIG_URL%3D/arena/%22% 3E%3Cscript%3Ealert%2 ... |
query%3DSearch...%26Produ ct%3D%27%22--%3E%3C/s ... |
3 泛化、分词
import nltk
import re
from urllib import unquote
def GeneSeg(payload):
    """Normalize one request row and split it into tokens.

    The raw query string is URL-decoded twice, lower-cased, has digit runs
    and URLs replaced by fixed placeholder tokens, and is then tokenized
    with a regular expression.

    Args:
        payload: Row mapping that holds the raw, URL-encoded query string
            under the ``'param'`` key.

    Returns:
        list: Tokens extracted from the normalized string. An empty list is
        returned when the ``'param'`` value is empty or ``None``.
    """
    raw = payload['param']
    if not raw:
        # An empty cell has nothing to tokenize; avoid calling .lower() on None.
        return []
    # Decode twice so double-encoded payloads are reduced to literal
    # characters, and lower-case only AFTER decoding: lower-casing first
    # would leave letters that were percent-encoded in upper case untouched.
    text = unquote(unquote(raw)).lower()
    # Generalize every run of digits to "0".
    text = re.sub(r'\d+', "0", text)
    # Replace URLs with the fixed token "http://u".
    text = re.sub(r'(http|https)://[a-zA-Z0-9\.@&/#!#\?]+', "http://u", text)
    # Tokenizer pattern (verbose mode). The (?x) flag must be the very first
    # thing in the pattern; newer Python versions reject a global inline flag
    # that appears later in the expression.
    token_pattern = r'''(?x)
        [\w\.]+?\(
        |\)
        |"\w+?"
        |'\w+?'
        |http://\w
        |</\w+>
        |<\w+>
        |<\w+
        |\w+=
        |>
        |[\w\.]+
    '''
    return nltk.regexp_tokenize(text, token_pattern)
# 增加一列
normal['parsed'] = normal.apply(GeneSeg)
evil['parsed'] = evil.apply(GeneSeg)
normal.head()
param | parsed |
---|---|
_%3D1498591621808 | [_=, 0] |
code%3Dzs_000001%2Czs_399 001%2Czs_399006%26cb% ... |
[code=, zs_0, zs_0, zs_0, cb=, fortune_hq_cn, _=, ... |
_%3D1498591951848%26list% 3Dml_sh600030 ... |
[_=, 0, list=, ml_sh0] |
6053%26ri%3Dzb6-00f%7E- 04gUry-01h- ... |
[0, ri=, zb0, 0f, 0gury, 0h, 0rc, tn=, 0, en=, ... |
b1498592370545%3D1 | [b0=, 0] |
v%3D13111002 | [v=, 0] |
COLLCC%3D3442798258%26 | [collcc=, 0] |
t%3Dcheck%26rec%3Dstratus %26etyp%3Dconnect%26z ... |
[t=, check, rec=, stratus, etyp=, connect, ... |
cn_600022%2Ccn_600516%2Cc n_000002%2Ccn_600519% ... |
[cn_0, cn_0, cn_0, cn_0, cn_0, cn_0, cn_0, cn_0, ... |
_%3D1498179095094%26list% 3Dsh600030 ... |
[_=, 0, list=, sh0] |
evil.head()
param | parsed |
---|---|
Search%3D%3C/script%3E%3C img/%2A%00/src%3D%22w ... |
[search=, </script>, <img, src=, ... |
symbol%3D%3Ch1%3E%3Cscrip t%3Ealert%28/hacked/% ... |
[symbol=, <h0>, <script>, alert(, hacked, ), ... |
query%3D%3CIMG%2B%22%22%2 2%3E%3CSCRIPT%3Ealert ... |
[query=, <img, >, <script>, alert(, ... |
ReturnUrl%3Dhttp%3A//www. elle.fr/recherche ... |
[returnurl=, http://u, globale, searchtext, ), ... |
_lang%3D%22%3E%3Cscript%3 Ealert%28document.coo ... |
[_lang=, >, <script>, alert(, document.cookie, ... |
language%3D%22%3E%3C/scri pt%3E%22%3E%27%3E%3Cs ... |
[language=, >, </script>, >, >, <script>, alert(, ... |
q%3Dbentley%26stylesheet% 3D%22%3E%3Cscript%3Ea ... |
[q=, bentley, stylesheet=, >, <scri ... |
option%3Dcom_wdshop%26vie w%3Duserinfo%26ajax_j ... |
[option=, com_wdshop, view=, userinfo, ... |
CT_ORIG_URL%3D/arena/%22% 3E%3Cscript%3Ealert%2 ... |
[ct_orig_url=, arena, >, <script>, alert(, 0, ), ... |
query%3DSearch...%26Produ ct%3D%27%22--%3E%3C/s ... |
[query=, search..., product=, >, </style>, ... |
4 词表
# Collect every token from every row of evil['parsed'] into one flat list,
# which is later turned into a new SFrame.
# The column is iterated directly: apply() is meant to return a transformed
# column, and using it only for the append() side effect produced a throwaway
# column of None lists.
ans = [token for tokens in evil['parsed'] for token in tokens]
dtype: list
Rows: 40637
[[None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, 
None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None], [None, None, None, None, None, None, None], [None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None], [None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, 
None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None], [None, None, None, None], [None, None, None, None], [None, None, None, None, None], [None, None, None, None, None], [None, None, None, None, None, None, None], [None, None, None, None, None], [None, None], [None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, 
None], [None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None], [None, None, None, None, None, None, None, None, None], ... ]
evil_word_frame = gl.SFrame(data=ans)
evil_word_frame
X1 |
---|
search= |
</script> |
<img |
src= |
worksinchrome |
colon |
prompt |
x0 |
0 |
x0 |
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
# Count how often each token occurs, de-duplicate, and sort by frequency.
# A single groupby pass yields one row per distinct token together with its
# count, replacing a filter_by() scan of the whole frame for every row.
evil_word_frame = evil_word_frame.groupby(
    'X1', {'count': gl.aggregate.COUNT()}
).sort('count', ascending=False)
evil_word_frame
X1 | count |
---|---|
0 | 491 |
) | 155 |
> | 120 |
alert( | 113 |
</script> | 99 |
<script> | 83 |
string.fromcharcode( | 37 |
http://u | 15 |
document.cookie | 15 |
xss | 10 |
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
# Keep the first 300 rows as the vocabulary (the frame was sorted by
# descending count above, so these are the most frequent tokens).
wordlist_len = 300
evil_word_frame = evil_word_frame[0:wordlist_len]
evil_word_frame
X1 | count |
---|---|
0 | 491 |
) | 155 |
> | 120 |
alert( | 113 |
</script> | 99 |
<script> | 83 |
string.fromcharcode( | 37 |
http://u | 15 |
document.cookie | 15 |
xss | 10 |
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
evil_word_frame.save('file/evil_word_list.csv', format='csv')
5 词向量
# Replace every token that is not in the vocabulary with 'WORD' and store the
# result in the 'words' column.
# The vocabulary is materialized as a set once, so each membership test is a
# hash lookup instead of a filter_by() scan of the vocabulary frame per token.
vocab = set(evil_word_frame['X1'])
evil['words'] = evil.apply(lambda x: [i if i in vocab else 'WORD' for i in x['parsed']])
evil
param | parsed | words |
---|---|---|
Search%3D%3C/script%3E%3C img/%2A%00/src%3D%22w ... |
[search=, </script>, <img, src=, ... |
[search=, </script>, <img, src=, ... |
symbol%3D%3Ch1%3E%3Cscrip t%3Ealert%28/hacked/% ... |
[symbol=, <h0>, <script>, alert(, hacked, ), ... |
[WORD, <h0>, <script>, alert(, hacked, ), ... |
query%3D%3CIMG%2B%22%22%2 2%3E%3CSCRIPT%3Ealert ... |
[query=, <img, >, <script>, alert(, ... |
[query=, <img, >, <script>, alert(, ... |
ReturnUrl%3Dhttp%3A//www. elle.fr/recherche ... |
[returnurl=, http://u, globale, searchtext, ), ... |
[returnurl=, http://u, globale, searchtext, ), ... |
_lang%3D%22%3E%3Cscript%3 Ealert%28document.coo ... |
[_lang=, >, <script>, alert(, document.cookie, ... |
[_lang=, >, <script>, alert(, document.cookie, ... |
language%3D%22%3E%3C/scri pt%3E%22%3E%27%3E%3Cs ... |
[language=, >, </script>, >, >, <script>, alert(, ... |
[language=, >, </script>, >, >, <script>, alert(, ... |
q%3Dbentley%26stylesheet% 3D%22%3E%3Cscript%3Ea ... |
[q=, bentley, stylesheet=, >, <scri ... |
[q=, bentley, stylesheet=, >, <scri ... |
option%3Dcom_wdshop%26vie w%3Duserinfo%26ajax_j ... |
[option=, com_wdshop, view=, userinfo, ... |
[WORD, com_wdshop, view=, userinfo, ajax_json=, ... |
CT_ORIG_URL%3D/arena/%22% 3E%3Cscript%3Ealert%2 ... |
[ct_orig_url=, arena, >, <script>, alert(, 0, ), ... |
[ct_orig_url=, arena, >, <script>, alert(, 0, ), ... |
query%3DSearch...%26Produ ct%3D%27%22--%3E%3C/s ... |
[query=, search..., product=, >, </style>, ... |
[query=, search..., product=, >, </style>, ... |
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
# Apply the same out-of-vocabulary replacement to the normal samples, using
# the vocabulary built from the malicious samples.
# The vocabulary is materialized as a set once, so each membership test is a
# hash lookup instead of a filter_by() scan of the vocabulary frame per token.
vocab = set(evil_word_frame['X1'])
normal['words'] = normal.apply(lambda x: [i if i in vocab else 'WORD' for i in x['parsed']])
normal
param | parsed | words |
---|---|---|
_%3D1498591621808 | [_=, 0] | [WORD, 0] |
code%3Dzs_000001%2Czs_399 001%2Czs_399006%26cb% ... |
[code=, zs_0, zs_0, zs_0, cb=, fortune_hq_cn, _=, ... |
[WORD, WORD, WORD, WORD, WORD, WORD, WORD, 0] ... |
_%3D1498591951848%26list% 3Dml_sh600030 ... |
[_=, 0, list=, ml_sh0] | [WORD, 0, WORD, WORD] |
6053%26ri%3Dzb6-00f%7E- 04gUry-01h- ... |
[0, ri=, zb0, 0f, 0gury, 0h, 0rc, tn=, 0, en=, ... |
[0, WORD, WORD, WORD, WORD, WORD, WORD, WORD, ... |
b1498592370545%3D1 | [b0=, 0] | [WORD, 0] |
v%3D13111002 | [v=, 0] | [v=, 0] |
COLLCC%3D3442798258%26 | [collcc=, 0] | [WORD, 0] |
t%3Dcheck%26rec%3Dstratus %26etyp%3Dconnect%26z ... |
[t=, check, rec=, stratus, etyp=, connect, ... |
[WORD, WORD, WORD, WORD, WORD, WORD, WORD, WORD, ... |
cn_600022%2Ccn_600516%2Cc n_000002%2Ccn_600519% ... |
[cn_0, cn_0, cn_0, cn_0, cn_0, cn_0, cn_0, cn_0, ... |
[WORD, WORD, WORD, WORD, WORD, WORD, WORD, WORD, ... |
_%3D1498179095094%26list% 3Dsh600030 ... |
[_=, 0, list=, sh0] | [WORD, 0, WORD, WORD] |
Note: Only the head of the SFrame is printed.
You can use print_rows(num_rows=m, num_columns=n) to print more rows and columns.
5.1 word2vec
from gensim.models.word2vec import Word2Vec
Using TensorFlow backend.
# Hyper-parameters for the Word2Vec training call below.
# Passed as Word2Vec's `size` argument (length of each word vector).
embedding_size=128
# Passed as Word2Vec's `window` argument.
skip_window=5
# Passed as Word2Vec's `negative` argument.
num_sampled=64
# Passed as Word2Vec's `iter` argument.
num_iter=100
data_set = evil['words']
data_set[0:10]
dtype: list
Rows: 10
[['search=', '</script>', '<img', 'src=', 'worksinchrome', 'colon', 'prompt', 'x0', '0', 'x0', 'onerror=', 'eval(', 'src', ')', '>'], ['WORD', '<h0>', '<script>', 'alert(', 'hacked', ')', '</script>', '</h0>', 'id=', '0'], ['query=', '<img', '>', '<script>', 'alert(', '"cyb0r_pr0dat0r"', ')', '</script>', '>', 'WORD', 'true', 'ct=', 'null', 'autobounce=', 'true'], ['returnurl=', 'http://u', 'globale', 'searchtext', ')', '>', '<script>', 'alert(', 'xss', 'by', 'm0ke', ')', '</script>'], ['_lang=', '>', '<script>', 'alert(', 'document.cookie', ')', '</script>'], ['language=', '>', '</script>', '>', '>', '<script>', 'alert(', 'arman', 'ires', 'digital', 'boys', 'underground', 'http://u', ')', '</script>'], ['q=', 'bentley', 'stylesheet=', '>', '<script>', 'alert(', "'raven'", ')', '</script>', 'collections=', 'libcms'], ['WORD', 'com_wdshop', 'view=', 'userinfo', 'ajax_json=', 'ajax_fill_city_state', 'format=', 'WORD', 'zip=', '>', '</style>', '</script>', '<script>', 'alert(', 'document.cookie', ')', '</script>'], ['ct_orig_url=', 'arena', '>', '<script>', 'alert(', '0', ')', '</script>', '<iframe', 'src=', 'http://u', '>', '</iframe>'], ['query=', 'search...', 'product=', '>', '</style>', '</script>', '<script>', 'alert(', "'xss'", ')', '</script>', 'page=', '0']]
model=Word2Vec(data_set,size=embedding_size,window=skip_window,negative=num_sampled,iter=num_iter)
#model=Word2Vec(data_set)
5.2 模型存取
# Persist the trained model with gensim's own save format.
model.save('model_word2vec_auto')
# Alternative: persist with pickle instead.
# import pickle
# with open('model_word2vec_pickle','wb') as f:
#     pickle.dump(model,f)
# Reload the model from the file written above.
model_new = Word2Vec.load('model_word2vec_auto')
# Alternative: restore from the pickle file. pickle.load() needs an open
# binary file object, not a path string.
# import pickle
# with open('model_word2vec_pickle','rb') as f:
#     model_new = pickle.load(f)
5.3 数据存取
# evil.save('evil_data')
# normal.save('normal_data')
# evil = gl.SFrame('evil_data')
# normal = gl.SFrame('normal_data')
5.4 测试模型
embeddings=model_new.wv
embeddings.similar_by_word("</script>",5)
[('keyword=', 0.4176161289215088),
('searchfor=', 0.39741984009742737),
('page=', 0.38440901041030884),
('id=', 0.3829260766506195),
('q=', 0.3615732789039612)]
embeddings.similar_by_word("alert(",5)
[(')', 0.3255831003189087),
('</marquee>', 0.3212870955467224),
('</script>', 0.3093520998954773),
('</h0>', 0.30848926305770874),
('<h0>', 0.2897389233112335)]
5.5 添加向量和标签
# Map each vocabulary word to its index in the trained embeddings.
dictionary = {word: idx for idx, word in enumerate(embeddings.index2word)}
# Inverse mapping: index back to word.
reverse_dictionary = {idx: word for word, idx in dictionary.items()}
# Bundle both lookups together with the embeddings themselves.
word2vec = {
    "dictionary": dictionary,
    "embeddings": embeddings,
    "reverse_dictionary": reverse_dictionary,
}
from graphlab import SArray
def generate_vec(words):
    """Sum the word2vec embeddings of a token list into one feature vector.

    Args:
        words: Iterable of tokens (one row of the ``'words'`` column).

    Returns:
        SArray: Element-wise sum of the embeddings of every token found in
        ``dictionary``. When no token is found, the result is a zero vector
        of length ``embedding_size``.
    """
    # Size the accumulator from the training hyper-parameter instead of a
    # literal 128, so changing embedding_size cannot cause a length mismatch
    # when the embeddings are added below.
    total = SArray([0.0] * embedding_size)
    for word in words:
        # Tokens missing from the trained vocabulary contribute nothing.
        if word in dictionary:
            total += SArray(embeddings[word])
    return total
# Build the labelled feature frames. Each frame is made from two columns:
# the summed word vector of every row (generate_vec over the 'words' column)
# and a constant label, 1 for the malicious samples and 0 for the normal ones.
# NOTE(review): the columns are presumably auto-named X1 and X2 -- confirm.
black = gl.SFrame([evil.apply(lambda x:generate_vec(x['words'])),evil.apply(lambda x:1)])
white = gl.SFrame([normal.apply(lambda x:generate_vec(x['words'])),normal.apply(lambda x:0)])
black.show()
Canvas is accessible via web browser at the URL: http://localhost:59854/index.html
Opening Canvas in default web browser.
#black.save('fuck_black')
#white.save('fuck_white')
len(white)
200129
二分类
之后的工作即划分数据集之后训练SVM,这部分工作导出到机器学习平台完成,可能涉敏就不展开了。
反思
-
从结果来看,尽管验证集给出了良好的预测效果,但模型的泛化能力很差。如果想要机器在大量的标签和函数的组合中识别出是否为恶意,对数据集的要求会更高。
-
同时,仅根据GET/POST参数判定XSS攻击是不完整的,需关联response的返回内容才能判断是否攻击成功,即便这样也仅能检出反射型XSS。该模型面对真实场景中复杂的输入和触发方式可能会略显鸡肋,需结合业务