-- Converts SpamAssassin rule files (given as command-line arguments) into
-- regexp map files plus 'auto_multimap.conf'; lines that are not converted
-- are written to 'auto_sa.conf'.
							- local fun = require "fun"
 
- local rspamd_logger = require "rspamd_logger"
 
- local util = require "rspamd_util"
 
- local lua_util = require "lua_util"
 
- local rspamd_regexp = require "rspamd_regexp"
 
- local ucl = require "ucl"
 
-- State shared by the whole script:
--   complicated - configuration lines that are not converted; they are
--                 written to 'auto_sa.conf' at the end
--   rules       - converted rules, grouped by rule type
--   scores      - score per symbol, taken from `score` lines
local complicated, rules, scores = {}, {}, {}
 
--- Join the tokens that follow the first `start` entries with single spaces.
-- @param words array of tokens
-- @param start number of leading tokens to skip
-- @return the remaining tokens joined by " " ("" when nothing remains)
local function words_to_re(words, start)
  -- table.concat accepts a start index, so no intermediate table is needed
  return table.concat(words, " ", start + 1)
end
 
--- Collect every match of a Lua pattern found in a string.
-- @param str string to scan
-- @param delim Lua pattern describing one token; defaults to '[^%s]+'
--   (runs of non-whitespace characters)
-- @return array of the matched tokens, in order of appearance
local function split(str, delim)
  local pattern = delim or '[^%s]+'
  local tokens = {}

  for piece in string.gmatch(str, pattern) do
    tokens[#tokens + 1] = piece
  end

  return tokens
end
 
--- Parse the header part of a SpamAssassin `header` rule.
-- `hline` is a '|'-separated list of header names; each name may be followed
-- by ':'-separated modifiers (`addr`, `name`, `raw`, `case`).
-- The result is stored in `cur_rule`:
--   * cur_rule['header']   - array with one parameter table per usable header
--                            (fields: header, strong, raw, optional function)
--   * cur_rule['ordinary'] - true only when the rule checks exactly one header
--                            without modifiers and without special names
-- @param hline header specification taken from the rule line
-- @param cur_rule rule table to fill in (its 'symbol' is used in log messages)
local function handle_header_def(hline, cur_rule)
  -- Build a function that parses an address list with util.parse_addr and
  -- returns the given field ('addr' or 'name') of every parsed element.
  local function addr_field_extractor(field)
    return function(str)
      local addr_parsed = util.parse_addr(str)
      local ret = {}
      if addr_parsed then
        for _, elt in ipairs(addr_parsed) do
          if elt[field] then
            table.insert(ret, elt[field])
          end
        end
      end
      return ret
    end
  end

  local hdr_params = {}
  -- Check if an re is an ordinary re
  local ordinary = true

  -- Now check for modifiers inside header's name
  for _, h in ipairs(split(hline, '[^|]+')) do
    if h == 'ALL' or h == 'ALL:raw' then
      ordinary = false
    else
      local args = split(h, '[^:]+')
      -- A fresh table for every header: sharing one table between iterations
      -- would make all entries of hdr_params describe the last header only.
      local cur_param = {
        strong = false,
        raw = false,
        header = args[1],
      }

      if args[2] then
        -- We have some ops that are required for the header, so it's not ordinary
        ordinary = false
      end

      for i = 2, #args do
        local func = args[i]
        if func == 'addr' or func == 'name' then
          cur_param['function'] = addr_field_extractor(func)
        elseif func == 'raw' then
          cur_param['raw'] = true
        elseif func == 'case' then
          cur_param['strong'] = true
        else
          rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2',
            func, cur_rule['symbol'])
        end
      end

      -- Some header rules require splitting to check of multiple headers
      if cur_param['header'] == 'MESSAGEID' or cur_param['header'] == 'ToCc' then
        -- Special cases for spamassassin: never convertible
        ordinary = false
      else
        table.insert(hdr_params, cur_param)
      end
    end
  end

  -- Assigned unconditionally so that an empty header list cannot leave
  -- values from a previous call in place; zero usable headers is not ordinary.
  cur_rule['ordinary'] = ordinary and #hdr_params == 1
  cur_rule['header'] = hdr_params
end
 
--- Read one SpamAssassin configuration file and sort its lines.
-- Rules that can be expressed as a single regexp are stored in `rules`
-- (grouped by type; header rules are additionally grouped by header name),
-- `score` lines are stored in `scores`, and every other non-empty,
-- non-comment line is appended to `complicated`.
-- NOTE: `score` lines are stored in `scores` only; they are not copied to
-- `complicated`, even when the rule they refer to ended up there.
-- @param f open file handle (anything that provides `f:lines()`)
local function process_sa_conf(f)
  local cur_rule = {}
  local valid_rule = false
  local skip_to_endif = false
  local if_nested = 0

  -- Keep a line for the remaining SpamAssassin configuration
  local function keep(line)
    table.insert(complicated, line)
  end

  -- Store the rule collected so far and start with a clean state. The state
  -- is reset even when the rule is rejected, so a bad rule can neither be
  -- re-inserted later nor leak its fields into the next rule.
  local function insert_cur_rule()
    local rule = cur_rule
    cur_rule = {}
    valid_rule = false

    local hdr_name
    if rule.type == 'header' then
      hdr_name = rule.header and rule.header[1] and rule.header[1].header
    end

    if not rule.type or not rule['symbol']
        or (rule.type == 'header' and not hdr_name) then
      rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', rule)
      return
    end

    local target = rules[rule.type]
    if not target then
      target = {}
      rules[rule.type] = target
    end

    if hdr_name then
      -- Header rules are grouped by the name of the checked header
      if not target[hdr_name] then
        target[hdr_name] = {}
      end
      target = target[hdr_name]
    end

    target[rule['symbol']] = rule
  end

  -- Flush the pending rule (if any) before a new rule definition is parsed
  local function begin_rule()
    if valid_rule then
      insert_cur_rule()
    end
    cur_rule = {}
    valid_rule = false
  end

  local function parse_score(words)
    if #words == 3 then
      -- score rule <x>
      return tonumber(words[3])
    elseif #words == 6 then
      -- score rule <x1> <x2> <x3> <x4>
      -- we assume here that bayes and network are enabled and select <x4>
      return tonumber(words[6])
    else
      rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
    end

    return 0
  end

  -- Track if/ifplugin/else/endif directives. Returns true when the line has
  -- been stored in `complicated` and must not be parsed as a rule.
  local function handle_conditional(line)
    -- Unbalanced if/endif
    if if_nested < 0 then if_nested = 0 end

    if skip_to_endif then
      if string.match(line, '^endif') then
        if_nested = if_nested - 1
        if if_nested == 0 then
          skip_to_endif = false
        end
      elseif string.match(line, '^if') then
        if_nested = if_nested + 1
      elseif string.match(line, '^else') then
        -- Else counterpart for if
        skip_to_endif = false
      end
      keep(line)
      return true
    end

    if string.match(line, '^if') then
      -- 'ifplugin', 'if !plugin(' and any other 'if' are all treated the
      -- same way: the whole conditional body is left to SpamAssassin
      skip_to_endif = true
      if_nested = if_nested + 1
    elseif string.match(line, '^else') then
      -- Else counterpart for if
      skip_to_endif = true
    elseif string.match(line, '^endif') then
      if_nested = if_nested - 1
    else
      return false
    end

    -- The directive is stored exactly once; letting it fall through to the
    -- rule parser would store it a second time and unbalance the output.
    keep(line)
    return true
  end

  -- Split a line into words, dropping everything from the first word that
  -- starts with '#'
  local function tokenize(line)
    local words = {}
    for _, w in ipairs(split(line)) do
      if string.sub(w, 1, 1) == '#' then
        break
      end
      if w ~= "" then
        words[#words + 1] = w
      end
    end
    return words
  end

  -- header SYMBOL Header =~ /regexp/
  local function parse_header_rule(line, words)
    local op = words[4]
    if op ~= '=~' and op ~= '!~' then
      keep(line)
      return
    end

    cur_rule['type'] = 'header'
    cur_rule['symbol'] = words[2]

    if op == '!~' then
      -- Negated matches are not converted
      keep(line)
      return
    end

    cur_rule['re_expr'] = words_to_re(words, 4)
    if string.find(cur_rule['re_expr'], '%s+%[if%-unset:') then
      keep(line)
      return
    end

    cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
    if not cur_rule['re'] then
      rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
        cur_rule['re_expr'], cur_rule['symbol'])
      keep(line)
      return
    end

    handle_header_def(words[3], cur_rule)
    if not cur_rule['ordinary'] then
      keep(line)
      return
    end

    valid_rule = true
  end

  -- body|rawbody|full SYMBOL /regexp/
  local function parse_content_rule(line, words, rule_type, raw)
    cur_rule['symbol'] = words[2]

    local first = words[3] and string.sub(words[3], 1, 1)
    if first ~= '/' and first ~= 'm' then
      -- might be function
      keep(line)
      return
    end

    cur_rule['type'] = rule_type
    cur_rule['re_expr'] = words_to_re(words, 2)
    cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
    if raw then
      cur_rule['raw'] = true
    end

    if cur_rule['re'] then
      valid_rule = true
    else
      -- A rule that cannot be compiled is kept for SpamAssassin instead of
      -- being dropped silently
      rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
        cur_rule['re_expr'], cur_rule['symbol'])
      keep(line)
    end
  end

  -- uri SYMBOL /regexp/
  local function parse_uri_rule(line, words)
    cur_rule['type'] = 'uri'
    cur_rule['symbol'] = words[2]
    cur_rule['re_expr'] = words_to_re(words, 2)
    cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])

    if cur_rule['re'] and cur_rule['symbol'] then
      valid_rule = true
    else
      keep(line)
    end
  end

  local function process_line(raw_line)
    local l = lua_util.rspamd_str_trim(raw_line)
    -- Replace bla=~/re/ with bla =~ /re/ (#2372)
    l = l:gsub('([^%s])%s*([=!]~)%s*([^%s])', '%1 %2 %3')

    -- Skip empty lines and comments
    if string.len(l) == 0 or string.sub(l, 1, 1) == '#' then
      return
    end

    if handle_conditional(l) then
      return
    end

    local words = tokenize(l)
    local kind = words[1]

    if kind == "header" then
      begin_rule()
      parse_header_rule(l, words)
    elseif kind == "body" then
      begin_rule()
      parse_content_rule(l, words, 'sabody', false)
    elseif kind == "rawbody" then
      begin_rule()
      parse_content_rule(l, words, 'sarawbody', false)
    elseif kind == "full" then
      begin_rule()
      parse_content_rule(l, words, 'message', true)
    elseif kind == "uri" then
      begin_rule()
      parse_uri_rule(l, words)
    elseif kind == "meta" then
      -- meta SYMBOL expression
      begin_rule()
      keep(l)
    elseif kind == "describe" and valid_rule then
      cur_rule['description'] = words_to_re(words, 2)
    elseif kind == "score" then
      if words[2] then
        scores[words[2]] = parse_score(words)
      else
        -- 'score' without a symbol cannot be stored (nil table key)
        keep(l)
      end
    else
      keep(l)
    end
  end

  for l in f:lines() do
    process_line(l)
  end

  -- The last rule of the file has no following definition to flush it
  if valid_rule then
    insert_cur_rule()
  end
end
 
-- Parse every SpamAssassin rule file named on the command line
for _, matched in ipairs(arg) do
  local f = io.open(matched, "r")

  if f then
    rspamd_logger.messagex(rspamd_config, 'loading SA rules from %s', matched)
    process_sa_conf(f)
    -- Release the handle right away instead of waiting for garbage collection
    f:close()
  else
    rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
  end
end

-- Generated multimap configuration, one entry per written map file
local multimap_conf = {}
 
--- Write the regexp map for one group of rules and register it in
-- `multimap_conf`.
-- Every rule becomes one map line of the form `/<regexp>/ SYMBOL:score`;
-- the score is taken from `scores` and defaults to 0.
-- @param what rule type: 'sabody', 'sarawbody', 'full' (or its alias
--   'message'), 'uri' or 'header'
-- @param syms table mapping symbol name to rule (the rule's `re` is written)
-- @param hdr header name; used only when `what` is 'header'
local function handle_rule(what, syms, hdr)
  local mtype
  local filter
  local fname
  local header

  -- `full` rules are stored by process_sa_conf under the type 'message';
  -- without this alias they would be rejected as an unknown type and lost.
  if what == 'message' then
    what = 'full'
  end

  local sym = what:upper()

  if what == 'sabody' then
    mtype = 'content'
    fname = 'body_re.map'
    filter = 'oneline'
  elseif what == 'sarawbody' then
    fname = 'raw_body_re.map'
    mtype = 'content'
    filter = 'rawtext'
  elseif what == 'full' then
    fname = 'full_re.map'
    mtype = 'content'
    filter = 'full'
  elseif what == 'uri' then
    fname = 'uri_re.map'
    mtype = 'url'
    filter = 'full'
  elseif what == 'header' then
    fname = ('hdr_' .. hdr .. '_re.map'):lower()
    mtype = 'header'
    header = hdr
    sym = sym .. '_' .. hdr:upper()
  else
    rspamd_logger.errx('unknown type: %s', what)
    return
  end

  local re_file, open_err = io.open(fname, 'w')
  if not re_file then
    -- Report the failure instead of crashing on a nil file handle
    rspamd_logger.errx('cannot open %s for writing: %s', fname, open_err)
    return
  end

  local conf = {
    type = mtype,
    filter = filter,
    symbol = 'SA_MAP_AUTO_' .. sym,
    regexp = true,
    map = fname,
    header = header,
    symbols = {}
  }

  -- Sort the symbols so that repeated runs produce identical files
  local names = {}
  for k in pairs(syms) do
    names[#names + 1] = k
  end
  table.sort(names)

  for _, k in ipairs(names) do
    local score = scores[k] or 0.0
    re_file:write(string.format('/%s/ %s:%f\n', tostring(syms[k].re), k, score))
    table.insert(conf.symbols, k)
  end

  re_file:close()

  multimap_conf[sym:lower()] = conf
  rspamd_logger.messagex('stored %s regexp in %s', sym:lower(), fname)
end
 
-- Write one map per rule type; header rules get one map per header name
for rule_type, bucket in pairs(rules) do
  if rule_type ~= 'header' then
    handle_rule(rule_type, bucket)
  else
    for hdr_name, hdr_rules in pairs(bucket) do
      handle_rule(rule_type, hdr_rules, hdr_name)
    end
  end
end
 
-- Serialise the collected multimap configuration
local out = ucl.to_format(multimap_conf, 'ucl')
local mmap_conf, mmap_err = io.open('auto_multimap.conf', 'w')

if mmap_conf then
  mmap_conf:write(out)
  mmap_conf:close()
  rspamd_logger.messagex('stored multimap conf in %s', 'auto_multimap.conf')
else
  -- Report the failure instead of crashing on a nil file handle
  rspamd_logger.errx('cannot open %s for writing: %s', 'auto_multimap.conf',
    mmap_err)
end

-- Everything that was not converted stays a SpamAssassin configuration
local sa_remain, sa_err = io.open('auto_sa.conf', 'w')

if sa_remain then
  for _, l in ipairs(complicated) do
    -- Lines that consist of whitespace only are not written
    if not string.match(l, '^%s+$') then
      sa_remain:write(l)
      sa_remain:write('\n')
    end
  end

  sa_remain:close()
  rspamd_logger.messagex('stored sa remains conf in %s', 'auto_sa.conf')
else
  rspamd_logger.errx('cannot open %s for writing: %s', 'auto_sa.conf', sa_err)
end
 
 
  |