--[[
Converts "trivial" SpamAssassin rules into rspamd multimap regexp maps.

Usage: every command line argument (`arg`) is treated as a path to an SA
configuration file. For each file the script:
  * collects simple `header`, `body`, `rawbody`, `full` and `uri` regexp rules
    together with their `describe` and `score` lines;
  * writes one `*_re.map` file per rule type (and per header name);
  * writes `auto_multimap.conf` describing those maps;
  * writes every line it could not convert to `auto_sa.conf`.
]]--

local fun = require "fun"
local rspamd_logger = require "rspamd_logger"
local util = require "rspamd_util"
local lua_util = require "lua_util"
local rspamd_regexp = require "rspamd_regexp"
local ucl = require "ucl"

-- Source lines that were not converted; dumped verbatim to auto_sa.conf
local complicated = {}
-- Converted rules: rules[type][symbol], or rules.header[header_name][symbol]
local rules = {}
-- symbol -> numeric score, filled from `score` lines
local scores = {}

--- Joins all words after the first `start` ones with single spaces.
-- @param words array of whitespace separated tokens of a line
-- @param start number of leading tokens to drop
-- @return string with the remaining tokens
local function words_to_re(words, start)
  return table.concat(fun.totable(fun.drop_n(start, words)), " ")
end

--- Collects every match of a Lua pattern in a string.
-- @param str string to scan
-- @param delim Lua pattern describing one token (default: non-space run)
-- @return array of matched tokens
local function split(str, delim)
  local result = {}

  if not delim then
    delim = '[^%s]+'
  end

  for token in string.gmatch(str, delim) do
    table.insert(result, token)
  end

  return result
end

--- Builds a function extracting one field from every parsed address.
-- @param field key to read from each element returned by util.parse_addr
-- @return function(str) returning an array of the collected field values
local function make_addr_extractor(field)
  return function(str)
    local addr_parsed = util.parse_addr(str)
    local ret = {}

    if addr_parsed then
      for _, elt in ipairs(addr_parsed) do
        if elt[field] then
          table.insert(ret, elt[field])
        end
      end
    end

    return ret
  end
end

--- Parses the header part of a `header` rule (e.g. `From:addr|To`).
-- Sets `cur_rule.header` to the list of parsed header descriptors and
-- `cur_rule.ordinary` to true only when the rule refers to exactly one plain
-- header without modifiers (the only form that can be converted).
-- @param hline header specification, alternatives separated by `|`
-- @param cur_rule rule table being filled
local function handle_header_def(hline, cur_rule)
  -- Now check for modifiers inside header's name
  local hdrs = split(hline, '[^|]+')
  local hdr_params = {}
  -- Check if an re is an ordinary re
  local ordinary = true

  for _, h in ipairs(hdrs) do
    if h == 'ALL' or h == 'ALL:raw' then
      ordinary = false
    else
      local args = split(h, '[^:]+')
      -- A fresh table per header: a shared one would make every entry of
      -- hdr_params alias the last parsed header
      local cur_param = {
        strong = false,
        raw = false,
        header = args[1],
      }

      if not args[1] or args[2] then
        -- Either there is no header name at all, or we have some ops that
        -- are required for the header, so it's not ordinary
        ordinary = false
      end

      fun.each(function(func)
        if func == 'addr' then
          cur_param['function'] = make_addr_extractor('addr')
        elseif func == 'name' then
          cur_param['function'] = make_addr_extractor('name')
        elseif func == 'raw' then
          cur_param['raw'] = true
        elseif func == 'case' then
          cur_param['strong'] = true
        else
          rspamd_logger.warnx(rspamd_config, 'Function %1 is not supported in %2',
            func, cur_rule['symbol'])
        end
      end, fun.tail(args))

      -- Some header rules require splitting to check of multiple headers
      if cur_param['header'] == 'MESSAGEID' then
        -- Special case for spamassassin
        ordinary = false
      elseif cur_param['header'] == 'ToCc' then
        ordinary = false
      else
        table.insert(hdr_params, cur_param)
      end
    end
  end

  -- Assigned once after the loop so the fields are set even when `hline`
  -- yields no tokens; in that case the rule is not ordinary
  cur_rule['ordinary'] = ordinary and #hdr_params == 1
  cur_rule['header'] = hdr_params
end

--- Reads one SA configuration file and fills `rules`, `scores` and
-- `complicated`.
-- @param f open file handle (anything providing `:lines()`)
local function process_sa_conf(f)
  -- Rule currently being assembled and whether it is complete enough to store
  local cur_rule = {}
  local valid_rule = false
  -- State of the (very rough) if/else/endif tracking
  local skip_to_endif = false
  local if_nested = 0

  -- Rule keyword -> internal rule type for rules of form `kw SYMBOL /re/`
  local body_rule_types = {
    body = 'sabody',
    rawbody = 'sarawbody',
    full = 'message',
  }

  -- Puts cur_rule into `rules` if it is well formed, logs an error otherwise
  local function store_cur_rule()
    if not cur_rule['symbol'] or not cur_rule.type then
      rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', cur_rule)
      return
    end

    local hdr_name
    if cur_rule.type == 'header' then
      local first = cur_rule.header and cur_rule.header[1]
      hdr_name = first and first.header
      if not hdr_name then
        rspamd_logger.errx(rspamd_config, 'bad rule definition: %1', cur_rule)
        return
      end
    end

    if not rules[cur_rule.type] then
      rules[cur_rule.type] = {}
    end
    local target = rules[cur_rule.type]

    if hdr_name then
      if not target[hdr_name] then
        target[hdr_name] = {}
      end
      target = target[hdr_name]
    end

    target[cur_rule['symbol']] = cur_rule
  end

  -- Stores the pending rule and always starts a fresh one, so that a rejected
  -- rule cannot leak its fields or its `valid` flag into the next rule
  local function insert_cur_rule()
    store_cur_rule()
    cur_rule = {}
    valid_rule = false
  end

  -- Keeps an unconvertible rule line for auto_sa.conf and drops the partially
  -- filled rule. Only used right after the pending rule has been flushed.
  local function reject_rule(l)
    table.insert(complicated, l)
    cur_rule = {}
  end

  -- Extracts the score from the tokens of a `score` line; 0 when invalid
  local function parse_score(words)
    local raw

    if #words == 3 then
      -- score rule <x>
      raw = words[3]
    elseif #words == 6 then
      -- score rule <x1> <x2> <x3> <x4>
      -- we assume here that bayes and network are enabled and select <x4>
      raw = words[6]
    end

    local score = raw and tonumber(raw)
    if not score then
      rspamd_logger.errx(rspamd_config, 'invalid score for %1', words[2])
      return 0
    end

    return score
  end

  -- Handles `body`, `rawbody` and `full` rules: `<kw> SYMBOL /regexp/`
  local function parse_body_rule(words, l, rule_type)
    if valid_rule then
      insert_cur_rule()
    end

    local first_char = words[3] and string.sub(words[3], 1, 1)
    if first_char ~= '/' and first_char ~= 'm' then
      -- might be function
      reject_rule(l)
      return
    end

    cur_rule['symbol'] = words[2]
    cur_rule['type'] = rule_type
    cur_rule['re_expr'] = words_to_re(words, 2)
    cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
    if rule_type == 'message' then
      cur_rule['raw'] = true
    end

    if cur_rule['re'] then
      valid_rule = true
    else
      -- Keep the line for SA instead of silently losing the rule
      rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
        cur_rule['re_expr'], cur_rule['symbol'])
      reject_rule(l)
    end
  end

  -- Handles `header SYMBOL Header =~ /regexp/`
  local function parse_header_rule(words, l)
    if valid_rule then
      insert_cur_rule()
    end

    -- Negated matches and non-regexp forms (exists:, eval:, ...) are left to SA
    if words[4] ~= '=~' then
      reject_rule(l)
      return
    end

    cur_rule['type'] = 'header'
    cur_rule['symbol'] = words[2]
    cur_rule['re_expr'] = words_to_re(words, 4)

    if string.find(cur_rule['re_expr'], '%s+%[if%-unset:') then
      reject_rule(l)
      return
    end

    cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])
    if not cur_rule['re'] then
      rspamd_logger.warnx(rspamd_config, "Cannot parse regexp '%1' for %2",
        cur_rule['re_expr'], cur_rule['symbol'])
      reject_rule(l)
      return
    end

    handle_header_def(words[3], cur_rule)
    if not cur_rule['ordinary'] then
      reject_rule(l)
      return
    end

    valid_rule = true
  end

  -- Handles `uri SYMBOL /regexp/`
  local function parse_uri_rule(words, l)
    if valid_rule then
      insert_cur_rule()
    end

    cur_rule['type'] = 'uri'
    cur_rule['symbol'] = words[2]
    cur_rule['re_expr'] = words_to_re(words, 2)
    cur_rule['re'] = rspamd_regexp.create(cur_rule['re_expr'])

    if cur_rule['re'] and cur_rule['symbol'] then
      valid_rule = true
    else
      reject_rule(l)
    end
  end

  -- Processes a single line of the configuration
  local function process_line(l)
    l = lua_util.rspamd_str_trim(l)
    -- Replace bla=~/re/ with bla =~ /re/ (#2372)
    l = l:gsub('([^%s])%s*([=!]~)%s*([^%s])', '%1 %2 %3')

    if string.len(l) == 0 or string.sub(l, 1, 1) == '#' then
      return
    end

    -- Unbalanced if/endif
    if if_nested < 0 then
      if_nested = 0
    end

    if skip_to_endif then
      if string.match(l, '^endif') then
        if_nested = if_nested - 1
        if if_nested == 0 then
          skip_to_endif = false
        end
      elseif string.match(l, '^if') then
        if_nested = if_nested + 1
      elseif string.match(l, '^else') then
        -- Else counterpart for if
        -- NOTE(review): this stops skipping at any nesting depth, not only
        -- for the outermost `if` -- confirm whether that is intended
        skip_to_endif = false
      end
      table.insert(complicated, l)
      return
    end

    -- Conditional directives are always left to SA. Return right away:
    -- falling through to the keyword dispatch below would add the same line
    -- to `complicated` a second time and unbalance if/endif in auto_sa.conf
    if string.match(l, '^if') then
      -- Covers `ifplugin`, `if !plugin(` and any unknown `if`
      skip_to_endif = true
      if_nested = if_nested + 1
      table.insert(complicated, l)
      return
    elseif string.match(l, '^else') then
      -- Else counterpart for if
      skip_to_endif = true
      table.insert(complicated, l)
      return
    elseif string.match(l, '^endif') then
      if_nested = if_nested - 1
      table.insert(complicated, l)
      return
    end

    -- Skip comments: drop everything starting from a token that begins with #
    local words = fun.totable(fun.take_while(
      function(w) return string.sub(w, 1, 1) ~= '#' end,
      fun.filter(function(w) return w ~= "" end, fun.iter(split(l)))))
    local keyword = words[1]

    if keyword == "header" then
      parse_header_rule(words, l)
    elseif body_rule_types[keyword] then
      parse_body_rule(words, l, body_rule_types[keyword])
    elseif keyword == "uri" then
      parse_uri_rule(words, l)
    elseif keyword == "meta" then
      -- meta SYMBOL expression
      if valid_rule then
        insert_cur_rule()
      end
      table.insert(complicated, l)
    elseif keyword == "describe" and valid_rule then
      cur_rule['description'] = words_to_re(words, 2)
    elseif keyword == "score" then
      -- NOTE(review): score lines are consumed here and never written to
      -- `complicated`, so scores of rules left in auto_sa.conf are not
      -- carried over -- confirm whether that is intended
      if words[2] then
        scores[words[2]] = parse_score(words)
      else
        -- `score` without a symbol: nothing to index the score by
        table.insert(complicated, l)
      end
    else
      table.insert(complicated, l)
    end
  end

  for l in f:lines() do
    process_line(l)
  end

  if valid_rule then
    insert_cur_rule()
  end
end

for _, matched in ipairs(arg) do
  local f = io.open(matched, "r")

  if f then
    rspamd_logger.messagex(rspamd_config, 'loading SA rules from %s', matched)
    process_sa_conf(f)
    f:close()
  else
    rspamd_logger.errx(rspamd_config, "cannot open %1", matched)
  end
end

-- Multimap configuration to be serialised to auto_multimap.conf
local multimap_conf = {}

--- Writes the regexp map for one group of rules and registers it in
-- `multimap_conf`.
-- @param what rule type as stored in `rules` (sabody, sarawbody, message/full,
--   uri or header)
-- @param syms table symbol -> rule
-- @param hdr header name; only used when `what` is 'header'
local function handle_rule(what, syms, hdr)
  local mtype
  local filter
  local fname
  local header
  local sym = what:upper()

  if what == 'sabody' then
    mtype = 'content'
    fname = 'body_re.map'
    filter = 'oneline'
  elseif what == 'sarawbody' then
    fname = 'raw_body_re.map'
    mtype = 'content'
    filter = 'rawtext'
  elseif what == 'full' or what == 'message' then
    -- `full` rules are stored by the parser under the 'message' type
    fname = 'full_re.map'
    mtype = 'content'
    filter = 'full'
  elseif what == 'uri' then
    fname = 'uri_re.map'
    mtype = 'url'
    filter = 'full'
  elseif what == 'header' then
    fname = ('hdr_' .. hdr .. '_re.map'):lower()
    mtype = 'header'
    header = hdr
    sym = sym .. '_' .. hdr:upper()
  else
    rspamd_logger.errx('unknown type: %s', what)
    return
  end

  local conf = {
    type = mtype,
    filter = filter,
    symbol = 'SA_MAP_AUTO_' .. sym,
    regexp = true,
    map = fname,
    header = header,
    symbols = {}
  }

  local re_file, err = io.open(fname, 'w')
  if not re_file then
    rspamd_logger.errx('cannot open %s for writing: %s', fname, err)
    return
  end

  -- Sort symbols so that the generated files are stable between runs
  local names = {}
  for k in pairs(syms) do
    names[#names + 1] = k
  end
  table.sort(names)

  for _, k in ipairs(names) do
    local score = scores[k] or 0.0
    re_file:write(string.format('/%s/ %s:%f\n', tostring(syms[k].re), k, score))
    table.insert(conf.symbols, k)
  end

  re_file:close()

  multimap_conf[sym:lower()] = conf
  rspamd_logger.messagex('stored %s regexp in %s', sym:lower(), fname)
end

for k, v in pairs(rules) do
  if k == 'header' then
    -- Header rules are grouped per header name, one map per header
    for h, r in pairs(v) do
      handle_rule(k, r, h)
    end
  else
    handle_rule(k, v)
  end
end

local out = ucl.to_format(multimap_conf, 'ucl')
local mmap_conf = assert(io.open('auto_multimap.conf', 'w'))
mmap_conf:write(out)
mmap_conf:close()
rspamd_logger.messagex('stored multimap conf in %s', 'auto_multimap.conf')

-- Everything that was not converted goes back to SA, except blank lines
local sa_remain = assert(io.open('auto_sa.conf', 'w'))
fun.each(function(l)
  sa_remain:write(l)
  sa_remain:write('\n')
end, fun.filter(function(l) return not string.match(l, '^%s+$') end, complicated))
sa_remain:close()
rspamd_logger.messagex('stored sa remains conf in %s', 'auto_sa.conf')