From 17ea75600009a9eb63f9914fa17e651c6bf32793 Mon Sep 17 00:00:00 2001 From: Holger Brunn Date: Thu, 26 Jul 2012 15:42:50 +0200 Subject: [PATCH 01/53] [ADD] initial commit --- fts_base/__init__.py | 23 ++ fts_base/__openerp__.py | 34 +++ fts_base/fts_base.py | 262 +++++++++++++++++++++++ fts_base/fts_proxy.py | 177 +++++++++++++++ fts_base/fts_proxy.xml | 48 +++++ fts_base/images/fulltextsearch-hover.png | Bin 0 -> 1713 bytes fts_base/images/fulltextsearch.png | Bin 0 -> 948 bytes 7 files changed, 544 insertions(+) create mode 100644 fts_base/__init__.py create mode 100644 fts_base/__openerp__.py create mode 100644 fts_base/fts_base.py create mode 100644 fts_base/fts_proxy.py create mode 100644 fts_base/fts_proxy.xml create mode 100644 fts_base/images/fulltextsearch-hover.png create mode 100644 fts_base/images/fulltextsearch.png diff --git a/fts_base/__init__.py b/fts_base/__init__.py new file mode 100644 index 0000000..fdd1661 --- /dev/null +++ b/fts_base/__init__.py @@ -0,0 +1,23 @@ +# -*- coding: utf-8 -*- +############################################################################## +# +# OpenERP, Open Source Management Solution +# This module copyright (C) 2012 Therp BV (). +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . 
+# +############################################################################## + +import fts_proxy +import fts_base diff --git a/fts_base/__openerp__.py b/fts_base/__openerp__.py new file mode 100644 index 0000000..fa76ef7 --- /dev/null +++ b/fts_base/__openerp__.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +############################################################################## +# +# OpenERP, Open Source Management Solution +# This module copyright (C) 2012 Therp BV (). +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU Affero General Public License as +# published by the Free Software Foundation, either version 3 of the +# License, or (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Affero General Public License for more details. +# +# You should have received a copy of the GNU Affero General Public License +# along with this program. If not, see . +# +############################################################################## + +{ + "name": "Fulltext search", + "version": "1.0", + "depends": ["base"], + "author": "Therp BV", + "category": "Searching", + "description": "Base module for fulltext search.", + "init_xml": [], + 'update_xml': ["fts_proxy.xml"], + 'demo_xml': [], + 'installable': True, + 'active': False, +} diff --git a/fts_base/fts_base.py b/fts_base/fts_base.py new file mode 100644 index 0000000..dad297d --- /dev/null +++ b/fts_base/fts_base.py @@ -0,0 +1,262 @@ +# -*- coding: utf-8 -*- +############################################################################## +# +# OpenERP, Open Source Management Solution +# This module copyright (C) 2012 Therp BV (). 
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Affero General Public License as
#    published by the Free Software Foundation, either version 3 of the
#    License, or (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
##############################################################################
try:
    from openerp import SUPERUSER_ID
except ImportError:
    # Fallback when the openerp package does not provide SUPERUSER_ID
    # (presumably OpenERP 6.0 -- TODO confirm).
    SUPERUSER_ID = 1

try:
    # Python 2: accept both str and unicode column names.
    _string_types = basestring
except NameError:
    _string_types = str


class fts_base_meta(type):
    """Metaclass that instantiates and registers every fts_base subclass.

    Each class created with this metaclass (except ``fts_base`` itself) is
    instantiated once with ``self.pool`` and a fresh cursor, and the instance
    is appended to the shared ``_plugins`` list. ``pool`` is expected to have
    been assigned on the class before any subclass is defined.
    """

    _plugins = []

    def __init__(self, name, bases, attrs):
        if name != 'fts_base':
            cr = self.pool.db.cursor()
            try:
                self._plugins.append(self(self.pool, cr))
                cr.commit()
            finally:
                # Always release the cursor, also when the plugin's setup
                # (column/index/trigger creation) fails.
                cr.close()

        super(fts_base_meta, self).__init__(name, bases, attrs)


class fts_base(object):
    """Base class for modules implementing fulltext searches.

    If you want to mess around with ORM functions, you probably want to go to
    fts_proxy.

    To define your own search plugin, subclass this class and set at least
    ``_model`` and ``_indexed_column``. Subclasses are instantiated and
    registered by the ``fts_base_meta`` metaclass when they are defined.
    """

    __metaclass__ = fts_base_meta

    _model = None
    """The model this search works on. Required."""

    _indexed_column = None
    """The column this search works on. Required.
    If this is a list of strings, all of them will be indexed for the fulltext
    search.
    """

    _table = None
    """The table this search works on. Will be deduced from model if
    not set."""

    _tsvector_column = None
    """The column holding tsvector data. Will be created on init.
    If not set, it will be ${_indexed_column}_tsvector (for a list of
    columns, their names joined with underscores)."""

    _tsvector_column_index = None
    """The name of the index for _tsvector_column.
    If not set, it will be ${_tsvector_column}_idx."""

    _tsvector_column_trigger = None
    """The name of the trigger to update _tsvector_column when _indexed_column
    is updated.
    If not set, it will be ${_tsvector_column}_trigger."""

    _tsconfig = 'pg_catalog.simple'
    """The fulltext config (=language) to be used.
    NOTE(review): the original comment says this is read from properties
    (one for the current module, then fts_base); no such lookup is visible
    in this class."""

    _title_column = 'name'
    """The column to be shown as title of a match. This can be an arbitrary SQL
    expression."""

    _disable_seqscan = True
    """The postgresql query planner (as of 9.0) chooses against using the
    index way too often. This forces it to use the index, which improved speed
    in all tested cases. Disable (and report) if this causes problems for
    you."""

    def __init__(self, pool, cr):
        """Assign default values and create _tsvector_column if necessary."""
        if not self._table:
            self._table = pool.get(self._model)._table

        if not self._tsvector_column:
            if isinstance(self._indexed_column, _string_types):
                column_stem = self._indexed_column
            else:
                column_stem = '_'.join(self._indexed_column)
            self._tsvector_column = column_stem + '_tsvector'

        if not self._tsvector_column_index:
            self._tsvector_column_index = self._tsvector_column + '_idx'

        if not self._tsvector_column_trigger:
            self._tsvector_column_trigger = self._tsvector_column + '_trigger'

        self._create_tsvector_column(pool, cr)

    def _indexed_column_expression(self):
        """Return the SQL expression yielding the text to be indexed.

        A single column is returned as a quoted identifier. For a list of
        columns every column is wrapped in ``coalesce(..., '')`` and the
        parts are concatenated with single spaces, so a NULL in any column
        (including the first one) cannot turn the whole document into NULL.
        """
        if isinstance(self._indexed_column, _string_types):
            return '"%s"' % self._indexed_column

        return " || ' ' || ".join(
            'coalesce("%s", \'\')' % column
            for column in self._indexed_column)

    def _create_tsvector_column(self, pool, cr):
        """Create the column to hold tsvector data.

        Does nothing if the model or column name is unset or the column
        already exists. Otherwise adds the column, its index and its update
        trigger, and schedules the cronjob that fills the column.
        """
        if (self._model is None or self._tsvector_column is None or
                self._column_exists(cr, self._table, self._tsvector_column)):
            return

        # Identifiers cannot be passed as query parameters; they come from
        # class attributes of the plugin, not from user input.
        cr.execute('''
            ALTER TABLE "%(table)s" ADD COLUMN "%(tsvector_column)s"
            tsvector''' %
            {
                'tsvector_column': self._tsvector_column,
                'table': self._table,
            })

        self._create_tsvector_column_index(pool, cr)
        self._create_indexed_column_trigger(pool, cr)
        pool.get('fts.proxy').create_init_tsvector_cronjob(cr, SUPERUSER_ID,
                                                           self)

    def _create_tsvector_column_index(self, pool, cr):
        """Create an index on _tsvector_column.
        Override if you want something else than gin."""

        cr.execute('''
            CREATE INDEX "%(tsvector_column_index)s" ON "%(table)s" USING
            gin("%(tsvector_column)s")''' %
            {
                'tsvector_column_index': self._tsvector_column_index,
                'tsvector_column': self._tsvector_column,
                'table': self._table,
            })

    def _create_indexed_column_trigger(self, pool, cr):
        """Create a trigger for changes to _indexed_column."""

        if isinstance(self._indexed_column, _string_types):
            trigger_columns = self._indexed_column
        else:
            # The surrounding quotes of the template close the first and
            # open the last identifier: "a","b","c"
            trigger_columns = '","'.join(self._indexed_column)

        cr.execute('''
            CREATE TRIGGER "%(tsvector_column_trigger)s" BEFORE INSERT OR UPDATE
            ON "%(table)s" FOR EACH ROW EXECUTE PROCEDURE
            tsvector_update_trigger("%(tsvector_column)s", '%(language)s',
            "%(indexed_column)s")''' %
            {
                'tsvector_column': self._tsvector_column,
                'tsvector_column_trigger': self._tsvector_column_trigger,
                'table': self._table,
                'language': self._tsconfig,
                'indexed_column': trigger_columns,
            })

    def _init_tsvector_column(self, pool, cr):
        """Fill _tsvector_column. This can take a long time and is called in a
        cronjob.
        Override if you want to index something else than the plain
        concatenation of the indexed columns. In that case you probably also
        have to override _create_indexed_column_trigger."""

        cr.execute('''
            UPDATE "%(table)s" SET "%(tsvector_column)s"=
            to_tsvector('%(language)s', %(indexed_column)s)''' %
            {
                'tsvector_column': self._tsvector_column,
                'table': self._table,
                'language': self._tsconfig,
                'indexed_column': self._indexed_column_expression(),
            })

    def _column_exists(self, cr, table, column):
        """Check if a column exists in a table.

        Returns True when information_schema lists at least one column with
        that name for a table with that name (the same table name may be
        present in more than one schema).
        """
        cr.execute(
            "SELECT column_name "
            "FROM information_schema.columns "
            "WHERE table_name=%s AND column_name=%s",
            (table, column))
        return cr.rowcount >= 1

    def search(self, cr, uid, args, order=None, context=None, count=False,
               searchstring=None):
        """The actual search function. Creates fts.proxy objects and returns
        their ids.
        Override if you need more than full text matching against the query
        string.

        :param args: the search domain; not evaluated here
        :param order: not evaluated here
        :param context: if it contains a truthy ``fts_summary``, a headline
            with the matches marked by ``*`` is stored as summary
        :param count: if True, return the number of matches instead of ids
        :param searchstring: the text to search for; passed to the database
            as a query parameter
        :return: the number of matches if ``count`` is set, otherwise the
            list of ids of the created fts.proxy records
        """
        if context is None:
            context = {}

        proxy_obj = self.pool.get('fts.proxy')

        # The doubled percent sign survives the identifier interpolation
        # below as a %(searchstring)s placeholder for the database driver.
        tsquery = "plainto_tsquery('%(language)s', %%(searchstring)s)"

        if count:
            columns = 'count(*)'
        else:
            if context.get('fts_summary'):
                summary = (
                    "ts_headline('%(language)s', %(indexed_column)s, " +
                    tsquery + ", 'StartSel = *, StopSel = *')")
            else:
                summary = 'null'
            columns = (
                'id, ts_rank(%(tsvector_column)s, ' + tsquery + '), ' +
                '%(title_column)s, ' + summary)

        query = (
            'SELECT ' + columns +
            ' FROM %(table)s WHERE %(tsvector_column)s @@ ' + tsquery
        ) % {
            'tsvector_column': self._tsvector_column,
            'table': self._table,
            'language': self._tsconfig,
            'indexed_column': self._indexed_column_expression(),
            'title_column': self._title_column,
        }

        if self._disable_seqscan:
            cr.execute('set enable_seqscan=off')

        cr.execute(query, {'searchstring': searchstring})
        rows = cr.fetchall()

        # Restore the planner setting on every successful path, including
        # the count case. It is deliberately not done in a finally clause:
        # after a failed statement the transaction is aborted and a further
        # statement would only mask the original error.
        if self._disable_seqscan:
            cr.execute('set enable_seqscan=on')

        if count:
            return rows[0][0]

        return [
            proxy_obj.create(cr, uid, {
                'model': self._model,
                'res_id': row[0],
                'rank': row[1],
                'name': row[2],
                'summary': row[3],
            })
            for row in rows
        ]
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Affero General Public License as
#    published by the Free Software Foundation, either version 3 of the
#    License, or (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
##############################################################################
import logging
from datetime import datetime

# 6.0 compatibility
try:
    from openerp.osv.orm import TransientModel
    from openerp.osv import fields
    from openerp.tools import DEFAULT_SERVER_DATETIME_FORMAT
    from openerp import SUPERUSER_ID
    from openerp import tools
except ImportError:
    from osv.osv import osv_memory as TransientModel
    import osv.fields as fields
    DEFAULT_SERVER_DATETIME_FORMAT = "%Y-%m-%d %H:%M:%S"
    SUPERUSER_ID = 1
    import tools

from fts_base import fts_base_meta
from fts_base import fts_base


class fts_proxy(TransientModel):
    """Transient model holding the matches found by the search plugins.

    ``search`` dispatches to every plugin registered in
    ``fts_base_meta._plugins``; each match is stored as one record pointing
    to the matching document via ``model`` and ``res_id``.
    """

    _name = 'fts.proxy'

    _columns = {
        'name': fields.char('Name', size=256),
        # Dummy function field: only exists so that the search string can be
        # passed in through the search domain.
        'text': fields.function(lambda *args: {}, string='Searchstring',
                                type='text',
                                fnct_search=lambda *args: {}),
        'model': fields.char('Model', size=256, translate=True),
        'res_id': fields.integer('Res ID'),
        'rank': fields.float('Rank'),
        'summary': fields.text('Summary')
    }

    _order = 'rank DESC, model ASC'

    _fts_models = {}

    def __init__(self, pool, cr):
        """Publish the pool on fts_base so that search plugins can use it."""
        fts_base.pool = pool

        # 6.0 compatibility
        if not hasattr(pool, 'db'):
            import pooler
            fts_base.pool.__dict__['db'] = pooler.get_db(cr.dbname)

        super(fts_proxy, self).__init__(pool, cr)

    # NOTE(review): the results are ids of transient records; confirm that
    # caching them cannot hand out ids of records that were removed since.
    @tools.cache()
    def search(self, cr, uid, args, offset=0, limit=None, order=None,
               context=None, count=False):
        """Run the fulltext search described by ``args``.

        The search string is taken from a ``('text', 'ilike', value)`` term.
        ``('model', '=', value)`` terms restrict the search to plugins for
        these models. All other terms are ignored here.

        :return: the number of matches if ``count`` is set, otherwise the
            ids of the fts.proxy records created for the matches
        """
        searchstring = ''
        models = []

        for arg in args:
            # Domains may contain bare operators like '|' or '&'; indexing
            # into such a string raised an IndexError.
            if not isinstance(arg, (list, tuple)) or len(arg) < 3:
                continue
            if arg[0] == 'text' and arg[1] == 'ilike':
                searchstring = arg[2]
            if arg[0] == 'model' and arg[1] == '=':
                models.append(arg[2])

        if not searchstring:
            # Keep the return type consistent with the count flag.
            return 0 if count else []

        res = 0 if count else []

        #TODO: if this search is limited, it is probably about scrolling and
        #we have cached the results of the initial nonlimited search. So return
        #that. Should look something like
        #if self.search.lookup(self, cr, args, 0, None, order, context, count))
        #and works only in openerp6.1

        logger = logging.getLogger('fulltextsearch')
        logger.debug('offset: %s limit: %s order=%s count=%s', offset, limit,
                     order, count)
        logger.debug('args %s', args)
        logger.debug('context %s', context)

        # Plugins look up keys in the context, so never hand them None.
        plugin_context = context if context is not None else {}

        #TODO: context may contain info which document types are interesting
        #only call search for those in this case
        for search_plugin in fts_base_meta._plugins:

            if models and search_plugin._model not in models:
                continue

            logger.debug('plugin %s', search_plugin)
            res += search_plugin.search(cr, uid, args, order=order,
                                        context=plugin_context, count=count,
                                        searchstring=searchstring)

        logger.debug('finished')

        if count:
            return res

        #TODO: cache ids of results for scrolling (offset > 0). that should use
        #some kind of hash over the search parameters
        return super(fts_proxy, self).search(cr, uid, [('id', 'in', res)],
                                             offset=offset,
                                             limit=limit, order=order,
                                             context=context)

    def open_document(self, cr, uid, ids, context=None):
        """Return an action opening the document of the first given match.

        :return: the data of the first form window action found for the
            document's model, restricted to the document, or False if there
            are no ids or no such action exists
        """
        action_data = False
        if not ids:
            return action_data

        result = self.browse(cr, uid, ids[0], context=context)
        model = result.model
        res_id = result.res_id

        ir_act_window = self.pool.get('ir.actions.act_window')
        action_ids = ir_act_window.search(
            cr, uid,
            [('res_model', '=', model), ('view_type', '=', 'form')])
        if action_ids:
            action_data = ir_act_window.read(cr, uid, action_ids[0],
                                             context=context)
            action_data.update({
                'domain': "[('id','=',%d)]" % (res_id),
                'nodestroy': True,
                'context': {},
                'res_id': res_id,
            })

        return action_data

    def create_init_tsvector_cronjob(self, cr, uid, fts_object):
        """Schedule init_tsvector_cronjob for the given search plugin.

        The cron record is always created as SUPERUSER_ID; ``uid`` is not
        used. The plugin is identified by its dotted class name.
        """
        fts_classname = (fts_object.__class__.__module__ + '.' +
                         fts_object.__class__.__name__)

        self.pool.get('ir.cron').create(cr, SUPERUSER_ID, {
            'name': 'fulltextsearch init ' + fts_classname,
            'nextcall': datetime.now().strftime(
                DEFAULT_SERVER_DATETIME_FORMAT),
            'model': self._name,
            'function': 'init_tsvector_cronjob',
            'args': "('" + fts_classname + "',)",
            'interval_type': False,
            'interval_number': False,
        })

    def init_tsvector_cronjob(self, cr, uid, fts_classname, context=None):
        """Fill the tsvector column of the plugin named ``fts_classname``.

        ``fts_classname`` is the dotted class name as generated by
        create_init_tsvector_cronjob.
        """
        logger = logging.getLogger('fts_cronjob')
        logger.info('looking for search plugin %s', fts_classname)

        plugin_found = False
        for search_plugin in fts_base_meta._plugins:
            plugin_classname = (search_plugin.__class__.__module__ + '.' +
                                search_plugin.__class__.__name__)
            if plugin_classname != fts_classname:
                continue

            plugin_found = True
            logger.info('running _init_tsvector_column for %s',
                        fts_classname)
            search_plugin._init_tsvector_column(self.pool, cr)
            logger.info('finished')

        if not plugin_found:
            # Without this the job finished silently and the column stayed
            # empty, e.g. after the plugin's module had been uninstalled.
            logger.warning('search plugin %s not found', fts_classname)

# 6.0 compatibility
fts_proxy()