coreseek 分布式索引 与 单索引

技术文档 - PHP文档 作者: xcheng1986 发布于:2014-06-20 阅读(1739) (暂无评论)  

单索引 配置文件(分表)
#数据源
source work_union
{
    # MySQL connection used by the indexer.
    type                       = mysql
    sql_host                   = 192.168.2.121
    sql_user                   = root
    sql_pass                   = 123456
    sql_db                     = hr_resume_center_new
    sql_port                   = 3306

    # Pre-queries, executed in the listed order before the main fetch query.
    sql_query_pre              = SET NAMES utf8
    sql_query_pre              = REPLACE INTO delta_work SELECT 100,999

    # Main fetch query: the four sharded tables hr_resume_work_0..3 are
    # concatenated into a single document stream.
    # NOTE(review): the first column (id) serves as the document id, so ids
    # presumably have to be unique across all four tables - confirm.
    sql_query                  = (SELECT id, resume_id, start, end, company_name FROM hr_resume_work_0) \
                                  UNION all (SELECT id, resume_id, start, end, company_name FROM hr_resume_work_1) \
                                  UNION all (SELECT id, resume_id, start, end, company_name FROM hr_resume_work_2) \
                                  UNION all (SELECT id, resume_id, start, end, company_name FROM hr_resume_work_3)

    # Columns stored as attributes instead of full-text fields.
    sql_attr_uint              = resume_id
    sql_attr_timestamp         = start
    sql_attr_timestamp         = end

    # Per-document lookup query ($id is replaced with the document id).
    # The id filter is placed inside every branch: a WHERE clause trailing a
    # parenthesized UNION is not valid MySQL syntax, so the original form
    # "(...) UNION all (...) WHERE id=$id" could not be executed.
    sql_query_info_pre         = SET NAMES utf8
    sql_query_info             = (SELECT id, resume_id, start, end, company_name,department,jobs FROM hr_resume_work_0 WHERE id=$id) \
                                 UNION all (SELECT id, resume_id, start, end, company_name,department,jobs FROM hr_resume_work_1 WHERE id=$id) \
                                 UNION all (SELECT id, resume_id, start, end, company_name,department,jobs FROM hr_resume_work_2 WHERE id=$id) \
                                 UNION all (SELECT id, resume_id, start, end, company_name,department,jobs FROM hr_resume_work_3 WHERE id=$id)
}

#索引                         
index index_work_union
{
    # Data source feeding this index and the on-disk location of its files.
    source              = work_union
    path                = /data/coreseek/3306/3306_work_test_union/master/index

    # Storage and text-processing options.
    docinfo             = extern
    mlock               = 0
    morphology          = none
    min_word_len        = 1
    html_strip          = 0

    # Chinese word segmentation settings
    charset_dictpath    = /usr/local/mmseg3/etc/
    charset_type        = zh_cn.utf-8
    ngram_len           = 0
}

indexer
{
    # Indexing RAM limit. The Sphinx reference manual gives 2047M as the
    # maximum possible value, so the previous 5000M setting was out of range.
    mem_limit                 = 2047M
}

searchd
{
    # Listener and per-query limits.
    listen                  = 15313
    read_timeout            = 5
    max_children            = 30
    max_matches             = 1000

    # Index rotation / loading behaviour.
    seamless_rotate         = 0
    preopen_indexes         = 0
    unlink_old              = 1

    compat_sphinxql_magics  = 0 # disabled

    # Runtime files written by the daemon.
    pid_file                = /usr/local/coreseek/var/log_3306/3306_work_test_union/searchd_mysql_work_0.pid
    log                     = /usr/local/coreseek/var/log_3306/3306_work_test_union/searchd_mysql_work_0.log
    query_log               = /usr/local/coreseek/var/log_3306/3306_work_test_union/query_mysql_work_0.log

    binlog_path             = # empty value: binlog logging disabled
}
下面是分布式索引节点和总代理的配置文件
下面是4个节点索引中的一个，其余3个基本一致，此处省略。
#数据源
source work_0
{
    # MySQL connection used by the indexer.
    type                = mysql
    sql_host            = 192.168.2.121
    sql_user            = root
    sql_pass            = 123456
    sql_db              = hr_resume_center_new
    sql_port            = 3306

    # Pre-queries, executed in the listed order before the main fetch query.
    # The second one stores the current MAX(sys_time) of hr_resume_work_0 in
    # row id=0 of delta_work; that value is the main/delta boundary.
    # Author's note: the single delta_work table records the maximum sys_time
    # of the ten hr_resume_work_* tables (this article itself configures four
    # nodes).
    sql_query_pre       = SET NAMES utf8
    sql_query_pre       = REPLACE INTO delta_work SELECT 0,MAX(sys_time) FROM hr_resume_work_0

    # Main fetch query: every row at or before the recorded boundary.
    sql_query           = SELECT id, resume_id, start, end, company_name FROM hr_resume_work_0 where sys_time<=( SELECT sys_time FROM delta_work WHERE id=0 )

    # Columns stored as attributes instead of full-text fields.
    sql_attr_uint       = resume_id
    sql_attr_timestamp  = start
    sql_attr_timestamp  = end

    # Per-document lookup query ($id is replaced with the document id).
    sql_query_info_pre  = SET NAMES utf8
    sql_query_info      = SELECT id, resume_id, start, end, company_name,department,jobs FROM hr_resume_work_0 WHERE id=$id
}
                              
#数据源增量                   
source work_delta_0 : work_0
{
    # Delta source: inherits from work_0 and overrides the queries below.
    # Only SET NAMES is listed as a pre-query here, so the boundary row in
    # delta_work is not rewritten before the delta fetch.
    sql_query_pre           = SET NAMES utf8

    # Fetch only rows newer than the boundary stored in delta_work (id=0).
    sql_query               = SELECT id, resume_id, start, end, company_name FROM hr_resume_work_0 where sys_time > ( SELECT sys_time FROM delta_work WHERE id=0 )

    # After indexing, move the boundary forward to the current MAX(sys_time).
    # NOTE(review): rows covered by an earlier delta run presumably have to be
    # merged into the main index before the next delta build, otherwise they
    # fall behind the boundary and out of the delta - confirm.
    sql_query_post_index    = REPLACE INTO delta_work SELECT 0,MAX(sys_time) FROM hr_resume_work_0
}

#索引                         
index index_work_0
{
    # Data source feeding this index and the on-disk location of its files.
    source              = work_0
    path                = /data/coreseek/3306/3306_work_0/master/index

    # Storage and text-processing options.
    docinfo             = extern
    mlock               = 0
    morphology          = none
    min_word_len        = 1
    html_strip          = 0

    # Chinese word segmentation settings
    charset_dictpath    = /usr/local/mmseg3/etc/
    charset_type        = zh_cn.utf-8
    ngram_len           = 0
}

#索引增量
index delta_work_0 : index_work_0
{
    # Delta index: inherits the settings of index_work_0 and overrides only
    # the data source and the storage path.
    source  = work_delta_0
    path    = /data/coreseek/3306/3306_work_0/delta/index
}

indexer
{
    # Indexing RAM limit for this node.
    mem_limit   = 512M
}

searchd
{
    # Listener and per-query limits. The master's "dist" index addresses
    # this node as 192.168.2.121:5313.
    listen                  = 5313
    read_timeout            = 5
    max_children            = 30
    max_matches             = 1000

    # Index rotation / loading behaviour.
    seamless_rotate         = 0
    preopen_indexes         = 0
    unlink_old              = 1

    compat_sphinxql_magics  = 0 # disabled

    # Runtime files written by the daemon.
    pid_file                = /usr/local/coreseek/var/log_3306/log_work_0/searchd_mysql_work_0.pid
    log                     = /usr/local/coreseek/var/log_3306/log_work_0/searchd_mysql_work_0.log
    query_log               = /usr/local/coreseek/var/log_3306/log_work_0/query_mysql_work_0.log

    binlog_path             = # empty value: binlog logging disabled
}
总代理
index dist
{
    # Distributed index: holds no data of its own and forwards queries to
    # the agents listed below.
    type                = distributed

    # One agent per node, written as host:port:remote_index_name.
    agent               = 192.168.2.121:5313:index_work_0
    agent               = 192.168.2.121:5314:index_work_1
    agent               = 192.168.2.121:5315:index_work_2
    agent               = 192.168.2.121:5316:index_work_3

    # Per-agent query timeout (milliseconds according to the Sphinx manual).
    agent_query_timeout = 100000
}

indexer
{
    # Indexing RAM limit. The Sphinx reference manual gives 2047M as the
    # maximum possible value, so the previous 5000M setting was out of range.
    mem_limit           = 2047M
}
searchd
{
    # Listener and per-query limits for the master (proxy) daemon.
    listen                  = 15312
    read_timeout            = 5
    max_children            = 30
    max_matches             = 6000

    # Index rotation / loading behaviour.
    seamless_rotate         = 1
    preopen_indexes         = 1
    unlink_old              = 1

    compat_sphinxql_magics  = 0
    query_log_format        = sphinxql

    # Runtime files written by the daemon.
    pid_file                = /usr/local/coreseek/var/log_work/searchd_mysql.pid
    log                     = /usr/local/coreseek/var/log_work/searchd_mysql.log
    query_log               = /usr/local/coreseek/var/log_work/query_mysql.log

    # NOTE(review): dist_threads presumably takes effect only together with
    # "workers = threads", which is left commented out here - confirm.
    #workers                = threads
    dist_threads            = 6
}