||
Skip to end of metadata
Go to start of metadata

WSO2 G-Reg uses Apache Solr to support the Registry search feature. To support the Solr search, all Registry resources saved in the RDBMS are indexed by a periodic task. To index the content of a resource, that content must first be extracted, and the extraction logic varies by resource type. Therefore, G-Reg provides an extension point that lets you write your own custom logic to create an index document for each resource. 

Follow the steps below to create a custom indexer.

  1. Create an Apache Maven project, and add the following dependencies and WSO2 Maven repository in the pom.xml file of your project:

    <dependencies>
            <dependency>
                <groupId>org.wso2.carbon</groupId>
                <artifactId>org.wso2.carbon.registry.core</artifactId>
                <version>4.2.0</version>
                <scope>provided</scope>
            </dependency>
            <dependency>
                <groupId>org.wso2.carbon</groupId>
                <artifactId>org.wso2.carbon.registry.indexing</artifactId>
                <version>4.2.0</version>
                <scope>provided</scope>
            </dependency>
        </dependencies>
    
        <repositories>
            <repository>
                <id>wso2-nexus</id>
                <name>WSO2 internal Repository</name>
                <url>http://maven.wso2.org/nexus/content/groups/wso2-public/</url>
                <releases>
                    <enabled>true</enabled>
                    <updatePolicy>daily</updatePolicy>
                    <checksumPolicy>ignore</checksumPolicy>
                </releases>
            </repository>
    
            <repository>
                <id>wso2.releases</id>
                <name>WSO2 internal Repository</name>
                <url>http://maven.wso2.org/nexus/content/repositories/releases/</url>
                <releases>
                    <enabled>true</enabled>
                    <updatePolicy>daily</updatePolicy>
                    <checksumPolicy>ignore</checksumPolicy>
                </releases>
            </repository>
        </repositories>
  2. Create a Java class that implements the Indexer interface (i.e., org.wso2.carbon.registry.indexing.indexer.Indexer) and overrides the getIndexedDocument() method. Include the logic for extracting the resource content in this method. For example:

    package org.wso2.carbon.registry.indexer;
    
    import org.apache.commons.logging.Log;
    import org.apache.commons.logging.LogFactory;
    import org.apache.solr.common.SolrException;
    import org.wso2.carbon.registry.core.exceptions.RegistryException;
    import org.wso2.carbon.registry.core.utils.RegistryUtils;
    import org.wso2.carbon.registry.indexing.AsyncIndexer;
    import org.wso2.carbon.registry.indexing.IndexingConstants;
    import org.wso2.carbon.registry.indexing.indexer.Indexer;
    import org.wso2.carbon.registry.indexing.solr.IndexDocument;
    
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Locale;
    import java.util.Map;
    
    /**
     * Example custom indexer that turns a registry resource into an {@link IndexDocument}.
     *
     * <p>The resource bytes are decoded to a string with {@link RegistryUtils#decodeBytes} and used
     * as the document content. The media type, lifecycle state, lifecycle name and resource name
     * are added as lower-cased fields when they are available.
     */
    public class TextIndexer implements Indexer {
        public static final Log log = LogFactory.getLog(TextIndexer.class);

        /**
         * Creates the index document for the given resource.
         *
         * @param fileData the resource to index (path, raw bytes, media type and lifecycle details)
         * @return the index document built from {@code fileData}
         * @throws SolrException declared by the {@link Indexer} contract
         * @throws RegistryException if building the document from the resource fails
         */
        @Override
        public IndexDocument getIndexedDocument(AsyncIndexer.File2Index fileData) throws SolrException, RegistryException {
            if (log.isDebugEnabled()) {
                log.debug("Registry Text Indexer is running");
            }
            return getPreProcessedDocument(fileData);
        }

        /**
         * Builds the index document: decoded content plus the lower-cased attribute fields.
         *
         * @param fileData the resource to index
         * @return the populated index document
         * @throws RegistryException if the resource cannot be processed
         */
        private IndexDocument getPreProcessedDocument(AsyncIndexer.File2Index fileData) throws RegistryException {
            String content = RegistryUtils.decodeBytes(fileData.data);
            // The third constructor argument is left null, exactly as in the original example.
            IndexDocument indexDocument = new IndexDocument(fileData.path, content, null);

            Map<String, List<String>> attributes = new HashMap<String, List<String>>();
            putLowerCased(attributes, IndexingConstants.FIELD_MEDIA_TYPE, fileData.mediaType);
            putLowerCased(attributes, IndexingConstants.FIELD_LC_STATE, fileData.lcState);
            putLowerCased(attributes, IndexingConstants.FIELD_LC_NAME, fileData.lcName);
            if (fileData.path != null) {
                // Only resolve the resource name when there is a path to resolve it from.
                putLowerCased(attributes, "overview_name", RegistryUtils.getResourceName(fileData.path));
            }
            indexDocument.setFields(attributes);
            return indexDocument;
        }

        /**
         * Stores {@code value}, lower-cased, as the single value of {@code field}; null values are skipped.
         *
         * <p>{@link Locale#ROOT} is used so the indexed value does not depend on the JVM's default
         * locale (for example, the Turkish locale lower-cases "I" to a dotless "ı").
         */
        private static void putLowerCased(Map<String, List<String>> attributes, String field, String value) {
            if (value != null) {
                attributes.put(field, Arrays.asList(value.toLowerCase(Locale.ROOT)));
            }
        }
    }
  3. Build the project, and add the built JAR file to the <G-Reg_HOME>/repository/components/dropins/ directory.
  4. Add the custom indexer configuration within the <indexers> element in the <G-Reg_HOME>/repository/conf/registry.xml file, as shown below.

        <indexingConfiguration>
            ............
            <indexers>
                ..........
                <indexer class="org.wso2.carbon.registry.indexer.TextIndexer" mediaTypeRegEx="application/text\+plain" profiles="default,uddi-registry"/>
            </indexers>
            <exclusions>
                <exclusion pathRegEx="/_system/config/repository/dashboards/gadgets/swfobject1-5/.*[.]html"/>
                <exclusion pathRegEx="/_system/local/repository/components/org[.]wso2[.]carbon[.]registry/mount/.*"/>
            </exclusions>
        </indexingConfiguration>
  5. Start the WSO2 G-Reg server. The custom indexer you added will apply when you add/update a resource with the media-type mapped for the indexer in the <G-Reg_HOME>/repository/conf/registry.xml file.
  • No labels