solr webapp - BloomReach Experience - Open Source CMS

This article covers Hippo CMS version 11. There's an updated version available that covers our most recent release.

solr webapp

The Solr webapp is basically just a webapp that

  1. has a webapp/WEB-INF dir that contains all mandatory Solr files (and some mandatory Hippo parts, see below)

  2. has a pom that pulls in solr-core

solr webapp/WEB-INF

This folder must contain all the files from the example project in the Solr distribution. If you build with the archetype in the future, this will all be set up correctly for you. The most important point about the Solr files is that the HST hst-solr-content-beans DocumentObjectBinder assumes some mandatory parts in the schema.xml. The following parts of the schema.xml are mandatory:

<!-- ***** BEGIN: DYNAMIC FIELDS NEEDED FOR HIPPO COMPOUND BEAN INDEXING ***** -->

<!-- Name suffix selects the field type:
       i = int, s = string, l = long, t = text_general,
       b = boolean, f = float, d = double, dt = tdate
     (the m-prefixed suffixes mi, ms, ml, ... map to the same types).
     Every field below is indexed and not stored. -->

<!-- *_multiple_compound_* patterns: all sixteen are multiValued. -->
<dynamicField name="*_multiple_compound_mi"  type="int"          indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_ms"  type="string"       indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_ml"  type="long"         indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_mt"  type="text_general" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_mb"  type="boolean"      indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_mf"  type="float"        indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_md"  type="double"       indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_mdt" type="tdate"        indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_i"   type="int"          indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_s"   type="string"       indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_l"   type="long"         indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_t"   type="text_general" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_b"   type="boolean"      indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_f"   type="float"        indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_d"   type="double"       indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_multiple_compound_dt"  type="tdate"        indexed="true" stored="false" multiValued="true"/>

<!-- *_compound_m* patterns: multiValued. -->
<dynamicField name="*_compound_mi"  type="int"          indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_compound_ms"  type="string"       indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_compound_ml"  type="long"         indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_compound_mt"  type="text_general" indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_compound_mb"  type="boolean"      indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_compound_mf"  type="float"        indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_compound_md"  type="double"       indexed="true" stored="false" multiValued="true"/>
<dynamicField name="*_compound_mdt" type="tdate"        indexed="true" stored="false" multiValued="true"/>

<!-- Remaining *_compound_* patterns: no multiValued attribute is set. -->
<dynamicField name="*_compound_i"   type="int"          indexed="true" stored="false"/>
<dynamicField name="*_compound_s"   type="string"       indexed="true" stored="false"/>
<dynamicField name="*_compound_l"   type="long"         indexed="true" stored="false"/>
<dynamicField name="*_compound_t"   type="text_general" indexed="true" stored="false"/>
<dynamicField name="*_compound_b"   type="boolean"      indexed="true" stored="false"/>
<dynamicField name="*_compound_f"   type="float"        indexed="true" stored="false"/>
<dynamicField name="*_compound_d"   type="double"       indexed="true" stored="false"/>
<dynamicField name="*_compound_dt"  type="tdate"        indexed="true" stored="false"/>

<!-- ***** END: DYNAMIC FIELDS NEEDED FOR HIPPO COMPOUND BEAN INDEXING ***** -->

For correct indexing of HTML contained in Hippo documents, we also advise using the solr.HTMLStripCharFilterFactory during indexing of text. Thus:

<fieldType name="text_general"
           class="solr.TextField"
           positionIncrementGap="100">

  <!-- Index-time analysis: HTML char filter, standard tokenizer,
       stop-word removal, lower-casing. -->
  <analyzer type="index">
    <!-- By default, we skip HTML tags during indexing -->
    <!-- NOTE(review): in Lucene's HTMLStripCharFilter the escapedTags list
         names the tags that are left in the text instead of being stripped.
         Confirm that listing these tags here matches the intent stated
         above. -->
    <charFilter class="solr.HTMLStripCharFilterFactory"
                escapedTags="a, b, body, br, button, caption, center, cite,
                             code, col, dd, em, fieldset, font, form, frame,
                             frameset, h1, h2, h3, h4, h5, h6, head, hr, html,
                             i, iframe, img, input, label, legend, li, link,
                             map, meta, object, ol, option, p, param, pre, q,
                             s, script, select, span, strike, strong, style,
                             sub, sup, table, tbody, td, textarea, tfoot, th,
                             thead, title, tr, tt, u, ul, var"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"
            enablePositionIncrements="true"/>
    <!-- in this example, we will only use synonyms at query time
    <filter class="solr.SynonymFilterFactory"
            synonyms="index_synonyms.txt"
            ignoreCase="true"
            expand="false"/>
    -->
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>

  <!-- Query-time analysis: no HTML char filter; synonyms are expanded
       here (synonyms.txt) before lower-casing. -->
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StopFilterFactory"
            ignoreCase="true"
            words="stopwords.txt"
            enablePositionIncrements="true"/>
    <filter class="solr.SynonymFilterFactory"
            synonyms="synonyms.txt"
            ignoreCase="true"
            expand="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>

</fieldType>

Typically, though this may differ per use case and per customer, the schema.xml also contains copyField configuration that additionally indexes some fields into the default search field. For example:

<!-- copyField commands copy one field to another at the time a document
        is added to the index.  It's used either to index the same field
        differently, or to add multiple fields to the same field for
        easier/faster searching.  -->

  <!-- Project-specific fields copied into the 'text' field.
       Each source is listed exactly once: a repeated copyField directive
       would copy the same value into 'text' a second time. -->
  <copyField source="name" dest="text"/>
  <copyField source="localizedName" dest="text"/>
  <copyField source="title" dest="text"/>
  <copyField source="summary" dest="text"/>
  <copyField source="brand" dest="text"/>
  <copyField source="product" dest="text"/>
  <copyField source="color" dest="text"/>
  <copyField source="type" dest="text"/>
  <copyField source="tags" dest="text"/>
  <copyField source="categories" dest="text"/>

  <!-- HIPPO SPECIFIC : EXPLICIT AND DYNAMIC TEXT COMPOUND FIELDS WE BY
       DEFAULT ADD to 'text' FIELD -->

  <!-- Explicit HTML compound fields; at most 3000 characters are copied.
       NOTE(review): these two names also match the wildcard sources
       below; confirm whether the resulting overlap is intended. -->
  <copyField source="hippostdContent_multiple_compound_t" dest="text"
             maxChars="3000"/>
  <copyField source="hippostdContent_compound_t" dest="text"
             maxChars="3000"/>

  <!-- Dynamic compound fields: string variants are capped at 500
       characters, text variants at 3000 characters. -->
  <copyField source="*_multiple_compound_ms" dest="text" maxChars="500"/>
  <copyField source="*_multiple_compound_s" dest="text" maxChars="500"/>
  <copyField source="*_multiple_compound_mt" dest="text" maxChars="3000"/>
  <copyField source="*_multiple_compound_t" dest="text" maxChars="3000"/>
  <copyField source="*_compound_ms" dest="text" maxChars="500"/>
  <copyField source="*_compound_s" dest="text" maxChars="500"/>
  <copyField source="*_compound_mt" dest="text" maxChars="3000"/>
  <copyField source="*_compound_t" dest="text" maxChars="3000"/>

  <!-- END HIPPO SPECIFIC : DYNAMIC TEXT COMPOUND FIELDS WE BY DEFAULT ADD
       to 'text' FIELD -->

Also, you might want to use an explicit mapping for some compound fields instead of the dynamic *_multiple_compound_t. For example:

    <!-- ****************** OPTIONAL EXPLICIT FIELDS FOR HIPPO COMPOUND BEAN
                            HTML INDEXING ******************  -->

    <!-- Both fields are stored but not indexed: html fields are copied to
         'text', hence they are not indexed here directly.
         Set stored to false if NO highlighting is needed. -->
    <field name="hippostdContent_multiple_compound_t"
           type="text_general"
           indexed="false"
           stored="true"
           multiValued="true"/>
    <field name="hippostdContent_compound_t"
           type="text_general"
           indexed="false"
           stored="true"/>

    <!-- ****************** END OPTIONAL EXPLICIT FIELDS FOR HIPPO COMPOUND
                            BEAN HTML INDEXING **************  -->

The Solr pom.xml

Typically, the Solr pom.xml looks as follows:

<!-- Maven module that packages the Solr web application as solr.war. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

  <modelVersion>4.0.0</modelVersion>

  <!-- groupId and version of this module are inherited from the parent. -->
  <parent>
    <groupId>org.example</groupId>
    <artifactId>myhippoproject</artifactId>
    <version>1.01.00-SNAPSHOT</version>
  </parent>

  <artifactId>myhippoproject-solr</artifactId>
  <packaging>war</packaging>
  <name>My Hippo Project solr</name>
  <description>My Hippo Project Solr</description>

  <dependencies>

    <!-- No version is given here; presumably it is managed by the parent
         POM (TODO confirm). -->
    <dependency>
      <groupId>commons-lang</groupId>
      <artifactId>commons-lang</artifactId>
    </dependency>

    <!-- dependency.solr.version is not defined in this file; presumably it
         is a property of the parent POM (TODO confirm). -->
    <dependency>
      <groupId>org.apache.solr</groupId>
      <artifactId>solr-core</artifactId>
      <version>${dependency.solr.version}</version>
      <scope>runtime</scope>
    </dependency>

  </dependencies>

  <build>
    <!-- Fixes the artifact name to "solr" regardless of artifactId/version. -->
    <finalName>solr</finalName>
  </build>

</project>
Did you find this page helpful?
How could this documentation serve you better?
On this page
    Did you find this page helpful?
    How could this documentation serve you better?