SOLR 4.10.4

* Get Solr

wget http://archive.apache.org/dist/lucene/solr/4.10.4/solr-4.10.4.tgz
tar -xvzf solr-4.10.4.tgz
cp -R solr-4.10.4/example /opt/solr
cp -R solr-4.10.4/contrib /opt/solr/
cp -R solr-4.10.4/dist /opt/solr/

* Install Jetty9 (only for dependencies)

apt-get install jetty9
service jetty9 stop

ln -s /usr/share/jetty9/start.ini /opt/solr/start.ini

cp /etc/default/jetty9 /etc/default/solr
nano -w /etc/default/solr

NO_START=0
JAVA_HOME=/usr/lib/jvm/java-8-oracle
JAVA_OPTIONS="-Xmx6g -Xms6g -Djava.awt.headless=true -Dsolr.solr.home=/opt/solr/solr -Djava.net.preferIPv4Stack=true $JAVA_OPTIONS"
JETTY_USER=jetty
JETTY_HOME=/opt/solr
JETTY_ARGS="jetty.port=8983"
JETTY_LOGS=/opt/solr/logs

cp /usr/share/jetty9/bin/jetty.sh /etc/init.d/solr
chmod +x /etc/init.d/solr

* Prepare solr

mkdir /srv/solr
mkdir /srv/solr/data
chown -R jetty:jetty /srv/solr

cd ~
git clone --recursive https://github.com/discoverygarden/basic-solr-config.git
cd /opt/solr/solr/islandora/conf/
cp schema.xml schema.xml.ORI
cp solrconfig.xml solrconfig.xml.ORI
cp stopwords.txt stopwords.txt.ORI
cp -v ~/basic-solr-config/conf/* ./

nano -w /opt/solr/solr/islandora/conf/solrconfig.xml
...
  <lib dir="../../contrib/extraction/lib" regex=".*\.jar" />
  <lib dir="../../dist/" regex="solr-cell-\d.*\.jar" />
 
  <lib dir="../../contrib/clustering/lib/" regex=".*\.jar" />
  <lib dir="../../dist/" regex="solr-clustering-\d.*\.jar" />
 
  <lib dir="../../contrib/langid/lib/" regex=".*\.jar" />
  <lib dir="../../dist/" regex="solr-langid-\d.*\.jar" />
 
  <lib dir="../../contrib/velocity/lib" regex=".*\.jar" />
  <lib dir="../../dist/" regex="solr-velocity-\d.*\.jar" />
...
  <dataDir>${solr.data.dir:/srv/solr/data}</dataDir>
...
nano -w /opt/solr/solr/islandora/conf/schema.xml
...
   <field name="dc.subject" type="string"  indexed="true"  stored="true" multiValued="true"/>
   <field name="dc.subject_dct" type="text"  indexed="true"  stored="true" multiValued="true"/>
   <field name="dc.creator" type="string"  indexed="true"  stored="true" multiValued="true"/>
   <field name="dc.creator_dct" type="text"  indexed="true"  stored="true" multiValued="true"/>
   <field name="dc.publisher" type="string"  indexed="true"  stored="true" multiValued="true"/>
   <field name="dc.publisher_dct" type="text"  indexed="true"  stored="true" multiValued="true"/>
...
<copyField source="dc.subject" dest="dc.subject_dct"/>
<copyField source="dc.creator" dest="dc.creator_dct"/>
<copyField source="dc.publisher" dest="dc.publisher_dct"/>
...

* Stopwords and delimiter

In most cases, the language of the books is Italian.

nano -w /opt/solr/solr/islandora/conf/schema.xml
...
    <fieldType name="text_fgs" class="solr.TextField" positionIncrementGap="100">
      <analyzer>
        <tokenizer class="solr.StandardTokenizerFactory"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwordsDC.txt"/>
      </analyzer>
    </fieldType>
...
    <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
      <analyzer type="index">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.HyphenatedWordsFilterFactory"/>
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"
                types="wdfftypes.txt"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"
                types="wdfftypes.txt"/>
        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
        <filter class="solr.LowerCaseFilterFactory"/>
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldType>
cp /opt/solr/etc/jetty.xml /opt/solr/etc/jetty.xml.ORI
nano -w /opt/solr/etc/jetty.xml
    <!-- =========================================================== -->
    <!-- Set handler Collection Structure                            -->
    <!-- =========================================================== -->
    <Set name="handler">
      <New id="Handlers" class="org.eclipse.jetty.server.handler.HandlerCollection">
        <Set name="handlers">
         <Array type="org.eclipse.jetty.server.Handler">
           <Item>
     <!-- here begins the new stuff -->
     <New class="org.eclipse.jetty.server.handler.IPAccessHandler">
       <Call name="addWhite"><Arg>127.0.0.1</Arg></Call>
       <Call name="addWhite"><Arg>150.145.48.48</Arg></Call>
       <Call name="addWhite"><Arg>150.145.48.49</Arg></Call>
       <Set name="handler">
         <!-- here's where you put what was there before: -->
         <New id="Contexts" class="org.eclipse.jetty.server.handler.ContextHandlerCollection"/>
       </Set>
     </New>
     <!-- here ends the new stuff -->
           </Item>
           <Item>
             <New id="DefaultHandler" class="org.eclipse.jetty.server.handler.DefaultHandler"/>
           </Item>
           <Item>
             <New id="RequestLog" class="org.eclipse.jetty.server.handler.RequestLogHandler"/>
           </Item>
         </Array>
        </Set>
      </New>
    </Set>
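Note: this rename must be done before any of the steps that use /opt/solr/solr/islandora/conf/, because that directory only exists once collection1 has been renamed.
mv /opt/solr/solr/collection1 /opt/solr/solr/islandora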
nano -w /opt/solr/solr/islandora/core.properties
         
         name=islandora

chown -R jetty:jetty /opt/solr
chmod -R 0775 /opt/solr


service solr start

tail -f /opt/solr/logs/solr.log
 
 
reloaded/be_solr.txt · Last modified: 2018/04/09 21:39 by giancarlo

Developers: CNR IRCrES IT Office and Library
Giancarlo Birello (giancarlo.birello _@_ ircres.cnr.it) and Anna Perin (anna.perin _@_ ircres.cnr.it)
DigiBess is licensed under: Creative Commons License
Recent changes RSS feed Creative Commons License Valid XHTML 1.0 Valid CSS Driven by DokuWiki
Drupal Garland Theme for Dokuwiki