001/*
002 * Licensed to DuraSpace under one or more contributor license agreements.
003 * See the NOTICE file distributed with this work for additional information
004 * regarding copyright ownership.
005 *
006 * DuraSpace licenses this file to you under the Apache License,
007 * Version 2.0 (the "License"); you may not use this file except in
008 * compliance with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.fcrepo.connector.file;
019
020import static java.lang.System.currentTimeMillis;
021import static org.fcrepo.kernel.api.FedoraTypes.CONTENT_DIGEST;
022import static org.fcrepo.kernel.api.FedoraTypes.CONTENT_SIZE;
023import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_BINARY;
024import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_CONTAINER;
025import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_NON_RDF_SOURCE_DESCRIPTION;
026import static org.fcrepo.kernel.api.FedoraTypes.FEDORA_RESOURCE;
027import static org.fcrepo.kernel.api.utils.ContentDigest.asURI;
028import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.JCR_CREATED;
029import static org.fcrepo.kernel.modeshape.FedoraJcrConstants.JCR_LASTMODIFIED;
030import static org.modeshape.jcr.api.JcrConstants.JCR_DATA;
031import static org.modeshape.jcr.api.JcrConstants.NT_FILE;
032import static org.modeshape.jcr.api.JcrConstants.NT_FOLDER;
033import static org.modeshape.jcr.api.JcrConstants.NT_RESOURCE;
034
035import java.io.File;
036import java.net.URI;
037import java.util.Date;
038import java.util.HashMap;
039import java.io.IOException;
040import java.util.Map;
041
042import com.google.common.annotations.VisibleForTesting;
043
044import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
045import org.infinispan.schematic.document.Document;
046import org.modeshape.connector.filesystem.FileSystemConnector;
047import org.modeshape.jcr.api.value.DateTime;
048import org.modeshape.jcr.api.nodetype.NodeTypeManager;
049import org.modeshape.jcr.spi.federation.DocumentChanges;
050import org.modeshape.jcr.spi.federation.DocumentReader;
051import org.modeshape.jcr.spi.federation.DocumentWriter;
052import org.modeshape.jcr.value.BinaryValue;
053import org.modeshape.jcr.value.Name;
054import org.modeshape.jcr.value.Property;
055import org.modeshape.jcr.value.basic.BasicSingleValueProperty;
056import org.slf4j.Logger;
057import org.slf4j.LoggerFactory;
058
059import javax.jcr.NamespaceRegistry;
060import javax.jcr.RepositoryException;
061
062/**
063 * This class extends the {@link FileSystemConnector} to enable the autocreation of Fedora-specific datastream and
064 * content properties.
065 *
066 * @author Andrew Woods
067 *         Date: 1/30/14
068 */
069public class FedoraFileSystemConnector extends FileSystemConnector {
070
    private static final Logger LOGGER = LoggerFactory.getLogger(FedoraFileSystemConnector.class);

    // Separator placed between a file's id and the "jcr:content" segment.
    private static final String DELIMITER = "/";
    // Used only to build JCR_CONTENT_SUFFIX below.
    private static final String JCR_CONTENT = "jcr:content";
    // Appended to idFor(file) to form the id under which the digest properties are read and cached.
    private static final String JCR_CONTENT_SUFFIX = DELIMITER + JCR_CONTENT;

    /**
     * The string path for a {@link File} object that represents the top-level directory in which properties are
     * stored.  This is optional for this connector, but if set allows properties to be cached (greatly
     * improving performance) for even read-only connectors.  When this property is specified the extraPropertiesStore
     * should be null (not specified) as it would be overridden by this.
     *
     * NOTE(review): nothing in this class assigns this field; presumably it is injected from the connector
     * configuration -- confirm.
     */
    private String propertiesDirectoryPath;
    // Created from propertiesDirectoryPath in initialize(); stays null when no path is set.
    private File propertiesDirectory;
085
086    @Override
087    public void initialize(final NamespaceRegistry registry,
088                           final NodeTypeManager nodeTypeManager) throws IOException {
089        LOGGER.warn("FedoraFileSystemConnector will be removed from the core of Fedora in a coming release. "
090            + "See https://jira.duraspace.org/browse/FCREPO-2028 for more information.");
091        try {
092            super.initialize(registry, nodeTypeManager);
093        } catch (final RepositoryException e) {
094            throw new RepositoryRuntimeException("Error initializing FedoraFileSystemConnector!", e);
095        }
096
097        if (propertiesDirectoryPath != null) {
098           propertiesDirectory = new File(propertiesDirectoryPath);
099            if (!propertiesDirectory.exists() || !propertiesDirectory.isDirectory()) {
100                throw new RepositoryRuntimeException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath
101                        + ", does not exist or is not a directory.");
102            } else if ( !propertiesDirectory.canRead() || !propertiesDirectory.canWrite() ) {
103                throw new RepositoryRuntimeException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath
104                        + ", should be readable and writable.");
105            }
106            if (extraPropertiesStore() != null) {
107                LOGGER.warn("Extra properties store was specified but won't be used!");
108            }
109            setExtraPropertiesStore(new ExternalJsonSidecarExtraPropertyStore(this, translator(), propertiesDirectory));
110        }
111    }
112
113    /**
114     * This method returns the object/document for the node with the federated arg 'id'.
115     *
116     * Additionally, this method adds Fedora datastream and content properties to the result of the parent class
117     * implementation.
118     */
119    @Override
120    public Document getDocumentById(final String id) {
121        LOGGER.debug("Getting Federated document: {}", id);
122        if (null == id || id.isEmpty()) {
123            LOGGER.warn("Can not get document with null id");
124            return null;
125        }
126
127        final Document doc = super.getDocumentById(id);
128        if ( doc == null ) {
129            LOGGER.debug("Non-existent node, document is null: {}", id);
130            return doc;
131        }
132
133        final DocumentReader docReader = readDocument(doc);
134        final DocumentWriter docWriter = writeDocument(doc);
135        final long lastmod = fileFor(id).lastModified();
136        LOGGER.debug("Adding lastModified={}", lastmod);
137        docWriter.addProperty(JCR_LASTMODIFIED, lastmod);
138
139        final String primaryType = docReader.getPrimaryTypeName();
140
141        if (!docReader.getMixinTypeNames().contains(FEDORA_RESOURCE)) {
142            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_RESOURCE, id);
143            docWriter.addMixinType(FEDORA_RESOURCE);
144        }
145
146        // Is Fedora Datastream?
147        if (primaryType.equals(NT_FILE)) {
148            decorateDatastreamNode(docReader, docWriter);
149
150        // Is Fedora Content?
151        } else if (primaryType.equals(NT_RESOURCE)) {
152            decorateContentNode(docReader, docWriter, fileFor(id));
153
154        // Is Fedora Object?
155        } else if (primaryType.equals(NT_FOLDER)) {
156            decorateObjectNode(docReader, docWriter);
157        }
158
159        return docWriter.document();
160    }
161
162    /**
163     * Checks whether internally managed properties can and should be stored to
164     * an ExtraPropertiesStore.
165     * @return whether internally managed properties can and should be stored to
166     */
167    protected boolean shouldCacheProperties() {
168        return extraPropertiesStore() != null && (!isReadonly() || this.propertiesDirectory != null);
169    }
170
171
172    /**
173     * Pass-thru to the parent class in order to make this function public
174     *
175     * @param id the node ID to test
176     * @return whether the id corresponds to the root location
177     */
178    @Override
179    public boolean isRoot(final String id) {
180        return super.isRoot(id);
181    }
182
183    /**
184     * Pass-thru to the parent class in order to make this function public
185     *
186     * @param file the file used to compute a sha1 hash
187     * @return the sha1 hash of the file contents
188     */
189    @Override
190    public String sha1(final File file) {
191        final String cachedSha1 = getCachedSha1(file);
192        if (cachedSha1 == null) {
193            return computeAndCacheSha1(file);
194        }
195        return cachedSha1;
196    }
197
198    private String getCachedSha1(final File file) {
199        final String id = idFor(file) + JCR_CONTENT_SUFFIX;
200        if (extraPropertiesStore() != null) {
201            final Map<Name, Property> extraProperties = extraPropertiesStore().getProperties(id);
202            final Name digestName = nameFrom(CONTENT_DIGEST);
203            if (extraProperties.containsKey(digestName)) {
204                if (!hasBeenModifiedSincePropertiesWereStored(file, extraProperties.get(nameFrom(JCR_CREATED)))) {
205                    LOGGER.trace("Found sha1 for {} in extra properties store.", id);
206                    final String uriStr = ((URI) extraProperties.get(digestName).getFirstValue()).toString();
207                    return uriStr.substring(uriStr.indexOf("sha1:") + 5);
208                }
209            }
210        } else {
211            LOGGER.trace("No cache configured to contain object hashes.");
212        }
213        return null;
214    }
215
216    private String computeAndCacheSha1(final File file) {
217        final String id = idFor(file) + JCR_CONTENT_SUFFIX;
218        LOGGER.trace("Computing sha1 for {}.", id);
219        final String sha1 = super.sha1(file);
220        if (shouldCacheProperties()) {
221            final Map<Name, Property> updateMap = new HashMap<>();
222            final Property digestProperty = new BasicSingleValueProperty(nameFrom(CONTENT_DIGEST),
223                    asURI("SHA-1", sha1));
224            final Property digestDateProperty = new BasicSingleValueProperty(nameFrom(JCR_CREATED),
225                    factories().getDateFactory().create(file.lastModified()));
226            updateMap.put(digestProperty.getName(), digestProperty);
227            updateMap.put(digestDateProperty.getName(), digestDateProperty);
228            extraPropertiesStore().updateProperties(id, updateMap);
229        }
230        return sha1;
231    }
232
233    private static void decorateObjectNode(final DocumentReader docReader, final DocumentWriter docWriter) {
234        if (!docReader.getMixinTypeNames().contains(FEDORA_CONTAINER)) {
235            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_CONTAINER, docReader.getDocumentId());
236            docWriter.addMixinType(FEDORA_CONTAINER);
237        }
238    }
239
240    private static void decorateDatastreamNode(final DocumentReader docReader, final DocumentWriter docWriter) {
241        if (!docReader.getMixinTypeNames().contains(FEDORA_NON_RDF_SOURCE_DESCRIPTION)) {
242            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_NON_RDF_SOURCE_DESCRIPTION, docReader.getDocumentId());
243            docWriter.addMixinType(FEDORA_NON_RDF_SOURCE_DESCRIPTION);
244        }
245    }
246
247    private static void decorateContentNode(final DocumentReader docReader,
248                                            final DocumentWriter docWriter,
249                                            final File file) {
250        if (!docReader.getMixinTypeNames().contains(FEDORA_BINARY)) {
251            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_BINARY, docReader.getDocumentId());
252            docWriter.addMixinType(FEDORA_BINARY);
253        }
254
255        if (null == docReader.getProperty(CONTENT_DIGEST)
256                || hasBeenModifiedSincePropertiesWereStored(file, docReader.getProperty(JCR_CREATED))) {
257            final BinaryValue binaryValue = getBinaryValue(docReader);
258            final String dsChecksum = binaryValue.getHexHash();
259            final String dsURI = asURI("SHA-1", dsChecksum).toString();
260
261            LOGGER.trace("Adding {} property of {} to {}", CONTENT_DIGEST, dsURI, docReader.getDocumentId());
262            docWriter.addProperty(CONTENT_DIGEST, dsURI);
263        }
264
265        if (null == docReader.getProperty(CONTENT_SIZE)) {
266            final long binarySize = file.length();
267            LOGGER.trace("Adding {} property of {} to {}", CONTENT_SIZE, binarySize, docReader.getDocumentId());
268            docWriter.addProperty(CONTENT_SIZE, binarySize);
269        }
270
271        LOGGER.debug("Decorated data property at path: {}", docReader.getDocumentId());
272    }
273
274    private static boolean hasBeenModifiedSincePropertiesWereStored(final File file, final Property lastModified) {
275        if (lastModified == null) {
276            LOGGER.trace("Hash for {} has not been computed yet.", file.getName());
277            return true;
278        }
279        final DateTime datetime = (DateTime) lastModified.getFirstValue();
280        if (datetime.toDate().equals(new Date(file.lastModified()))) {
281            return false;
282        }
283        LOGGER.trace("{} has been modified ({}) since hash was last computed ({}).", file.getName(),
284                new Date(file.lastModified()), datetime.toDate());
285        return true;
286    }
287
288    private static BinaryValue getBinaryValue(final DocumentReader docReader) {
289        final Property binaryProperty = docReader.getProperty(JCR_DATA);
290        return (BinaryValue) binaryProperty.getFirstValue();
291    }
292
293    /* Override write operations to also update the parent file's timestamp, so
294       its Last-Modified header correctly reflects changes to children. */
295    @Override
296    public boolean removeDocument( final String id ) {
297        if ( super.removeDocument(id) ) {
298            touchParent(id);
299            return true;
300        }
301        return false;
302    }
303
304    @Override
305    public void storeDocument( final Document document ) {
306        super.storeDocument( document );
307        touchParent(readDocument(document).getDocumentId());
308    }
309
310    @Override
311    public void updateDocument( final DocumentChanges changes ) {
312        super.updateDocument( changes );
313        touchParent( changes.getDocumentId() );
314    }
315
316    /**
317     * Find the parent file, and set its timestamp to the current time.  This
318     * timestamp will be used for populating the Last-Modified header.
319     * @param id the id
320    **/
321    protected void touchParent( final String id ) {
322        if (!isRoot(id)) {
323            final File file = fileFor(id);
324            final File parent = file.getParentFile();
325            parent.setLastModified(currentTimeMillis());
326        }
327    }
328
329    /* Overriding so unit test can mock. */
330    @Override
331    @VisibleForTesting
332    protected File fileFor( final String id ) {
333        return super.fileFor(id);
334    }
335    @Override
336    @VisibleForTesting
337    protected DocumentReader readDocument( final Document document ) {
338        return super.readDocument(document);
339    }
340
341    /* Overriding to make the FedoraFileSystemConnector is always read-only. */
342    @Override
343    public boolean isReadonly() {
344        return true;
345    }
346
347    @Override
348    public boolean isContentNode(final String id) {
349        return super.isContentNode(id);
350    }
351
352}