001/**
002 * Copyright 2015 DuraSpace, Inc.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.fcrepo.connector.file;
017
018import static java.lang.System.currentTimeMillis;
019import static org.fcrepo.kernel.api.FedoraJcrTypes.CONTENT_DIGEST;
020import static org.fcrepo.kernel.api.FedoraJcrTypes.CONTENT_SIZE;
021import static org.fcrepo.kernel.api.FedoraJcrTypes.FEDORA_BINARY;
022import static org.fcrepo.kernel.api.FedoraJcrTypes.FEDORA_CONTAINER;
023import static org.fcrepo.kernel.api.FedoraJcrTypes.FEDORA_NON_RDF_SOURCE_DESCRIPTION;
024import static org.fcrepo.kernel.api.FedoraJcrTypes.FEDORA_RESOURCE;
025import static org.fcrepo.kernel.api.FedoraJcrTypes.JCR_CREATED;
026import static org.fcrepo.kernel.api.FedoraJcrTypes.JCR_LASTMODIFIED;
027import static org.fcrepo.kernel.api.utils.ContentDigest.asURI;
028import static org.modeshape.jcr.api.JcrConstants.JCR_DATA;
029import static org.modeshape.jcr.api.JcrConstants.NT_FILE;
030import static org.modeshape.jcr.api.JcrConstants.NT_FOLDER;
031import static org.modeshape.jcr.api.JcrConstants.NT_RESOURCE;
032
033import java.io.File;
034import java.net.URI;
035import java.util.Date;
036import java.util.HashMap;
037import java.io.IOException;
038import java.util.Map;
039
040import com.google.common.annotations.VisibleForTesting;
041
042import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
043import org.infinispan.schematic.document.Document;
044import org.modeshape.connector.filesystem.FileSystemConnector;
045import org.modeshape.jcr.api.value.DateTime;
046import org.modeshape.jcr.api.nodetype.NodeTypeManager;
047import org.modeshape.jcr.spi.federation.DocumentChanges;
048import org.modeshape.jcr.spi.federation.DocumentReader;
049import org.modeshape.jcr.spi.federation.DocumentWriter;
050import org.modeshape.jcr.value.BinaryValue;
051import org.modeshape.jcr.value.Name;
052import org.modeshape.jcr.value.Property;
053import org.modeshape.jcr.value.basic.BasicSingleValueProperty;
054import org.slf4j.Logger;
055import org.slf4j.LoggerFactory;
056
057import javax.jcr.NamespaceRegistry;
058import javax.jcr.RepositoryException;
059
060/**
061 * This class extends the {@link FileSystemConnector} to enable the autocreation of Fedora-specific datastream and
062 * content properties.
063 *
064 * @author Andrew Woods
065 *         Date: 1/30/14
066 */
067public class FedoraFileSystemConnector extends FileSystemConnector {
068
069    private static final Logger LOGGER = LoggerFactory.getLogger(FedoraFileSystemConnector.class);
070
071    private static final String DELIMITER = "/";
072    private static final String JCR_CONTENT = "jcr:content";
073    private static final String JCR_CONTENT_SUFFIX = DELIMITER + JCR_CONTENT;
074
075    /**
076     * The string path for a {@link File} object that represents the top-level directory in which properties are
077     * stored.  This is optional for this connector, but if set allows properties to be cached (greatly
078     * improving performance) for even read-only connectors.  When this property is specified the extraPropertiesStore
079     * should be null (not specified) as it would be overridden by this.
080     */
081    private String propertiesDirectoryPath;
082    private File propertiesDirectory;
083
084    @Override
085    public void initialize(final NamespaceRegistry registry,
086                           final NodeTypeManager nodeTypeManager) throws IOException {
087        try {
088            super.initialize(registry, nodeTypeManager);
089        } catch (final RepositoryException e) {
090            throw new RepositoryRuntimeException("Error initializing FedoraFileSystemConnector!", e);
091        }
092
093        if (propertiesDirectoryPath != null) {
094           propertiesDirectory = new File(propertiesDirectoryPath);
095            if (!propertiesDirectory.exists() || !propertiesDirectory.isDirectory()) {
096                throw new RepositoryRuntimeException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath
097                        + ", does not exist or is not a directory.");
098            } else if ( !propertiesDirectory.canRead() || !propertiesDirectory.canWrite() ) {
099                throw new RepositoryRuntimeException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath
100                        + ", should be readable and writable.");
101            }
102            if (extraPropertiesStore() != null) {
103                LOGGER.warn("Extra properties store was specified but won't be used!");
104            }
105            setExtraPropertiesStore(new ExternalJsonSidecarExtraPropertyStore(this, translator(), propertiesDirectory));
106        }
107    }
108
109    /**
110     * This method returns the object/document for the node with the federated arg 'id'.
111     *
112     * Additionally, this method adds Fedora datastream and content properties to the result of the parent class
113     * implementation.
114     */
115    @Override
116    public Document getDocumentById(final String id) {
117        LOGGER.debug("Getting Federated document: {}", id);
118        if (null == id || id.isEmpty()) {
119            LOGGER.warn("Can not get document with null id");
120            return null;
121        }
122
123        final Document doc = super.getDocumentById(id);
124        if ( doc == null ) {
125            LOGGER.debug("Non-existent node, document is null: {}", id);
126            return doc;
127        }
128
129        final DocumentReader docReader = readDocument(doc);
130        final DocumentWriter docWriter = writeDocument(doc);
131        final long lastmod = fileFor(id).lastModified();
132        LOGGER.debug("Adding lastModified={}", lastmod);
133        docWriter.addProperty(JCR_LASTMODIFIED, lastmod);
134
135        final String primaryType = docReader.getPrimaryTypeName();
136
137        if (!docReader.getMixinTypeNames().contains(FEDORA_RESOURCE)) {
138            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_RESOURCE, id);
139            docWriter.addMixinType(FEDORA_RESOURCE);
140        }
141
142        // Is Fedora Datastream?
143        if (primaryType.equals(NT_FILE)) {
144            decorateDatastreamNode(docReader, docWriter);
145
146        // Is Fedora Content?
147        } else if (primaryType.equals(NT_RESOURCE)) {
148            decorateContentNode(docReader, docWriter, fileFor(id));
149
150        // Is Fedora Object?
151        } else if (primaryType.equals(NT_FOLDER)) {
152            decorateObjectNode(docReader, docWriter);
153        }
154
155        return docWriter.document();
156    }
157
158    /**
159     * Checks whether internally managed properties can and should be stored to
160     * an ExtraPropertiesStore.
161     * @return whether internally managed properties can and should be stored to
162     */
163    protected boolean shouldCacheProperties() {
164        return extraPropertiesStore() != null && (!isReadonly() || this.propertiesDirectory != null);
165    }
166
167
168    /**
169     * Pass-thru to the parent class in order to make this function public
170     *
171     * @param id the node ID to test
172     * @return whether the id corresponds to the root location
173     */
174    @Override
175    public boolean isRoot(final String id) {
176        return super.isRoot(id);
177    }
178
179    /**
180     * Pass-thru to the parent class in order to make this function public
181     *
182     * @param file the file used to compute a sha1 hash
183     * @return the sha1 hash of the file contents
184     */
185    @Override
186    public String sha1(final File file) {
187        final String cachedSha1 = getCachedSha1(file);
188        if (cachedSha1 == null) {
189            return computeAndCacheSha1(file);
190        }
191        return cachedSha1;
192    }
193
194    private String getCachedSha1(final File file) {
195        final String id = idFor(file) + JCR_CONTENT_SUFFIX;
196        if (extraPropertiesStore() != null) {
197            final Map<Name, Property> extraProperties = extraPropertiesStore().getProperties(id);
198            final Name digestName = nameFrom(CONTENT_DIGEST);
199            if (extraProperties.containsKey(digestName)) {
200                if (!hasBeenModifiedSincePropertiesWereStored(file, extraProperties.get(nameFrom(JCR_CREATED)))) {
201                    LOGGER.trace("Found sha1 for {} in extra properties store.", id);
202                    final String uriStr = ((URI) extraProperties.get(digestName).getFirstValue()).toString();
203                    return uriStr.substring(uriStr.indexOf("sha1:") + 5);
204                }
205            }
206        } else {
207            LOGGER.trace("No cache configured to contain object hashes.");
208        }
209        return null;
210    }
211
212    private String computeAndCacheSha1(final File file) {
213        final String id = idFor(file) + JCR_CONTENT_SUFFIX;
214        LOGGER.trace("Computing sha1 for {}.", id);
215        final String sha1 = super.sha1(file);
216        if (shouldCacheProperties()) {
217            final Map<Name, Property> updateMap = new HashMap<>();
218            final Property digestProperty = new BasicSingleValueProperty(nameFrom(CONTENT_DIGEST),
219                    asURI("SHA-1", sha1));
220            final Property digestDateProperty = new BasicSingleValueProperty(nameFrom(JCR_CREATED),
221                    factories().getDateFactory().create(file.lastModified()));
222            updateMap.put(digestProperty.getName(), digestProperty);
223            updateMap.put(digestDateProperty.getName(), digestDateProperty);
224            extraPropertiesStore().updateProperties(id, updateMap);
225        }
226        return sha1;
227    }
228
229    private static void decorateObjectNode(final DocumentReader docReader, final DocumentWriter docWriter) {
230        if (!docReader.getMixinTypeNames().contains(FEDORA_CONTAINER)) {
231            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_CONTAINER, docReader.getDocumentId());
232            docWriter.addMixinType(FEDORA_CONTAINER);
233        }
234    }
235
236    private static void decorateDatastreamNode(final DocumentReader docReader, final DocumentWriter docWriter) {
237        if (!docReader.getMixinTypeNames().contains(FEDORA_NON_RDF_SOURCE_DESCRIPTION)) {
238            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_NON_RDF_SOURCE_DESCRIPTION, docReader.getDocumentId());
239            docWriter.addMixinType(FEDORA_NON_RDF_SOURCE_DESCRIPTION);
240        }
241    }
242
243    private static void decorateContentNode(final DocumentReader docReader,
244                                            final DocumentWriter docWriter,
245                                            final File file) {
246        if (!docReader.getMixinTypeNames().contains(FEDORA_BINARY)) {
247            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_BINARY, docReader.getDocumentId());
248            docWriter.addMixinType(FEDORA_BINARY);
249        }
250
251        if (null == docReader.getProperty(CONTENT_DIGEST)
252                || hasBeenModifiedSincePropertiesWereStored(file, docReader.getProperty(JCR_CREATED))) {
253            final BinaryValue binaryValue = getBinaryValue(docReader);
254            final String dsChecksum = binaryValue.getHexHash();
255            final String dsURI = asURI("SHA-1", dsChecksum).toString();
256
257            LOGGER.trace("Adding {} property of {} to {}", CONTENT_DIGEST, dsURI, docReader.getDocumentId());
258            docWriter.addProperty(CONTENT_DIGEST, dsURI);
259        }
260
261        if (null == docReader.getProperty(CONTENT_SIZE)) {
262            final long binarySize = file.length();
263            LOGGER.trace("Adding {} property of {} to {}", CONTENT_SIZE, binarySize, docReader.getDocumentId());
264            docWriter.addProperty(CONTENT_SIZE, binarySize);
265        }
266
267        LOGGER.debug("Decorated data property at path: {}", docReader.getDocumentId());
268    }
269
270    private static boolean hasBeenModifiedSincePropertiesWereStored(final File file, final Property lastModified) {
271        if (lastModified == null) {
272            LOGGER.trace("Hash for {} has not been computed yet.", file.getName());
273            return true;
274        }
275        final DateTime datetime = (DateTime) lastModified.getFirstValue();
276        if (datetime.toDate().equals(new Date(file.lastModified()))) {
277            return false;
278        }
279        LOGGER.trace("{} has been modified ({}) since hash was last computed ({}).", file.getName(),
280                new Date(file.lastModified()), datetime.toDate());
281        return true;
282    }
283
284    private static BinaryValue getBinaryValue(final DocumentReader docReader) {
285        final Property binaryProperty = docReader.getProperty(JCR_DATA);
286        return (BinaryValue) binaryProperty.getFirstValue();
287    }
288
289    /* Override write operations to also update the parent file's timestamp, so
290       its Last-Modified header correctly reflects changes to children. */
291    @Override
292    public boolean removeDocument( final String id ) {
293        if ( super.removeDocument(id) ) {
294            touchParent(id);
295            return true;
296        }
297        return false;
298    }
299
300    @Override
301    public void storeDocument( final Document document ) {
302        super.storeDocument( document );
303        touchParent(readDocument(document).getDocumentId());
304    }
305
306    @Override
307    public void updateDocument( final DocumentChanges changes ) {
308        super.updateDocument( changes );
309        touchParent( changes.getDocumentId() );
310    }
311
312    /**
313     * Find the parent file, and set its timestamp to the current time.  This
314     * timestamp will be used for populating the Last-Modified header.
315     * @param id the id
316    **/
317    protected void touchParent( final String id ) {
318        if (!isRoot(id)) {
319            final File file = fileFor(id);
320            final File parent = file.getParentFile();
321            parent.setLastModified(currentTimeMillis());
322        }
323    }
324
325    /* Overriding so unit test can mock. */
326    @Override
327    @VisibleForTesting
328    protected File fileFor( final String id ) {
329        return super.fileFor(id);
330    }
331    @Override
332    @VisibleForTesting
333    protected DocumentReader readDocument( final Document document ) {
334        return super.readDocument(document);
335    }
336
337    /* Overriding to make the FedoraFileSystemConnector is always read-only. */
338    @Override
339    public boolean isReadonly() {
340        return true;
341    }
342
343    @Override
344    public boolean isContentNode(final String id) {
345        return super.isContentNode(id);
346    }
347
348}