001/**
002 * Copyright 2015 DuraSpace, Inc.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.fcrepo.connector.file;
017
018import static java.lang.System.currentTimeMillis;
019import static org.fcrepo.kernel.FedoraJcrTypes.CONTENT_DIGEST;
020import static org.fcrepo.kernel.FedoraJcrTypes.CONTENT_SIZE;
021import static org.fcrepo.kernel.FedoraJcrTypes.FEDORA_BINARY;
022import static org.fcrepo.kernel.FedoraJcrTypes.FEDORA_CONTAINER;
023import static org.fcrepo.kernel.FedoraJcrTypes.FEDORA_NON_RDF_SOURCE_DESCRIPTION;
024import static org.fcrepo.kernel.FedoraJcrTypes.FEDORA_RESOURCE;
025import static org.fcrepo.kernel.FedoraJcrTypes.JCR_CREATED;
026import static org.fcrepo.kernel.FedoraJcrTypes.JCR_LASTMODIFIED;
027import static org.fcrepo.kernel.utils.ContentDigest.asURI;
028import static org.modeshape.jcr.api.JcrConstants.JCR_DATA;
029import static org.modeshape.jcr.api.JcrConstants.NT_FILE;
030import static org.modeshape.jcr.api.JcrConstants.NT_FOLDER;
031import static org.modeshape.jcr.api.JcrConstants.NT_RESOURCE;
032
033import java.io.File;
034import java.net.URI;
035import java.util.Date;
036import java.util.HashMap;
037import java.io.IOException;
038import java.util.Map;
039
040import com.google.common.annotations.VisibleForTesting;
041
042import org.infinispan.schematic.document.Document;
043import org.modeshape.connector.filesystem.ExternalJsonSidecarExtraPropertyStore;
044import org.modeshape.connector.filesystem.FileSystemConnector;
045import org.modeshape.jcr.api.value.DateTime;
046import org.modeshape.jcr.api.nodetype.NodeTypeManager;
047import org.modeshape.jcr.spi.federation.DocumentChanges;
048import org.modeshape.jcr.spi.federation.DocumentReader;
049import org.modeshape.jcr.spi.federation.DocumentWriter;
050import org.modeshape.jcr.value.BinaryValue;
051import org.modeshape.jcr.value.Name;
052import org.modeshape.jcr.value.Property;
053import org.modeshape.jcr.value.basic.BasicSingleValueProperty;
054import org.slf4j.Logger;
055import org.slf4j.LoggerFactory;
056
057import javax.jcr.NamespaceRegistry;
058import javax.jcr.RepositoryException;
059
060/**
061 * This class extends the {@link FileSystemConnector} to enable the autocreation of Fedora-specific datastream and
062 * content properties.
063 *
064 * @author Andrew Woods
065 *         Date: 1/30/14
066 */
067public class FedoraFileSystemConnector extends FileSystemConnector {
068
069    private static final Logger LOGGER = LoggerFactory.getLogger(FedoraFileSystemConnector.class);
070
071    private static final String DELIMITER = "/";
072    private static final String JCR_CONTENT = "jcr:content";
073    private static final String JCR_CONTENT_SUFFIX = DELIMITER + JCR_CONTENT;
074
075    /**
076     * The string path for a {@link File} object that represents the top-level directory in which properties are
077     * stored.  This is optional for this connector, but if set allows properties to be cached (greatly
078     * improving performance) for even read-only connectors.  When this property is specified the extraPropertiesStore
079     * should be null (not specified) as it would be overridden by this.
080     */
081    private String propertiesDirectoryPath;
082    private File propertiesDirectory;
083
084    @Override
085    public void initialize(final NamespaceRegistry registry,
086                           final NodeTypeManager nodeTypeManager) throws RepositoryException, IOException {
087        super.initialize(registry, nodeTypeManager);
088
089        if (propertiesDirectoryPath != null) {
090           propertiesDirectory = new File(propertiesDirectoryPath);
091            if (!propertiesDirectory.exists() || !propertiesDirectory.isDirectory()) {
092                throw new RepositoryException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath
093                        + ", does not exist or is not a directory.");
094            } else if ( !propertiesDirectory.canRead() || !propertiesDirectory.canWrite() ) {
095                throw new RepositoryException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath
096                        + ", should be readable and writable.");
097            }
098            if (extraPropertiesStore() != null) {
099                LOGGER.warn("Extra properties store was specified but won't be used!");
100            }
101            setExtraPropertiesStore(new ExternalJsonSidecarExtraPropertyStore(this, translator(), propertiesDirectory));
102        }
103    }
104
105    /**
106     * This method returns the object/document for the node with the federated arg 'id'.
107     *
108     * Additionally, this method adds Fedora datastream and content properties to the result of the parent class
109     * implementation.
110     */
111    @Override
112    public Document getDocumentById(final String id) {
113        LOGGER.debug("Getting Federated document: {}", id);
114        if (null == id || id.isEmpty()) {
115            LOGGER.warn("Can not get document with null id");
116            return null;
117        }
118
119        final Document doc = super.getDocumentById(id);
120        if ( doc == null ) {
121            LOGGER.debug("Non-existent node, document is null: {}", id);
122            return doc;
123        }
124
125        final DocumentReader docReader = readDocument(doc);
126        final DocumentWriter docWriter = writeDocument(doc);
127        final long lastmod = fileFor(id).lastModified();
128        LOGGER.debug("Adding lastModified={}", lastmod);
129        docWriter.addProperty(JCR_LASTMODIFIED, lastmod);
130
131        final String primaryType = docReader.getPrimaryTypeName();
132
133        if (!docReader.getMixinTypeNames().contains(FEDORA_RESOURCE)) {
134            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_RESOURCE, id);
135            docWriter.addMixinType(FEDORA_RESOURCE);
136        }
137
138        // Is Fedora Datastream?
139        if (primaryType.equals(NT_FILE)) {
140            decorateDatastreamNode(docReader, docWriter);
141
142        // Is Fedora Content?
143        } else if (primaryType.equals(NT_RESOURCE)) {
144            decorateContentNode(docReader, docWriter, fileFor(id));
145
146        // Is Fedora Object?
147        } else if (primaryType.equals(NT_FOLDER)) {
148            decorateObjectNode(docReader, docWriter);
149        }
150
151        return docWriter.document();
152    }
153
154    /**
155     * Checks whether internally managed properties can and should be stored to
156     * an ExtraPropertiesStore.
157     */
158    protected boolean shouldCacheProperties() {
159        return extraPropertiesStore() != null && (!isReadonly() || this.propertiesDirectory != null);
160    }
161
162    @Override
163    public String sha1(final File file) {
164        final String cachedSha1 = getCachedSha1(file);
165        if (cachedSha1 == null) {
166            return computeAndCacheSha1(file);
167        }
168        return cachedSha1;
169    }
170
171
172    private String getCachedSha1(final File file) {
173        final String id = idFor(file) + JCR_CONTENT_SUFFIX;
174        if (extraPropertiesStore() != null) {
175            final Map<Name, Property> extraProperties = extraPropertiesStore().getProperties(id);
176            final Name digestName = nameFrom(CONTENT_DIGEST);
177            if (extraProperties.containsKey(digestName)) {
178                if (!hasBeenModifiedSincePropertiesWereStored(file, extraProperties.get(nameFrom(JCR_CREATED)))) {
179                    LOGGER.trace("Found sha1 for {} in extra properties store.", id);
180                    final String uriStr = ((URI) extraProperties.get(digestName).getFirstValue()).toString();
181                    return uriStr.substring(uriStr.indexOf("sha1:") + 5);
182                }
183            }
184        } else {
185            LOGGER.trace("No cache configured to contain object hashes.");
186        }
187        return null;
188    }
189
190    private String computeAndCacheSha1(final File file) {
191        final String id = idFor(file) + JCR_CONTENT_SUFFIX;
192        LOGGER.trace("Computing sha1 for {}.", id);
193        final String sha1 = super.sha1(file);
194        if (shouldCacheProperties()) {
195            final Map<Name, Property> updateMap = new HashMap<>();
196            final Property digestProperty = new BasicSingleValueProperty(nameFrom(CONTENT_DIGEST),
197                    asURI("SHA-1", sha1));
198            final Property digestDateProperty = new BasicSingleValueProperty(nameFrom(JCR_CREATED),
199                    factories().getDateFactory().create(file.lastModified()));
200            updateMap.put(digestProperty.getName(), digestProperty);
201            updateMap.put(digestDateProperty.getName(), digestDateProperty);
202            extraPropertiesStore().updateProperties(id, updateMap);
203        }
204        return sha1;
205    }
206
207
208
209    private static void decorateObjectNode(final DocumentReader docReader, final DocumentWriter docWriter) {
210        if (!docReader.getMixinTypeNames().contains(FEDORA_CONTAINER)) {
211            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_CONTAINER, docReader.getDocumentId());
212            docWriter.addMixinType(FEDORA_CONTAINER);
213        }
214    }
215
216    private static void decorateDatastreamNode(final DocumentReader docReader, final DocumentWriter docWriter) {
217        if (!docReader.getMixinTypeNames().contains(FEDORA_NON_RDF_SOURCE_DESCRIPTION)) {
218            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_NON_RDF_SOURCE_DESCRIPTION, docReader.getDocumentId());
219            docWriter.addMixinType(FEDORA_NON_RDF_SOURCE_DESCRIPTION);
220        }
221    }
222
223    private static void decorateContentNode(final DocumentReader docReader,
224                                            final DocumentWriter docWriter,
225                                            final File file) {
226        if (!docReader.getMixinTypeNames().contains(FEDORA_BINARY)) {
227            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_BINARY, docReader.getDocumentId());
228            docWriter.addMixinType(FEDORA_BINARY);
229        }
230
231        if (null == docReader.getProperty(CONTENT_DIGEST)
232                || hasBeenModifiedSincePropertiesWereStored(file, docReader.getProperty(JCR_CREATED))) {
233            final BinaryValue binaryValue = getBinaryValue(docReader);
234            final String dsChecksum = binaryValue.getHexHash();
235            final String dsURI = asURI("SHA-1", dsChecksum).toString();
236
237            LOGGER.trace("Adding {} property of {} to {}", CONTENT_DIGEST, dsURI, docReader.getDocumentId());
238            docWriter.addProperty(CONTENT_DIGEST, dsURI);
239        }
240
241        if (null == docReader.getProperty(CONTENT_SIZE)) {
242            final long binarySize = file.length();
243            LOGGER.trace("Adding {} property of {} to {}", CONTENT_SIZE, binarySize, docReader.getDocumentId());
244            docWriter.addProperty(CONTENT_SIZE, binarySize);
245        }
246
247        LOGGER.debug("Decorated data property at path: {}", docReader.getDocumentId());
248    }
249
250    private static boolean hasBeenModifiedSincePropertiesWereStored(final File file, final Property lastModified) {
251        if (lastModified == null) {
252            LOGGER.trace("Hash for {} has not been computed yet.", file.getName());
253            return true;
254        }
255        final DateTime datetime = (DateTime) lastModified.getFirstValue();
256        if (datetime.toDate().equals(new Date(file.lastModified()))) {
257            return false;
258        }
259        LOGGER.trace("{} has been modified ({}) since hash was last computed ({}).", file.getName(),
260                new Date(file.lastModified()), datetime.toDate());
261        return true;
262    }
263
264    private static BinaryValue getBinaryValue(final DocumentReader docReader) {
265        final Property binaryProperty = docReader.getProperty(JCR_DATA);
266        return (BinaryValue) binaryProperty.getFirstValue();
267    }
268
269    /* Override write operations to also update the parent file's timestamp, so
270       its Last-Modified header correctly reflects changes to children. */
271    @Override
272    public boolean removeDocument( final String id ) {
273        if ( super.removeDocument(id) ) {
274            touchParent(id);
275            return true;
276        }
277        return false;
278    }
279
280    @Override
281    public void storeDocument( final Document document ) {
282        super.storeDocument( document );
283        touchParent(readDocument(document).getDocumentId());
284    }
285
286    @Override
287    public void updateDocument( final DocumentChanges changes ) {
288        super.updateDocument( changes );
289        touchParent( changes.getDocumentId() );
290    }
291
292    /**
293     * Find the parent file, and set its timestamp to the current time.  This
294     * timestamp will be used for populating the Last-Modified header.
295    **/
296    protected void touchParent( final String id ) {
297        if (!isRoot(id)) {
298            final File file = fileFor(id);
299            final File parent = file.getParentFile();
300            parent.setLastModified(currentTimeMillis());
301        }
302    }
303
304    /* Overriding so unit test can mock. */
305    @Override
306    @VisibleForTesting
307    protected File fileFor( final String id ) {
308        return super.fileFor(id);
309    }
310    @Override
311    @VisibleForTesting
312    protected DocumentReader readDocument( final Document document ) {
313        return super.readDocument(document);
314    }
315
316    /* Overriding to make the FedoraFileSystemConnector is always read-only. */
317    @Override
318    public boolean isReadonly() {
319        return true;
320    }
321}