001/**
002 * Copyright 2015 DuraSpace, Inc.
003 *
004 * Licensed under the Apache License, Version 2.0 (the "License");
005 * you may not use this file except in compliance with the License.
006 * You may obtain a copy of the License at
007 *
008 *     http://www.apache.org/licenses/LICENSE-2.0
009 *
010 * Unless required by applicable law or agreed to in writing, software
011 * distributed under the License is distributed on an "AS IS" BASIS,
012 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
013 * See the License for the specific language governing permissions and
014 * limitations under the License.
015 */
016package org.fcrepo.connector.file;
017
018import static java.lang.System.currentTimeMillis;
019import static org.fcrepo.kernel.FedoraJcrTypes.CONTENT_DIGEST;
020import static org.fcrepo.kernel.FedoraJcrTypes.CONTENT_SIZE;
021import static org.fcrepo.kernel.FedoraJcrTypes.FEDORA_BINARY;
022import static org.fcrepo.kernel.FedoraJcrTypes.FEDORA_CONTAINER;
023import static org.fcrepo.kernel.FedoraJcrTypes.FEDORA_NON_RDF_SOURCE_DESCRIPTION;
024import static org.fcrepo.kernel.FedoraJcrTypes.FEDORA_RESOURCE;
025import static org.fcrepo.kernel.FedoraJcrTypes.JCR_CREATED;
026import static org.fcrepo.kernel.FedoraJcrTypes.JCR_LASTMODIFIED;
027import static org.fcrepo.kernel.utils.ContentDigest.asURI;
028import static org.modeshape.jcr.api.JcrConstants.JCR_DATA;
029import static org.modeshape.jcr.api.JcrConstants.NT_FILE;
030import static org.modeshape.jcr.api.JcrConstants.NT_FOLDER;
031import static org.modeshape.jcr.api.JcrConstants.NT_RESOURCE;
032
033import java.io.File;
034import java.net.URI;
035import java.util.Date;
036import java.util.HashMap;
037import java.io.IOException;
038import java.util.Map;
039
040import com.google.common.annotations.VisibleForTesting;
041
042import org.infinispan.schematic.document.Document;
043import org.modeshape.connector.filesystem.ExternalJsonSidecarExtraPropertyStore;
044import org.modeshape.connector.filesystem.FileSystemConnector;
045import org.modeshape.jcr.api.value.DateTime;
046import org.modeshape.jcr.api.nodetype.NodeTypeManager;
047import org.modeshape.jcr.spi.federation.DocumentChanges;
048import org.modeshape.jcr.spi.federation.DocumentReader;
049import org.modeshape.jcr.spi.federation.DocumentWriter;
050import org.modeshape.jcr.value.BinaryValue;
051import org.modeshape.jcr.value.Name;
052import org.modeshape.jcr.value.Property;
053import org.modeshape.jcr.value.basic.BasicSingleValueProperty;
054import org.slf4j.Logger;
055import org.slf4j.LoggerFactory;
056
057import javax.jcr.NamespaceRegistry;
058import javax.jcr.RepositoryException;
059
060/**
061 * This class extends the {@link FileSystemConnector} to enable the autocreation of Fedora-specific datastream and
062 * content properties.
063 *
064 * @author Andrew Woods
065 *         Date: 1/30/14
066 */
067public class FedoraFileSystemConnector extends FileSystemConnector {
068
069    private static final Logger LOGGER = LoggerFactory.getLogger(FedoraFileSystemConnector.class);
070
071    private static final String DELIMITER = "/";
072    private static final String JCR_CONTENT = "jcr:content";
073    private static final String JCR_CONTENT_SUFFIX = DELIMITER + JCR_CONTENT;
074
075    /**
076     * The string path for a {@link File} object that represents the top-level directory in which properties are
077     * stored.  This is optional for this connector, but if set allows properties to be cached (greatly
078     * improving performance) for even read-only connectors.  When this property is specified the extraPropertiesStore
079     * should be null (not specified) as it would be overridden by this.
080     */
081    private String propertiesDirectoryPath;
082    private File propertiesDirectory;
083
084    @Override
085    public void initialize(final NamespaceRegistry registry,
086                           final NodeTypeManager nodeTypeManager) throws RepositoryException, IOException {
087        super.initialize(registry, nodeTypeManager);
088
089        if (propertiesDirectoryPath != null) {
090           propertiesDirectory = new File(propertiesDirectoryPath);
091            if (!propertiesDirectory.exists() || !propertiesDirectory.isDirectory()) {
092                throw new RepositoryException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath
093                        + ", does not exist or is not a directory.");
094            } else if ( !propertiesDirectory.canRead() || !propertiesDirectory.canWrite() ) {
095                throw new RepositoryException("Configured \"propertiesDirectory\", " + propertiesDirectoryPath
096                        + ", should be readable and writable.");
097            }
098            if (extraPropertiesStore() != null) {
099                LOGGER.warn("Extra properties store was specified but won't be used!");
100            }
101            setExtraPropertiesStore(new ExternalJsonSidecarExtraPropertyStore(this, translator(), propertiesDirectory));
102        }
103    }
104
105    /**
106     * This method returns the object/document for the node with the federated arg 'id'.
107     *
108     * Additionally, this method adds Fedora datastream and content properties to the result of the parent class
109     * implementation.
110     */
111    @Override
112    public Document getDocumentById(final String id) {
113        LOGGER.debug("Getting Federated document: {}", id);
114        if (null == id || id.isEmpty()) {
115            LOGGER.warn("Can not get document with null id");
116            return null;
117        }
118
119        final Document doc = super.getDocumentById(id);
120        if ( doc == null ) {
121            LOGGER.debug("Non-existent node, document is null: {}", id);
122            return doc;
123        }
124
125        final DocumentReader docReader = readDocument(doc);
126        final DocumentWriter docWriter = writeDocument(doc);
127        final long lastmod = fileFor(id).lastModified();
128        LOGGER.debug("Adding lastModified={}", lastmod);
129        docWriter.addProperty(JCR_LASTMODIFIED, lastmod);
130
131        final String primaryType = docReader.getPrimaryTypeName();
132
133        if (!docReader.getMixinTypeNames().contains(FEDORA_RESOURCE)) {
134            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_RESOURCE, id);
135            docWriter.addMixinType(FEDORA_RESOURCE);
136        }
137
138        // Is Fedora Datastream?
139        if (primaryType.equals(NT_FILE)) {
140            decorateDatastreamNode(docReader, docWriter);
141
142        // Is Fedora Content?
143        } else if (primaryType.equals(NT_RESOURCE)) {
144            decorateContentNode(docReader, docWriter, fileFor(id));
145
146        // Is Fedora Object?
147        } else if (primaryType.equals(NT_FOLDER)) {
148            decorateObjectNode(docReader, docWriter);
149        }
150
151        return docWriter.document();
152    }
153
154    /**
155     * Checks whether internally managed properties can and should be stored to
156     * an ExtraPropertiesStore.
157     * @return whether internally managed properties can and should be stored to
158     */
159    protected boolean shouldCacheProperties() {
160        return extraPropertiesStore() != null && (!isReadonly() || this.propertiesDirectory != null);
161    }
162
163    @Override
164    public String sha1(final File file) {
165        final String cachedSha1 = getCachedSha1(file);
166        if (cachedSha1 == null) {
167            return computeAndCacheSha1(file);
168        }
169        return cachedSha1;
170    }
171
172
173    private String getCachedSha1(final File file) {
174        final String id = idFor(file) + JCR_CONTENT_SUFFIX;
175        if (extraPropertiesStore() != null) {
176            final Map<Name, Property> extraProperties = extraPropertiesStore().getProperties(id);
177            final Name digestName = nameFrom(CONTENT_DIGEST);
178            if (extraProperties.containsKey(digestName)) {
179                if (!hasBeenModifiedSincePropertiesWereStored(file, extraProperties.get(nameFrom(JCR_CREATED)))) {
180                    LOGGER.trace("Found sha1 for {} in extra properties store.", id);
181                    final String uriStr = ((URI) extraProperties.get(digestName).getFirstValue()).toString();
182                    return uriStr.substring(uriStr.indexOf("sha1:") + 5);
183                }
184            }
185        } else {
186            LOGGER.trace("No cache configured to contain object hashes.");
187        }
188        return null;
189    }
190
191    private String computeAndCacheSha1(final File file) {
192        final String id = idFor(file) + JCR_CONTENT_SUFFIX;
193        LOGGER.trace("Computing sha1 for {}.", id);
194        final String sha1 = super.sha1(file);
195        if (shouldCacheProperties()) {
196            final Map<Name, Property> updateMap = new HashMap<>();
197            final Property digestProperty = new BasicSingleValueProperty(nameFrom(CONTENT_DIGEST),
198                    asURI("SHA-1", sha1));
199            final Property digestDateProperty = new BasicSingleValueProperty(nameFrom(JCR_CREATED),
200                    factories().getDateFactory().create(file.lastModified()));
201            updateMap.put(digestProperty.getName(), digestProperty);
202            updateMap.put(digestDateProperty.getName(), digestDateProperty);
203            extraPropertiesStore().updateProperties(id, updateMap);
204        }
205        return sha1;
206    }
207
208
209
210    private static void decorateObjectNode(final DocumentReader docReader, final DocumentWriter docWriter) {
211        if (!docReader.getMixinTypeNames().contains(FEDORA_CONTAINER)) {
212            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_CONTAINER, docReader.getDocumentId());
213            docWriter.addMixinType(FEDORA_CONTAINER);
214        }
215    }
216
217    private static void decorateDatastreamNode(final DocumentReader docReader, final DocumentWriter docWriter) {
218        if (!docReader.getMixinTypeNames().contains(FEDORA_NON_RDF_SOURCE_DESCRIPTION)) {
219            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_NON_RDF_SOURCE_DESCRIPTION, docReader.getDocumentId());
220            docWriter.addMixinType(FEDORA_NON_RDF_SOURCE_DESCRIPTION);
221        }
222    }
223
224    private static void decorateContentNode(final DocumentReader docReader,
225                                            final DocumentWriter docWriter,
226                                            final File file) {
227        if (!docReader.getMixinTypeNames().contains(FEDORA_BINARY)) {
228            LOGGER.trace("Adding mixin: {}, to {}", FEDORA_BINARY, docReader.getDocumentId());
229            docWriter.addMixinType(FEDORA_BINARY);
230        }
231
232        if (null == docReader.getProperty(CONTENT_DIGEST)
233                || hasBeenModifiedSincePropertiesWereStored(file, docReader.getProperty(JCR_CREATED))) {
234            final BinaryValue binaryValue = getBinaryValue(docReader);
235            final String dsChecksum = binaryValue.getHexHash();
236            final String dsURI = asURI("SHA-1", dsChecksum).toString();
237
238            LOGGER.trace("Adding {} property of {} to {}", CONTENT_DIGEST, dsURI, docReader.getDocumentId());
239            docWriter.addProperty(CONTENT_DIGEST, dsURI);
240        }
241
242        if (null == docReader.getProperty(CONTENT_SIZE)) {
243            final long binarySize = file.length();
244            LOGGER.trace("Adding {} property of {} to {}", CONTENT_SIZE, binarySize, docReader.getDocumentId());
245            docWriter.addProperty(CONTENT_SIZE, binarySize);
246        }
247
248        LOGGER.debug("Decorated data property at path: {}", docReader.getDocumentId());
249    }
250
251    private static boolean hasBeenModifiedSincePropertiesWereStored(final File file, final Property lastModified) {
252        if (lastModified == null) {
253            LOGGER.trace("Hash for {} has not been computed yet.", file.getName());
254            return true;
255        }
256        final DateTime datetime = (DateTime) lastModified.getFirstValue();
257        if (datetime.toDate().equals(new Date(file.lastModified()))) {
258            return false;
259        }
260        LOGGER.trace("{} has been modified ({}) since hash was last computed ({}).", file.getName(),
261                new Date(file.lastModified()), datetime.toDate());
262        return true;
263    }
264
265    private static BinaryValue getBinaryValue(final DocumentReader docReader) {
266        final Property binaryProperty = docReader.getProperty(JCR_DATA);
267        return (BinaryValue) binaryProperty.getFirstValue();
268    }
269
270    /* Override write operations to also update the parent file's timestamp, so
271       its Last-Modified header correctly reflects changes to children. */
272    @Override
273    public boolean removeDocument( final String id ) {
274        if ( super.removeDocument(id) ) {
275            touchParent(id);
276            return true;
277        }
278        return false;
279    }
280
281    @Override
282    public void storeDocument( final Document document ) {
283        super.storeDocument( document );
284        touchParent(readDocument(document).getDocumentId());
285    }
286
287    @Override
288    public void updateDocument( final DocumentChanges changes ) {
289        super.updateDocument( changes );
290        touchParent( changes.getDocumentId() );
291    }
292
293    /**
294     * Find the parent file, and set its timestamp to the current time.  This
295     * timestamp will be used for populating the Last-Modified header.
296     * @param id the id
297    **/
298    protected void touchParent( final String id ) {
299        if (!isRoot(id)) {
300            final File file = fileFor(id);
301            final File parent = file.getParentFile();
302            parent.setLastModified(currentTimeMillis());
303        }
304    }
305
306    /* Overriding so unit test can mock. */
307    @Override
308    @VisibleForTesting
309    protected File fileFor( final String id ) {
310        return super.fileFor(id);
311    }
312    @Override
313    @VisibleForTesting
314    protected DocumentReader readDocument( final Document document ) {
315        return super.readDocument(document);
316    }
317
318    /* Overriding to make the FedoraFileSystemConnector is always read-only. */
319    @Override
320    public boolean isReadonly() {
321        return true;
322    }
323}