001/*
002 * Licensed to DuraSpace under one or more contributor license agreements.
003 * See the NOTICE file distributed with this work for additional information
004 * regarding copyright ownership.
005 *
006 * DuraSpace licenses this file to you under the Apache License,
007 * Version 2.0 (the "License"); you may not use this file except in
008 * compliance with the License.  You may obtain a copy of the License at
009 *
010 *     http://www.apache.org/licenses/LICENSE-2.0
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS,
014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
015 * See the License for the specific language governing permissions and
016 * limitations under the License.
017 */
018package org.fcrepo.persistence.common;
019
020import static java.lang.String.format;
021import static org.apache.commons.codec.binary.Hex.encodeHexString;
022import static org.apache.commons.lang3.StringUtils.substringAfterLast;
023import static org.fcrepo.kernel.api.utils.ContentDigest.getAlgorithm;
024
025import java.io.IOException;
026import java.io.InputStream;
027import java.net.URI;
028import java.security.DigestInputStream;
029import java.security.MessageDigest;
030import java.security.NoSuchAlgorithmException;
031import java.util.Collection;
032import java.util.HashMap;
033import java.util.List;
034import java.util.Map;
035import java.util.Map.Entry;
036import java.util.stream.Collectors;
037
038import org.fcrepo.kernel.api.exception.InvalidChecksumException;
039import org.fcrepo.kernel.api.exception.RepositoryRuntimeException;
040import org.fcrepo.kernel.api.exception.UnsupportedAlgorithmException;
041import org.fcrepo.kernel.api.utils.ContentDigest;
042import org.fcrepo.config.DigestAlgorithm;
043
044/**
045 * Wrapper for an InputStream that allows for the computation and evaluation
046 * of multiple digests at once
047 *
048 * @author bbpennel
049 */
050public class MultiDigestInputStreamWrapper {
051
052    private final InputStream sourceStream;
053
054    private final Map<String, String> algToDigest;
055
056    private final Map<String, DigestInputStream> algToDigestStream;
057
058    private boolean streamRetrieved;
059
060    private Map<String, String> computedDigests;
061
062    /**
063     * Construct a MultiDigestInputStreamWrapper
064     *
065     * @param sourceStream the original source input stream
066     * @param digests collection of digests for the input stream
067     * @param wantDigests list of additional digest algorithms to compute for the input stream
068     */
069    public MultiDigestInputStreamWrapper(final InputStream sourceStream, final Collection<URI> digests,
070            final Collection<DigestAlgorithm> wantDigests) {
071        this.sourceStream = sourceStream;
072        algToDigest = new HashMap<>();
073        algToDigestStream = new HashMap<>();
074
075        if (digests != null) {
076            for (final URI digestUri : digests) {
077                final String algorithm = getAlgorithm(digestUri);
078                final String hash = substringAfterLast(digestUri.toString(), ":");
079                algToDigest.put(algorithm, hash);
080            }
081        }
082
083        // Merge the list of wanted digest algorithms with set of provided digests
084        if (wantDigests != null) {
085            for (final DigestAlgorithm wantDigest : wantDigests) {
086                if (!algToDigest.containsKey(wantDigest.getAlgorithm())) {
087                    algToDigest.put(wantDigest.getAlgorithm(), null);
088                }
089            }
090        }
091    }
092
093    /**
094     * Get the InputStream wrapped to produce the requested digests
095     *
096     * @return wrapped input stream
097     */
098    public InputStream getInputStream() {
099        streamRetrieved = true;
100        InputStream digestStream = sourceStream;
101        for (final String algorithm : algToDigest.keySet()) {
102            try {
103                // Progressively wrap the original stream in layers of digest streams
104                digestStream = new DigestInputStream(
105                        digestStream, MessageDigest.getInstance(algorithm));
106            } catch (final NoSuchAlgorithmException e) {
107                throw new UnsupportedAlgorithmException("Unsupported digest algorithm: " + algorithm, e);
108            }
109
110            algToDigestStream.put(algorithm, (DigestInputStream) digestStream);
111        }
112        return digestStream;
113    }
114
115    /**
116     * After consuming the inputstream, verify that all of the computed digests
117     * matched the provided digests.
118     *
119     * Note: the wrapped InputStream will be consumed if it has not already been read.
120     *
121     * @throws InvalidChecksumException thrown if any of the digests did not match
122     */
123    public void checkFixity() throws InvalidChecksumException {
124        calculateDigests();
125
126        algToDigest.forEach((algorithm, originalDigest) -> {
127            // Skip any algorithms which were calculated but no digest was provided for verification
128            if (originalDigest == null) {
129                return;
130            }
131            final String computed = computedDigests.get(algorithm);
132
133            if (!originalDigest.equalsIgnoreCase(computed)) {
134                throw new InvalidChecksumException(format(
135                        "Checksum mismatch, computed %s digest %s did not match expected value %s",
136                        algorithm, computed, originalDigest));
137            }
138        });
139
140    }
141
142    /**
143     * Returns the list of digests calculated for the wrapped InputStream
144     *
145     * Note: the wrapped InputStream will be consumed if it has not already been read.
146     *
147     * @return list of digests calculated from the wrapped InputStream, in URN format.
148     */
149    public List<URI> getDigests() {
150        calculateDigests();
151
152        return computedDigests.entrySet().stream()
153                .map(e -> ContentDigest.asURI(e.getKey(), e.getValue()))
154                .collect(Collectors.toList());
155    }
156
157    /**
158     * Get the digest calculated for the provided algorithm
159     *
160     * @param alg algorithm of the digest to retrieve
161     * @return the calculated digest, or null if no digest of that type was calculated
162     */
163    public String getDigest(final DigestAlgorithm alg) {
164        calculateDigests();
165
166        return computedDigests.entrySet().stream()
167                .filter(entry -> alg.getAlgorithm().equals(entry.getKey()))
168                .map(Entry::getValue)
169                .findFirst()
170                .orElse(null);
171    }
172
173    private void calculateDigests() {
174        if (computedDigests != null) {
175            return;
176        }
177
178        if (!streamRetrieved) {
179            // Stream not previously consumed, consume it now in order to calculate digests
180            try (final InputStream is = getInputStream()) {
181                while (is.read() != -1) {
182                }
183            } catch (final IOException e) {
184                throw new RepositoryRuntimeException("Failed to read content stream while calculating digests", e);
185            }
186        }
187
188        computedDigests = new HashMap<>();
189        algToDigestStream.forEach((algorithm, digestStream) -> {
190            final String computed = encodeHexString(digestStream.getMessageDigest().digest());
191            computedDigests.put(algorithm, computed);
192        });
193    }
194}