package org.kitesdk.morphline.saxon;

import com.ctc.wstx.cfg.XmlConsts;
import com.google.common.base.Charsets;
import com.typesafe.config.Config;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.nio.charset.Charset;
import java.util.Collection;
import java.util.Collections;
import org.ccil.cowan.tagsoup.HTMLSchema;
import org.ccil.cowan.tagsoup.Parser;
import org.ccil.cowan.tagsoup.XMLWriter;
import org.kitesdk.morphline.api.Command;
import org.kitesdk.morphline.api.CommandBuilder;
import org.kitesdk.morphline.api.MorphlineCompilationException;
import org.kitesdk.morphline.api.MorphlineContext;
import org.kitesdk.morphline.api.MorphlineRuntimeException;
import org.kitesdk.morphline.api.Record;
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.stdio.AbstractParser;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;

/* loaded from: input_file:lib/kite-morphlines-saxon-1.1.0.jar:org/kitesdk/morphline/saxon/ConvertHTMLBuilder.class */
/**
 * Builder for the {@code convertHTML} morphline command.
 *
 * <p>The command produced by {@link #build} feeds the record's input stream through a
 * TagSoup {@link Parser}, serializes the resulting SAX events with a TagSoup
 * {@link XMLWriter} as UTF-8, and stores those bytes in the
 * {@link Fields#ATTACHMENT_BODY} field of a copy of the record before handing the copy
 * to the child command.
 */
public final class ConvertHTMLBuilder implements CommandBuilder {

    /** Name under which this command is registered. */
    private static final String COMMAND_NAME = "convertHTML";

    /**
     * Command implementation; one TagSoup reader is created per command instance and reused
     * for every record.
     */
    private static final class ConvertHTML extends AbstractParser {

        /** Encoding announced to the XML writer; matches the UTF-8 writer it is wrapped around. */
        private static final String OUTPUT_ENCODING = "UTF-8";

        /**
         * Value that switches an {@link XMLWriter} output property on. Kept local on purpose so
         * that this class does not borrow an equal-valued constant from an unrelated XML library.
         */
        private static final String YES = "yes";

        /** Charset from the {@code charset} option; may be {@code null} (that is the configured default). */
        private final Charset charset;

        /** Value of the {@code omitXMLDeclaration} option (default {@code false}). */
        private final boolean omitXMLDeclaration;

        /** TagSoup parser, configured once in the constructor. */
        private final XMLReader xmlReader;

        /** Schema instance handed to the parser via {@link Parser#schemaProperty}. */
        private final HTMLSchema htmlSchema;

        /**
         * Reads the command options from {@code config} and configures the TagSoup parser.
         *
         * <p>NOTE(review): {@code noNamespaces} and {@code suppressIgnorableWhitespace} are negated
         * before being applied to their TagSoup feature, whereas the other {@code noXxx} options
         * ({@code noCDATA}, {@code noRootBogons}, {@code noDefaultAttributes}, {@code noRestart}, ...)
         * are passed through unchanged. Whether that polarity is intended should be confirmed
         * against the TagSoup feature documentation; it is deliberately left as-is here because
         * existing configurations depend on the current behaviour.
         *
         * @throws SAXNotRecognizedException if the parser rejects a property or feature name
         * @throws SAXNotSupportedException if the parser rejects a property or feature value
         */
        public ConvertHTML(CommandBuilder commandBuilder, Config config, Command command, Command command2, MorphlineContext morphlineContext) throws SAXNotRecognizedException, SAXNotSupportedException {
            super(commandBuilder, config, command, command2, morphlineContext);
            this.htmlSchema = new HTMLSchema();
            this.charset = getConfigs().getCharset(config, "charset", null);
            this.omitXMLDeclaration = option(config, "omitXMLDeclaration", false);

            this.xmlReader = new Parser();
            this.xmlReader.setProperty(Parser.schemaProperty, this.htmlSchema);
            this.xmlReader.setFeature(Parser.CDATAElementsFeature, option(config, "noCDATA", false));
            this.xmlReader.setFeature(Parser.namespacesFeature, !option(config, "noNamespaces", true));
            this.xmlReader.setFeature(Parser.ignoreBogonsFeature, option(config, "noBogons", false));
            this.xmlReader.setFeature(Parser.bogonsEmptyFeature, option(config, "emptyBogons", false));
            this.xmlReader.setFeature(Parser.rootBogonsFeature, option(config, "noRootBogons", false));
            this.xmlReader.setFeature(Parser.defaultAttributesFeature, option(config, "noDefaultAttributes", false));
            this.xmlReader.setFeature(Parser.translateColonsFeature, option(config, "noColons", false));
            this.xmlReader.setFeature(Parser.restartElementsFeature, option(config, "noRestart", false));
            this.xmlReader.setFeature(Parser.ignorableWhitespaceFeature, !option(config, "suppressIgnorableWhitespace", true));

            // Must run after every option has been read from the config.
            validateArguments();
        }

        /** Reads the boolean option {@code name} from {@code config}, falling back to {@code defaultValue}. */
        private boolean option(Config config, String name, boolean defaultValue) {
            return getConfigs().getBoolean(config, name, defaultValue);
        }

        /**
         * Converts the stream and forwards the result, translating any SAX failure into a
         * {@link MorphlineRuntimeException}.
         *
         * @throws IOException if reading the input stream fails
         */
        @Override // org.kitesdk.morphline.stdio.AbstractParser
        protected boolean doProcess(Record record, InputStream inputStream) throws IOException {
            try {
                return doProcess2(record, inputStream);
            } catch (SAXException e) {
                // SAXNotRecognizedException and SAXNotSupportedException are SAXException
                // subclasses, so this single clause covers all three former catch blocks.
                throw new MorphlineRuntimeException(e);
            }
        }

        /**
         * Parses {@code inputStream} with the configured TagSoup reader and passes a copy of
         * {@code record}, carrying the serialized output, to the child command.
         *
         * @return whatever the child command's {@code process} call returns
         */
        private boolean doProcess2(Record record, InputStream inputStream) throws IOException, SAXException {
            ByteArrayOutputStream xmlBytes = new ByteArrayOutputStream(16 * 1024);
            XMLWriter xmlWriter = new XMLWriter(new BufferedWriter(new OutputStreamWriter(xmlBytes, Charsets.UTF_8)));
            xmlWriter.setOutputProperty(XMLWriter.ENCODING, OUTPUT_ENCODING);
            if (this.omitXMLDeclaration) {
                xmlWriter.setOutputProperty(XMLWriter.OMIT_XML_DECLARATION, YES);
            }
            this.xmlReader.setContentHandler(xmlWriter);

            // The input charset is resolved per record by detectCharset(record, configured charset).
            Charset inputCharset = detectCharset(record, this.charset);
            InputSource source = new InputSource(new BufferedReader(new InputStreamReader(inputStream, inputCharset)));
            this.xmlReader.parse(source);

            // Only reached when parsing did not throw: the incoming record itself is left
            // untouched and a copy with the converted body is forwarded instead.
            Record outputRecord = record.copy();
            removeAttachments(outputRecord);
            outputRecord.replaceValues(Fields.ATTACHMENT_BODY, xmlBytes.toByteArray());
            incrementNumRecords();
            return getChild().process(outputRecord);
        }
    }

    /** Returns the single name of this command, {@code convertHTML}. */
    @Override // org.kitesdk.morphline.api.CommandBuilder
    public Collection<String> getNames() {
        return Collections.singletonList(COMMAND_NAME);
    }

    /**
     * Creates the {@code convertHTML} command for the given configuration.
     *
     * @throws MorphlineCompilationException if the TagSoup parser rejects one of the
     *         properties or features applied during construction
     */
    @Override // org.kitesdk.morphline.api.CommandBuilder
    public Command build(Config config, Command command, Command command2, MorphlineContext morphlineContext) {
        try {
            return new ConvertHTML(this, config, command, command2, morphlineContext);
        } catch (SAXException e) {
            // The constructor only throws SAXNotRecognizedException / SAXNotSupportedException,
            // both of which are SAXException subclasses and were handled identically.
            throw new MorphlineCompilationException("Cannot compile", config, e);
        }
    }
}
