package org.apache.tika.parser.microsoft;

import java.io.FileNotFoundException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import nxt.g00;
import nxt.he;
import nxt.np;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.HWPFOldDocument;
import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.hwpf.model.FieldsDocumentPart;
import org.apache.poi.hwpf.model.PicturesTable;
import org.apache.poi.hwpf.model.StyleDescription;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Field;
import org.apache.poi.hwpf.usermodel.HeaderStories;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Picture;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableRow;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
import org.apache.poi.poifs.filesystem.DirectoryNode;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.AttributesImpl;

/* loaded from: classes.dex */
public class WordExtractor extends AbstractPOIFSExtractor {
    /** Style-name to tag/style lookup table; filled once by this class's static initializer. */
    public static final Map<String, TagAndStyle> k;
    /** Plain {@code "p"} tag with no style class; registered under "Default" and "Normal". */
    public static final TagAndStyle l;
    /** True while an {@code <s>} (strike-through) element opened by {@code g()} is still open. */
    public boolean h;
    /** True while a {@code <b>} (bold) element opened by {@code g()} is still open. */
    public boolean i;
    /** True while an {@code <i>} (italic) element opened by {@code g()} is still open. */
    public boolean j;

    /* loaded from: classes.dex */
    /**
     * Bookkeeping for the pictures of one document: lookup by offset, the list of
     * pictures not attached to a character run of the main range, and the set of
     * pictures already written out.
     */
    public static class PicturesSource {
        /** Picture table of the document; used to ask whether a run carries a picture. */
        public PicturesTable a;
        /** Pictures keyed by their start offset. */
        public Map<Integer, Picture> c;
        /** Copy of {@link #e} in which every picture found on a main-range run is replaced by null. */
        public List<Picture> d;
        /** Every picture reported by the picture table, in the table's order. */
        public List<Picture> e;
        /** Pictures that callers have recorded as already emitted. */
        public Set<Picture> b = new HashSet<>();
        /** Cursor into {@link #d} used by {@link #c()}. */
        public int f = 0;

        /**
         * Indexes all pictures of the document and blanks out, in {@link #d}, those
         * that are attached to a character run of the document's main range.
         *
         * @param hWPFDocument document whose picture table and main range are read
         */
        public PicturesSource(HWPFDocument hWPFDocument) {
            this.a = hWPFDocument.getPicturesTable();
            this.e = this.a.getAllPictures();
            this.c = new HashMap<>();
            for (Picture picture : this.e) {
                this.c.put(Integer.valueOf(picture.getStartOffset()), picture);
            }
            this.d = new ArrayList<>(this.e);
            Range range = hWPFDocument.getRange();
            for (int i = 0; i < range.numCharacterRuns(); i++) {
                CharacterRun characterRun = range.getCharacterRun(i);
                if (this.a.hasPicture(characterRun)) {
                    // indexOf yields -1 when the offset lookup misses or the picture
                    // was already blanked by an earlier run; set(-1, ...) would throw.
                    int position = this.d.indexOf(a(characterRun));
                    if (position >= 0) {
                        this.d.set(position, null);
                    }
                }
            }
        }

        /**
         * Looks up the picture registered under the run's picture offset.
         *
         * @return the picture, or null when no picture starts at that offset
         */
        public final Picture a(CharacterRun characterRun) {
            return this.c.get(Integer.valueOf(characterRun.getPicOffset()));
        }

        /** Reports whether the picture table says the run carries a picture. */
        public final boolean b(CharacterRun characterRun) {
            return this.a.hasPicture(characterRun);
        }

        /**
         * Returns the next non-null entry of {@link #d}, advancing the cursor past it.
         *
         * @return the next remaining picture, or null once the list is exhausted
         */
        public final Picture c() {
            while (this.f < this.d.size()) {
                Picture picture = this.d.get(this.f);
                this.f++;
                if (picture != null) {
                    return picture;
                }
            }
            return null;
        }
    }

    /* loaded from: classes.dex */
    /** Pairs an output tag name with an optional style class. */
    public static class TagAndStyle {
        /** Tag name, e.g. "p" or "h1". */
        public String a;
        /** Style class, or null when the tag carries none. */
        public String b;

        /**
         * @param str  tag name
         * @param str2 style class, may be null
         */
        public TagAndStyle(String str, String str2) {
            this.b = str2;
            this.a = str;
        }

        /** Returns true when the tag is exactly two characters long and begins with "h". */
        public boolean a() {
            if (this.a.length() != 2) {
                return false;
            }
            return this.a.startsWith("h");
        }
    }

    // Registers the style names that always map to a fixed tag/style pair.
    static {
        TagAndStyle plainParagraph = new TagAndStyle("p", null);
        HashMap<String, TagAndStyle> fixedStyles = new HashMap<>();
        // "Default" and "Normal" share the single plain-paragraph instance.
        fixedStyles.put("Default", plainParagraph);
        fixedStyles.put("Normal", plainParagraph);
        // Each heading spelling gets its own instance.
        fixedStyles.put("heading", new TagAndStyle("h1", null));
        fixedStyles.put("Heading", new TagAndStyle("h1", null));
        fixedStyles.put("Title", new TagAndStyle("h1", "title"));
        fixedStyles.put("Subtitle", new TagAndStyle("h2", "subtitle"));
        fixedStyles.put("HTML Preformatted", new TagAndStyle("pre", null));
        k = fixedStyles;
        l = plainParagraph;
    }

    /**
     * Creates an extractor bound to the given parse context.
     *
     * @param parseContext context handed straight to the superclass constructor
     */
    public WordExtractor(ParseContext parseContext) {
        // NOTE(review): the meaning of the second superclass argument is not visible here; null is passed as-is.
        super(parseContext, null);
    }

    /**
     * Maps a paragraph style name to the tag and style class used for output.
     *
     * <p>Names registered in {@code k} win. "Table Contents" inside a table maps to
     * the default paragraph {@code l}. Names starting with "heading"/"Heading" map to
     * {@code h1}..{@code h6} based on the last character of the name. Anything else
     * becomes a {@code p} whose style class is the name with spaces replaced by
     * underscores and the first character lower-cased.
     *
     * @param str style name; null or empty yields a plain paragraph
     * @param z   true when the paragraph sits inside a table
     * @return the tag/style pair, never null
     */
    public static TagAndStyle e(String str, boolean z) {
        if (str == null || str.isEmpty()) {
            // substring(0, 1) below would throw on an empty name.
            return new TagAndStyle("p", null);
        }
        TagAndStyle fixed = k.get(str);
        if (fixed != null) {
            return fixed;
        }
        if (str.equals("Table Contents") && z) {
            return l;
        }
        if (str.startsWith("heading") || str.startsWith("Heading")) {
            int level = 1;
            try {
                level = Integer.parseInt(str.substring(str.length() - 1));
            } catch (NumberFormatException ignored) {
                // No trailing digit in the style name: keep level 1.
            }
            // Only h1..h6 are valid; a trailing '0' used to produce "h0".
            level = Math.max(1, Math.min(level, 6));
            // he.u is an external helper returning a StringBuilder; presumably seeded with its argument.
            StringBuilder tagBuilder = he.u("h");
            tagBuilder.append(level);
            return new TagAndStyle(tagBuilder.toString(), null);
        }
        String underscored = str.replace(' ', '_');
        String styleClass = underscored.substring(0, 1).toLowerCase(Locale.ROOT) + underscored.substring(1);
        return new TagAndStyle("p", styleClass);
    }

    /**
     * Sums the paragraph counts of the given ranges, skipping null entries.
     *
     * @param rangeArr ranges to count; individual elements may be null
     * @return total number of paragraphs across all non-null ranges
     */
    public static int f(Range... rangeArr) {
        int total = 0;
        for (int idx = 0; idx < rangeArr.length; idx++) {
            Range candidate = rangeArr[idx];
            if (candidate == null) {
                continue;
            }
            total += candidate.numParagraphs();
        }
        return total;
    }

    /**
     * Emits one character run: opens or closes {@code b}/{@code i}/{@code s} elements so
     * the open elements match the run's formatting, then writes the cleaned-up text.
     *
     * <p>Runs rejected by {@code l()} and runs consisting of a lone carriage return
     * are skipped entirely.
     *
     * @param characterRun        run to emit
     * @param z                   true to leave the formatting elements untouched
     * @param xHTMLContentHandler output sink
     */
    public final void g(CharacterRun characterRun, boolean z, XHTMLContentHandler xHTMLContentHandler) {
        if (!l(characterRun)) {
            return;
        }
        if (characterRun.text().equals("\r")) {
            return;
        }
        final String xhtmlNs = "http://www.w3.org/1999/xhtml";
        if (!z) {
            boolean bold = characterRun.isBold();
            if (bold != this.i) {
                // Elements nest as b > i > s, so inner ones are closed before b changes.
                if (this.h) {
                    xHTMLContentHandler.endElement(xhtmlNs, "s", "s");
                    this.h = false;
                }
                if (this.j) {
                    xHTMLContentHandler.endElement(xhtmlNs, "i", "i");
                    this.j = false;
                }
                if (bold) {
                    xHTMLContentHandler.startElement(xhtmlNs, "b", "b", XHTMLContentHandler.D2);
                } else {
                    xHTMLContentHandler.endElement(xhtmlNs, "b", "b");
                }
                this.i = bold;
            }
            boolean italic = characterRun.isItalic();
            if (italic != this.j) {
                if (this.h) {
                    xHTMLContentHandler.endElement(xhtmlNs, "s", "s");
                    this.h = false;
                }
                if (italic) {
                    xHTMLContentHandler.startElement(xhtmlNs, "i", "i", XHTMLContentHandler.D2);
                } else {
                    xHTMLContentHandler.endElement(xhtmlNs, "i", "i");
                }
                this.j = italic;
            }
            boolean struck = characterRun.isStrikeThrough();
            if (struck != this.h) {
                if (struck) {
                    xHTMLContentHandler.startElement(xhtmlNs, "s", "s", XHTMLContentHandler.D2);
                } else {
                    xHTMLContentHandler.endElement(xhtmlNs, "s", "s");
                }
                this.h = struck;
            }
        }

        String text = characterRun.text().replace('\r', '\n');
        if (text.endsWith("\u0007")) {
            // np.k is an external helper; presumably it drops the trailing 0x07 mark - TODO confirm.
            text = np.k(text, -1, 0);
        }
        final char char30 = 30;
        final char nonBreakingHyphen = 8209;
        final char char31 = 31;
        final char zeroWidthSpace = 8203;
        text = text.replace(char30, nonBreakingHyphen);
        text = text.replace(char31, zeroWidthSpace);
        // Any remaining control character becomes a line break.
        text = text.replaceAll("[\u0000-\u001f]", "\n");
        xHTMLContentHandler.g(text);
    }

    /**
     * Writes the paragraphs of the given ranges inside a {@code div} whose class
     * attribute is {@code str}. Nothing is written when the ranges hold no paragraphs.
     *
     * @param rangeArr            ranges to emit; elements may be null
     * @param str                 value of the div's class attribute
     * @param hWPFDocument        owning document
     * @param picturesSource      picture bookkeeping passed through to {@code i()}
     * @param picturesTable       picture table passed through to {@code i()}
     * @param xHTMLContentHandler output sink
     */
    public final void h(Range[] rangeArr, String str, HWPFDocument hWPFDocument, PicturesSource picturesSource, PicturesTable picturesTable, XHTMLContentHandler xHTMLContentHandler) {
        if (f(rangeArr) <= 0) {
            return;
        }
        xHTMLContentHandler.m("div", "class", str);
        ListManager numbering = new ListManager(hWPFDocument);
        for (Range story : rangeArr) {
            if (story == null) {
                continue;
            }
            int paragraphIndex = 0;
            while (paragraphIndex < story.numParagraphs()) {
                // i() returns how many extra paragraphs it consumed.
                int consumed = i(story.getParagraph(paragraphIndex), 0, story, hWPFDocument, FieldsDocumentPart.HEADER, picturesSource, picturesTable, numbering, xHTMLContentHandler);
                paragraphIndex = paragraphIndex + consumed + 1;
            }
        }
        xHTMLContentHandler.endElement("http://www.w3.org/1999/xhtml", "div", "div");
    }

    /* JADX WARN: Type inference failed for: r0v11 */
    /* JADX WARN: Type inference failed for: r0v8 */
    /* JADX WARN: Type inference failed for: r0v9, types: [boolean, int] */
    public final int i(Paragraph paragraph, int i, Range range, HWPFDocument hWPFDocument, FieldsDocumentPart fieldsDocumentPart, PicturesSource picturesSource, PicturesTable picturesTable, ListManager listManager, XHTMLContentHandler xHTMLContentHandler) {
        Paragraph paragraph2;
        TagAndStyle tagAndStyle;
        ?? r0;
        int i2;
        Field fieldByStartOffset;
        String str;
        if (paragraph.isInTable() && paragraph.getTableLevel() > i && i == 0) {
            Table table = range.getTable(paragraph);
            Attributes attributes = XHTMLContentHandler.D2;
            String str2 = "table";
            xHTMLContentHandler.startElement("http://www.w3.org/1999/xhtml", "table", "table", attributes);
            String str3 = "tbody";
            xHTMLContentHandler.startElement("http://www.w3.org/1999/xhtml", "tbody", "tbody", attributes);
            int i3 = 0;
            while (i3 < table.numRows()) {
                TableRow row = table.getRow(i3);
                String str4 = "tr";
                xHTMLContentHandler.startElement("http://www.w3.org/1999/xhtml", "tr", "tr", XHTMLContentHandler.D2);
                int i4 = 0;
                while (i4 < row.numCells()) {
                    TableCell cell = row.getCell(i4);
                    String str5 = "td";
                    xHTMLContentHandler.startElement("http://www.w3.org/1999/xhtml", "td", "td", XHTMLContentHandler.D2);
                    int i5 = 0;
                    while (i5 < cell.numParagraphs()) {
                        i(cell.getParagraph(i5), paragraph.getTableLevel(), cell, hWPFDocument, fieldsDocumentPart, picturesSource, picturesTable, listManager, xHTMLContentHandler);
                        i5++;
                        str5 = str5;
                        str3 = str3;
                        str4 = str4;
                        str2 = str2;
                        i4 = i4;
                        row = row;
                        i3 = i3;
                        table = table;
                    }
                    String str6 = str5;
                    xHTMLContentHandler.endElement("http://www.w3.org/1999/xhtml", str6, str6);
                    i4++;
                    table = table;
                }
                String str7 = str4;
                xHTMLContentHandler.endElement("http://www.w3.org/1999/xhtml", str7, str7);
                i3++;
                table = table;
            }
            String str8 = str3;
            Table table2 = table;
            String str9 = str2;
            xHTMLContentHandler.endElement("http://www.w3.org/1999/xhtml", str8, str8);
            xHTMLContentHandler.endElement("http://www.w3.org/1999/xhtml", str9, str9);
            return table2.numParagraphs() - 1;
        }
        if (paragraph.text().replaceAll("[\\r\\n\\s]+", "").isEmpty()) {
            return 0;
        }
        if (hWPFDocument.getStyleSheet().numStyles() > paragraph.getStyleIndex()) {
            StyleDescription styleDescription = hWPFDocument.getStyleSheet().getStyleDescription(paragraph.getStyleIndex());
            if (styleDescription == null || styleDescription.getName() == null || styleDescription.getName().length() <= 0) {
                paragraph2 = paragraph;
                tagAndStyle = new TagAndStyle("p", null);
            } else {
                paragraph2 = paragraph;
                r4 = paragraph.isInList() ? listManager.c(paragraph2) : null;
                tagAndStyle = e(styleDescription.getName(), i > 0);
            }
        } else {
            paragraph2 = paragraph;
            tagAndStyle = new TagAndStyle("p", null);
        }
        TagAndStyle tagAndStyle2 = tagAndStyle;
        String str10 = tagAndStyle2.b;
        if (str10 != null) {
            xHTMLContentHandler.m(tagAndStyle2.a, "class", str10);
        } else {
            xHTMLContentHandler.l(tagAndStyle2.a);
        }
        if (r4 != null) {
            xHTMLContentHandler.g(r4);
        }
        int i6 = 0;
        while (i6 < paragraph.numCharacterRuns()) {
            CharacterRun characterRun = paragraph2.getCharacterRun(i6);
            if (characterRun.text().getBytes(StandardCharsets.UTF_8)[0] == 19 && (fieldByStartOffset = hWPFDocument.getFields().getFieldByStartOffset(fieldsDocumentPart, characterRun.getStartOffset())) != null && (fieldByStartOffset.getType() == 58 || fieldByStartOffset.getType() == 56)) {
                CharacterRun markSeparatorCharacterRun = fieldByStartOffset.getMarkSeparatorCharacterRun(range);
                if (markSeparatorCharacterRun != null) {
                    StringBuilder u = he.u("_");
                    u.append(markSeparatorCharacterRun.getPicOffset());
                    str = u.toString();
                } else {
                    str = "_unknown_id";
                }
                String str11 = str;
                AttributesImpl attributesImpl = new AttributesImpl();
                attributesImpl.addAttribute("", "class", "class", "CDATA", "embedded");
                attributesImpl.addAttribute("", "id", "id", "CDATA", str11);
                xHTMLContentHandler.startElement("http://www.w3.org/1999/xhtml", "div", "div", attributesImpl);
                xHTMLContentHandler.endElement("http://www.w3.org/1999/xhtml", "div", "div");
            }
            if (characterRun.text().equals("\u0013")) {
                i6 += k(paragraph, i6, tagAndStyle2.a(), picturesSource, xHTMLContentHandler);
                i2 = 1;
            } else {
                if (characterRun.text().startsWith("\b")) {
                    for (int i7 = 0; i7 < characterRun.text().length(); i7++) {
                        j(characterRun, picturesSource.c(), picturesSource, xHTMLContentHandler);
                    }
                } else if (picturesTable.hasPicture(characterRun)) {
                    j(characterRun, picturesSource.a(characterRun), picturesSource, xHTMLContentHandler);
                } else {
                    g(characterRun, tagAndStyle2.a(), xHTMLContentHandler);
                }
                i2 = 1;
            }
            i6 += i2;
        }
        if (this.h) {
            xHTMLContentHandler.endElement("http://www.w3.org/1999/xhtml", "s", "s");
            r0 = 0;
            this.h = false;
        } else {
            r0 = 0;
        }
        if (this.j) {
            xHTMLContentHandler.endElement("http://www.w3.org/1999/xhtml", "i", "i");
            this.j = r0;
        }
        if (this.i) {
            xHTMLContentHandler.endElement("http://www.w3.org/1999/xhtml", "b", "b");
            this.i = r0;
        }
        String str12 = tagAndStyle2.a;
        xHTMLContentHandler.endElement("http://www.w3.org/1999/xhtml", str12, str12);
        return r0;
    }

    /**
     * Writes an {@code img} element for the picture and, the first time a given picture
     * is seen, hands its content to {@code c(...)} and records it in {@code picturesSource.b}.
     *
     * <p>Does nothing when the run is rejected by {@code l()} or the picture is null.
     *
     * @param characterRun        run the picture belongs to; may be null
     * @param picture             picture to emit; may be null
     * @param picturesSource      picture bookkeeping (numbering and already-emitted set)
     * @param xHTMLContentHandler output sink
     */
    public final void j(CharacterRun characterRun, Picture picture, PicturesSource picturesSource, XHTMLContentHandler xHTMLContentHandler) {
        boolean rendered = l(characterRun);
        if (!rendered || picture == null) {
            return;
        }

        String extension = picture.suggestFileExtension();
        // Pictures are numbered from 1 by their position in the full picture list.
        int pictureNumber = picturesSource.e.indexOf(picture) + 1;
        // g00.e is an external helper; presumably it joins its two string arguments - TODO confirm.
        String fileName = "image" + pictureNumber + (extension.length() > 0 ? g00.e(".", extension) : "");
        String mimeType = picture.getMimeType();

        AttributesImpl imageAttributes = new AttributesImpl();
        imageAttributes.addAttribute("", "src", "src", "CDATA", g00.e("embedded:", fileName));
        imageAttributes.addAttribute("", "alt", "alt", "CDATA", fileName);
        xHTMLContentHandler.startElement("http://www.w3.org/1999/xhtml", "img", "img", imageAttributes);
        xHTMLContentHandler.endElement("http://www.w3.org/1999/xhtml", "img", "img");

        // The content of each picture is passed on only once.
        if (!picturesSource.b.contains(picture)) {
            c(TikaInputStream.n(picture.getContent()), fileName, null, mimeType, xHTMLContentHandler, false);
            picturesSource.b.add(picture);
        }
    }

    /**
     * Handles a field that begins at run {@code i} (a 0x13 mark): collects the runs before
     * the 0x14 separator as the field's control text and the runs after it as its display
     * text, stopping at the 0x15 end mark, then emits the display runs.
     *
     * <p>When the control text is a HYPERLINK instruction containing a double quote, the
     * display runs are wrapped in an {@code a} element whose href is the quoted target.
     * If the field has no separator, the collected runs are treated as display text.
     *
     * @param paragraph           paragraph containing the field
     * @param i                   index of the run holding the field begin mark
     * @param z                   passed to {@code g()} as its skip-formatting flag
     * @param picturesSource      picture bookkeeping
     * @param xHTMLContentHandler output sink
     * @return distance from {@code i} to the run at which scanning stopped (the end mark,
     *         or the run count when no end mark exists)
     */
    public final int k(Paragraph paragraph, int i, boolean z, PicturesSource picturesSource, XHTMLContentHandler xHTMLContentHandler) {
        List<CharacterRun> controlRuns = new ArrayList<>();
        List<CharacterRun> displayRuns = new ArrayList<>();
        boolean sawSeparator = false;
        int cursor = i + 1;
        while (cursor < paragraph.numCharacterRuns()) {
            CharacterRun characterRun = paragraph.getCharacterRun(cursor);
            String runText = characterRun.text();
            if (runText.equals("\u0013")) {
                // Nested field: let the recursive call consume it.
                cursor += k(paragraph, cursor + 1, z, picturesSource, xHTMLContentHandler);
            } else if (runText.equals("\u0014")) {
                sawSeparator = true;
            } else if (runText.equals("\u0015")) {
                if (!sawSeparator) {
                    displayRuns = controlRuns;
                    controlRuns = new ArrayList<>();
                }
                // The end mark closes this field. Without stopping here every later run of
                // the paragraph would be swallowed into the field and skipped by the caller.
                break;
            } else if (sawSeparator) {
                displayRuns.add(characterRun);
            } else {
                controlRuns.add(characterRun);
            }
            cursor++;
        }

        if (controlRuns.size() > 0) {
            String instruction = controlRuns.get(0).text();
            for (int n = 1; n < controlRuns.size(); n++) {
                // he.u is an external helper returning a StringBuilder; presumably seeded with its argument.
                StringBuilder joined = he.u(instruction);
                joined.append(controlRuns.get(n).text());
                instruction = joined.toString();
            }
            if ((instruction.startsWith("HYPERLINK") || instruction.startsWith(" HYPERLINK")) && instruction.indexOf(34) > -1) {
                int start = instruction.indexOf(34) + 1;
                // End of the target: last '"' (34), else last U+201D (8221), else last '\r' (13), else end of text.
                int end = instruction.lastIndexOf(34);
                if (end <= start && (end = instruction.lastIndexOf(8221)) <= start && (end = instruction.lastIndexOf(13)) <= start) {
                    end = instruction.length();
                }
                String href = (start < 0 || start >= end || end > instruction.length()) ? "" : instruction.substring(start, end);
                xHTMLContentHandler.m("a", "href", href);
                for (CharacterRun displayRun : displayRuns) {
                    g(displayRun, z, xHTMLContentHandler);
                }
                xHTMLContentHandler.endElement("http://www.w3.org/1999/xhtml", "a", "a");
            } else {
                for (CharacterRun displayRun : displayRuns) {
                    if (picturesSource.b(displayRun)) {
                        j(displayRun, picturesSource.a(displayRun), picturesSource, xHTMLContentHandler);
                    } else {
                        g(displayRun, z, xHTMLContentHandler);
                    }
                }
            }
        } else {
            for (CharacterRun displayRun : displayRuns) {
                g(displayRun, z, xHTMLContentHandler);
            }
        }
        return cursor - i;
    }

    /**
     * Decides whether a run should be emitted.
     *
     * @param characterRun run to check; may be null
     * @return true for a null run or a run that is not marked deleted
     */
    public final boolean l(CharacterRun characterRun) {
        if (characterRun == null) {
            return true;
        }
        return !characterRun.isMarkedDeleted();
    }

    /**
     * Extracts a Word document stored under the given directory node: headers, main text,
     * text boxes, footnotes, comments, endnotes, footers, leftover pictures, and finally
     * the directory entries of the "ObjectPool" whose names start with "_".
     *
     * <p>If POI reports an old-format file, extraction falls back to {@code n()}.
     *
     * @param directoryNode       root of the document's storage
     * @param xHTMLContentHandler output sink
     */
    public void m(DirectoryNode directoryNode, XHTMLContentHandler xHTMLContentHandler) {
        try {
            HWPFDocument document = new HWPFDocument(directoryNode);
            org.apache.poi.hwpf.extractor.WordExtractor poiExtractor = new org.apache.poi.hwpf.extractor.WordExtractor(document);
            HeaderStories headerStories = new HeaderStories(document);
            PicturesTable picturesTable = document.getPicturesTable();
            PicturesSource pictures = new PicturesSource(document);

            h(new Range[]{headerStories.getFirstHeaderSubrange(), headerStories.getEvenHeaderSubrange(), headerStories.getOddHeaderSubrange()}, "header", document, pictures, picturesTable, xHTMLContentHandler);

            Range body = document.getRange();
            ListManager listManager = new ListManager(document);
            int paragraphIndex = 0;
            while (paragraphIndex < body.numParagraphs()) {
                // i() returns how many extra paragraphs it consumed (e.g. a whole table).
                int consumed = i(body.getParagraph(paragraphIndex), 0, body, document, FieldsDocumentPart.MAIN, pictures, picturesTable, listManager, xHTMLContentHandler);
                paragraphIndex = paragraphIndex + consumed + 1;
            }

            for (String textbox : poiExtractor.getMainTextboxText()) {
                xHTMLContentHandler.h("p", textbox);
            }
            for (String footnote : poiExtractor.getFootnoteText()) {
                xHTMLContentHandler.h("p", footnote);
            }
            for (String comment : poiExtractor.getCommentsText()) {
                xHTMLContentHandler.h("p", comment);
            }
            for (String endnote : poiExtractor.getEndnoteText()) {
                xHTMLContentHandler.h("p", endnote);
            }

            h(new Range[]{headerStories.getFirstFooterSubrange(), headerStories.getEvenFooterSubrange(), headerStories.getOddFooterSubrange()}, "footer", document, pictures, picturesTable, xHTMLContentHandler);

            // Emit every picture that no character run claimed.
            for (Picture leftover = pictures.c(); leftover != null; leftover = pictures.c()) {
                j(null, leftover, pictures, xHTMLContentHandler);
            }

            try {
                // getEntry returns a plain Entry; only a DirectoryEntry can be iterated.
                Entry objectPool = directoryNode.getEntry("ObjectPool");
                if (objectPool instanceof DirectoryEntry) {
                    for (Entry entry : (DirectoryEntry) objectPool) {
                        if (entry.getName().startsWith("_") && (entry instanceof DirectoryEntry)) {
                            b((DirectoryEntry) entry, xHTMLContentHandler);
                        }
                    }
                }
            } catch (FileNotFoundException ignored) {
                // The document has no ObjectPool entry, so there is nothing more to process.
            }
        } catch (OldWordFileFormatException ignored) {
            n(directoryNode, xHTMLContentHandler);
        }
    }

    /**
     * Extracts an old-format Word document by writing each paragraph text that
     * {@link Word6Extractor} returns as a {@code "p"} element.
     *
     * @param directoryNode       root of the document's storage
     * @param xHTMLContentHandler output sink
     */
    public void n(DirectoryNode directoryNode, XHTMLContentHandler xHTMLContentHandler) {
        HWPFOldDocument oldDocument = new HWPFOldDocument(directoryNode);
        Word6Extractor extractor = new Word6Extractor(oldDocument);
        for (String paragraphText : extractor.getParagraphText()) {
            xHTMLContentHandler.h("p", paragraphText);
        }
    }
}
