java - java中有没有什么压缩方法可以减少字符串中的字符数?

标签 java string gzip

我目前在 Java 中将字符串压缩成更少的字符时遇到了问题。

我有一个很大的字符串,大约有 751396 个字符,需要将字符串压缩成 1500 个字符。

我尝试过 GZIP Compressor、Inflater 和 Deflater,但这些库返回字节数组

然后我尝试了 LZ-String 压缩器,使用 UTF16 编码和 base64 编码时能够获得令人满意的压缩结果,但是这些压缩方式返回的结果中包含一些既不是字母数字、也不在下面所提供的符号列表中的字符。

注意:允许使用的符号列表是 [+,-,*,/,!,@,#]

是否有任何其他技术可以将字符串压缩为另一个字符数更少的字符串,并提供至少 30% 的压缩率?

我使用的 GZip 压缩代码如下:-

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

/**
 * Static helpers that GZIP-compress a string into a byte array and turn such
 * a byte array back into the original string.
 *
 * <p>Text is always encoded and decoded as UTF-8 so that the result does not
 * depend on the platform default charset.
 */
public class GZIPCompression {

    /**
     * GZIP-compresses the UTF-8 encoding of {@code str}.
     *
     * @param str the text to compress
     * @return the GZIP stream as a byte array, or {@code null} when
     *         {@code str} is {@code null} or empty
     * @throws IOException if writing to the GZIP stream fails
     */
    public static byte[] compress(final String str) throws IOException {
        if ((str == null) || (str.length() == 0)) {
            return null;
        }
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        // Closing the GZIP stream writes the trailer; it must happen before
        // the bytes are read back from the sink.
        try (GZIPOutputStream gzip = new GZIPOutputStream(sink)) {
            gzip.write(str.getBytes("UTF-8"));
        }
        return sink.toByteArray();
    }

    /**
     * Restores the text produced by {@link #compress(String)}.
     *
     * <p>Input that does not start with the GZIP magic number is treated as
     * plain, uncompressed UTF-8 text. Line terminators in the original text
     * are preserved exactly.
     *
     * @param compressed GZIP data, or plain UTF-8 bytes
     * @return the decoded text; the empty string when {@code compressed} is
     *         {@code null} or empty
     * @throws IOException if the GZIP data is corrupt or cannot be read
     */
    public static String decompress(final byte[] compressed) throws IOException {
        if ((compressed == null) || (compressed.length == 0)) {
            return "";
        }
        if (!isCompressed(compressed)) {
            return new String(compressed, "UTF-8");
        }
        StringBuilder text = new StringBuilder();
        try (GZIPInputStream gis = new GZIPInputStream(new ByteArrayInputStream(compressed));
                InputStreamReader reader = new InputStreamReader(gis, "UTF-8")) {
            // Copy raw characters instead of using readLine(), which would
            // silently strip every '\n' / '\r\n' from the result.
            char[] chunk = new char[4096];
            int read;
            while ((read = reader.read(chunk)) != -1) {
                text.append(chunk, 0, read);
            }
        }
        return text.toString();
    }

    /**
     * Tells whether {@code compressed} starts with the two-byte GZIP magic
     * number.
     *
     * @param compressed the bytes to inspect
     * @return {@code true} if at least two bytes are present and they match
     *         {@link GZIPInputStream#GZIP_MAGIC}; {@code false} otherwise,
     *         including for {@code null} and arrays shorter than two bytes
     */
    public static boolean isCompressed(final byte[] compressed) {
        if ((compressed == null) || (compressed.length < 2)) {
            return false;
        }
        return (compressed[0] == (byte) (GZIPInputStream.GZIP_MAGIC))
                && (compressed[1] == (byte) (GZIPInputStream.GZIP_MAGIC >> 8));
    }
}

Inflater & Deflater程序的代码如下:-

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.zip.DataFormatException;
import java.util.zip.Deflater;
import java.util.zip.Inflater;

public class Apple {

    public static void main(String[] args) {
        String sr = "    [120,-100,-19,89,91,79,-21,56,16,-2,43,40,-49,104,55,113,-18,-68,-91,-23,21,104,90,-38,-62,10,-83,120,48,-83,91,34,-46,-92,-21,-92,8,-124,-8,-17,103,-100,-92,77,-22,-38,-25,112,86,27,-119,106,65,84,-86,103,-58,-10,124,-98,-15,101,-66,-66,43,25,126,-99,-112,116,-109,-60,41,81,46,-34,-107,-57,36,121,14,-29,-43,-20,109,3,77,101,-94,-100,43,120,-79,-115,50,63,-39,-58,25,8,52,16,-52,-97,-62,104,-79,19,-88,32,8,-29,37,-114,-77,-70,-92,71,-78,89,125,-36,-65,-33,-107,5,-50,-40,-120,-86,-11,39,82,-31,95,-77,-40,-48,-21,-94,15,82,13,11,-58,-115,112,-102,-6,-55,-126,-103,-7,126,-65,53,-22,-113,-64,-58,123,-63,97,52,37,-85,53,97,-106,-17,74,55,10,87,79,-39,96,-63,-100,65,76,31,-46,40,-116,73,-39,111,-38,3,81,97,18,108,-41,-113,-124,-126,-52,-48,-100,-62,-50,-89,120,-103,-107,-56,108,-99,9,71,52,92,-123,49,52,91,-41,-109,125,19,44,55,9,-51,102,-124,-82,-61,24,71,-96,5,85,-101,-92,25,-76,-78,48,-55,-51,71,-61,67,-103,-92,-49,6,-45,108,75,73,27,-80,-49,-62,53,-101,23,-64,-25,75,-96,89,103,72,-67,48,-44,11,-107,-83,-105,71,105,-8,-126,35,-119,29,-70,-48,74,-69,-10,-106,-18,92,-48,-98,104,122,-90,-85,48,93,10,-118,2,108,-78,-100,102,-55,38,85,46,-44,115,-27,46,-60,-123,23,-2,-106,82,18,-49,-33,-54,21,26,4,-109,-35,-86,-114,-15,107,23,-125,119,36,-125,70,-102,71,-55,23,-58,96,47,5,-60,-13,-61,-24,-80,-28,96,97,105,-31,52,-100,123,101,60,53,-61,112,33,12,48,54,19,-61,-56,74,-112,116,41,-127,-42,74,41,-28,-69,-4,34,-53,109,-68,-64,-113,17,1,-7,-3,77,-18,-8,44,-55,112,4,-39,-77,27,12,68,61,-102,-92,-23,126,112,-45,48,-64,-91,100,-67,14,-45,-76,88,11,-45,-2,-61,-75,-108,-113,-113,-13,67,0,-99,-114,12,64,-91,17,3,-128,-124,108,14,0,-46,28,-99,3,-32,104,66,0,-35,-82,12,64,-91,-111,0,48,-35,6,1,-40,-74,-54,1,-48,84,93,-120,-96,-33,-105,33,-88,52,98,4,-70,-34,96,8,116,-45,114,121,4,-70,24,-63,-27,-91,12,65,-91,-111,32,112,27,-116,-127,-127,44,-115,71,96,-70,66,4,87,87,50,4,-107,70,-116,-64,112,26,-116,-127,-87,89,54,-113,-64,21,-57,-32,-6,90,-122,-96,-46,-120,17,-104
,77,34,-80,-112,-50,111,100,36,-55,-94,-31,80,-122,-96,-46,-120,17,-40,77,30,69,-74,-87,-15,89,-124,36,103,81,16,-56,16,84,26,49,2,71,111,112,31,56,-82,-55,-97,69,-70,110,10,17,-116,70,50,4,-107,70,-116,-64,117,26,68,-96,-87,-90,-31,-16,16,92,49,-124,-101,27,25,-124,74,35,-71,-110,-31,-38,108,16,3,-46,85,126,51,27,-106,56,-111,38,19,25,-122,74,35,-63,-48,-24,-99,-96,25,8,-103,-4,-61,66,-78,-99,-89,83,25,-122,74,35,-63,96,-94,38,115,-55,-46,85,-2,72,-78,52,113,28,102,51,25,-122,74,35,-63,96,55,-71,-93,53,-57,52,-8,45,109,-19,-10,-61,-61,-57,-71,-78,43,43,-90,25,-50,-74,48,57,-100,96,-73,41,-95,51,-118,-25,-49,121,93,48,25,-34,-113,6,-82,-83,-62,-3,91,124,116,-37,-123,67,-56,50,12,3,-23,-102,-19,-72,-106,-125,92,56,-25,-64,39,-16,99,-76,-51,54,-37,18,-28,106,-123,87,-60,-117,23,67,-126,-93,-76,27,1,-100,-36,-29,-68,-108,41,-86,-118,-78,18,41,94,-53,71,-75,8,91,46,-80,-50,-21,20,-74,58,-108,-32,-25,-37,-51,-2,-127,93,-82,-93,-16,69,46,20,10,94,-43,-99,127,-74,-84,84,0,31,-40,12,59,41,76,90,127,-58,-77,64,-46,112,83,-106,10,-29,105,-105,57,-73,-49,64,-107,-91,-62,-95,-55,109,-69,110,-94,-101,-24,-40,-92,55,-99,-43,76,28,-29,-40,-30,-22,-54,-81,15,-14,-15,112,28,108,-45,97,-50,82,28,-89,120,-50,122,117,9,-55,-105,120,74,-24,75,56,39,-2,19,-90,43,-112,56,-4,-117,51,47,16,-123,111,126,54,-53,-124,-64,-94,80,-78,-24,-122,36,90,-28,-21,-100,71,2,-60,53,-55,42,-65,-59,-64,-63,-63,98,76,-109,100,89,-74,-58,-112,-5,-84,116,-37,-105,-117,65,94,-65,-58,-117,-68,113,-49,-118,46,-88,-54,70,-53,86,72,-77,39,-82,79,-25,117,19,-46,-73,118,81,-39,-42,21,-125,52,-35,66,21,-99,-105,-60,-12,-83,84,6,121,-23,-122,-93,48,43,36,-112,-54,62,43,-91,-65,-66,-101,-125,-68,-64,111,-60,-49,-5,-1,-50,79,112,52,-33,-73,-5,-8,-105,45,-40,15,-20,91,-71,-79,-18,122,67,118,-78,49,-55,97,-10,6,-55,89,-47,-95,80,-18,-113,39,-106,-25,-121,-3,-81,-123,-3,107,-118,-86,80,50,-71,-34,99,-65,-27,11,123,-113,59,94,112,59,-101,-98,121,65,-5,-84,-43,-71,-13,38,94,-81,-61,-115,-90,25,-68,47,-63,-99,-60,-105,-102,66
,-18,75,32,-13,37,-16,-4,-2,-24,55,93,-71,12,36,-82,92,122,-125,-32,108,-40,-15,120,127,116,-109,31,-94,-35,-110,12,-47,30,120,-83,-50,108,-32,127,110,24,95,6,-53,-9,-90,-3,-65,58,-97,89,-27,-121,-113,-4,-74,-84,81,86,-58,17,101,5,23,-54,33,101,85,-29,20,126,66,89,-63,-33,39,73,43,-3,-41,-92,85,-50,66,125,-98,-76,-54,57,-82,127,69,90,25,123,50,74,117,-10,100,-108,-128,-76,-86,-20,-64,72,64,90,33,70,90,53,-55,89,125,85,-54,7,-23,-4,-3,117,98,-108,-113,-125,-8,119,-27,-87,81,62,22,66,39,78,-7,-24,26,79,124,-98,26,-27,-125,-48,17,113,120,106,-108,-113,-61,111,-28,-109,-93,124,44,62,-117,78,-116,-14,113,-43,-93,26,-9,-28,40,31,75,-27,121,-73,19,-92,124,44,126,51,-97,32,-27,99,-13,-44,-37,9,82,62,38,127,36,-99,32,-27,-29,30,-47,86,95,-97,-14,41,-33,-14,-115,-110,62,-59,-77,-108,39,125,10,-23,79,73,-97,-39,-92,-50,-24,-120,56,-97,-33,-90,-123,12,83,-1,21,45,-92,33,1,115,116,-56,11,25,34,94,-56,-74,63,-59,11,-79,95,43,-72,119,-87,-50,-1,-112,-73,-69,-52,-66,-119,-95,-26,-35,-4,38,-122,-66,-119,-95,-1,27,49,4,-97,31,15,0,-88,84]";
        byte[] data = sr.getBytes();
        try {
            String x = new String(decompress(compress(data)));
            System.out.println("decompressed " + x);
        } catch (IOException | DataFormatException e) {
            e.printStackTrace();
        }
    }

    /**
     * Compresses {@code data} with the default {@link Deflater} settings
     * (zlib format) and prints the original and compressed sizes to standard
     * output.
     *
     * @param data the bytes to compress
     * @return the complete deflated stream
     * @throws IOException if closing the in-memory output buffer fails
     */
    public static byte[] compress(byte[] data) throws IOException {
        Deflater deflater = new Deflater();
        try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(data.length)) {
            deflater.setInput(data);
            // All input is supplied up front, so the stream can be finished
            // immediately; the loop below just drains the output.
            deflater.finish();

            byte[] buffer = new byte[1024];
            while (!deflater.finished()) {
                int count = deflater.deflate(buffer);
                outputStream.write(buffer, 0, count);
            }
            byte[] output = outputStream.toByteArray();

            System.out.println("Original: " + data.length);
            System.out.println("Compressed: " + output.length);
            return output;
        } finally {
            // Release the native zlib state now instead of waiting for GC.
            deflater.end();
        }
    }

    /**
     * Inflates a complete zlib/deflate stream held in {@code data}.
     *
     * <p>A blank line is printed to standard output after inflating, as in
     * the original implementation.
     *
     * @param data the deflated bytes, e.g. the result of {@code compress}
     * @return the inflated bytes
     * @throws IOException if closing the in-memory output buffer fails
     * @throws DataFormatException if {@code data} is corrupt, empty or
     *         truncated, or requires a preset dictionary
     */
    public static byte[] decompress(byte[] data) throws IOException, DataFormatException {
        Inflater inflater = new Inflater();
        try (ByteArrayOutputStream outputStream = new ByteArrayOutputStream(data.length)) {
            inflater.setInput(data);
            byte[] buffer = new byte[1024];
            while (!inflater.finished()) {
                int count = inflater.inflate(buffer);
                // No output while the inflater is asking for more input (or a
                // dictionary) means the stream can never finish; fail instead
                // of spinning forever.
                if (count == 0 && (inflater.needsInput() || inflater.needsDictionary())) {
                    throw new DataFormatException(
                            "Incomplete or unsupported deflate stream (" + data.length + " bytes)");
                }
                outputStream.write(buffer, 0, count);
            }
            byte[] output = outputStream.toByteArray();
            System.out.println();
            return output;
        } finally {
            // Release the native zlib state now instead of waiting for GC.
            inflater.end();
        }
    }
}

数据的示例:-

"120,-100,-19,89,91,79,-21,56,16,-2,43,40,-49,104,55,113,-18,-68,-91,-23,21,104,90,-38,-62,10,-83,120,48,-83,91,34,-46,-92,-21,-92,8,-124,-8,-17,103,-100,-92,77,-22,-38,-25,112,86,27,-119,106,65,84,-86,103,-58,-10,124,-98,-15,101,-66,-66,43,25,126,-99,-112,116,-109,-60,41,81,46,-34,-107,-57,36,121,14,-29,-43,-20,109,3,77,101,-94,-100,43,120,-79,-115,50,63,-39,-58,25,8,52,16,-52,-97,-62,104,-79,19,-88,32,8,-29,37,-114,-77,-70,-92,71,-78,89,125,-36,-65,-33,-107,5,-50,-40,-120,-86,-11,39,82,-31,95,-77,-40,-48,-21,-94,15,82,13,11,-58,-115,112,-102,-6,-55,-126,-103,-7,126,-65,53,-22,-113,-64,-58,123,-63,97,52,37,-85,53,97,-106,-17,74,55,10,87,79,-39,96,-63,-100,65,76,31,-46,40,-116,73,-39,111,-38,3,81,97,18,108,-41,-113,-124,-126,-52,-48,-100,-62,-50,-89,120,-103,-107,-56,108,-99,9,71,52,92,-123,49,52,91,-41,-109,125,19,44,55,9,-51,102,-124,-82,-61,24,71,-96,5,85,-101,-92,25,-76,-78,48,-55,-51,71,-61,67,-103,-92,-49,6,-45,108,75,73,27,-80,-49,-62,53,-101,23,-64,-25,75,-96,89,103,72,-67,48,-44,11,-107,-83,-105,71,105,-8,-126,35,-119,29,-70,-48,74,-69,-10,-106,-18,92,-48,-98,104,122,-90,-85,48,93,10,-118,2,108,-78,-100,102,-55,38,85,46,-44,115,-27,46,-60,-123,23,-2,-106,82,18,-49,-33,-54,21,26,4,-109,-35,-86,-114,-15,107,23,-125,119,36,-125,70,-102,71,-55,23,-58,96,47,5,-60,-13,-61,-24,-80,-28,96,97,105,-31,52,-100,123,101,60,53,-61,112,33,12,48,54,19,-61,-56,74,-112,116,41,-127,-42,74,41,-28,-69,-4,34,-53,109,-68,-64,-113,17,1,-7,-3,77,-18,-8,44,-55,112,4,-39,-77,27,12,68,61,-102,-92,-23,126,112,-45,48,-64,-91,100,-67,14,-45,-76,88,11,-45,-2,-61,-75,-108,-113,-113,-13,67,0,-99,-114,12,64,-91,17,3,-128,-124,108,14,0,-46,28,-99,3,-32,104,66,0,-35,-82,12,64,-91,-111,0,48,-35,6,1,-40,-74,-54,1,-48,84,93,-120,-96,-33,-105,33,-88,52,98,4,-70,-34,96,8,116,-45,114,121,4,-70,24,-63,-27,-91,12,65,-91,-111,32,112,27,-116,-127,-127,44,-115,71,96,-70,66,4,87,87,50,4,-107,70,-116,-64,112,26,-116,-127,-87,89,54,-113,-64,21,-57,-32,-6,90,-122,-96,-46,-120,17,-104,77,34,-80,-112,-50,111,1
00,36,-55,-94,-31,80,-122,-96,-46,-120,17,-40,77,30,69,-74,-87,-15,89,-124,36,103,81,16,-56,16,84,26,49,2,71,111,112,31,56,-82,-55,-97,69,-70,110,10,17,-116,70,50,4,-107,70,-116,-64,117,26,68,-96,-87,-90,-31,-16,16,92,49,-124,-101,27,25,-124,74,35,-71,-110,-31,-38,108,16,3,-46,85,126,51,27,-106,56,-111,38,19,25,-122,74,35,-63,-48,-24,-99,-96,25,8,-103,-4,-61,66,-78,-99,-89,83,25,-122,74,35,-63,96,-94,38,115,-55,-46,85,-2,72,-78,52,113,28,102,51,25,-122,74,35,-63,96,55,-71,-93,53,-57,52,-8,45,109,-19,-10,-61,-61,-57,-71,-78,43,43,-90,25,-50,-74,48,57,-100,96,-73,41,-95,51,-118,-25,-49,121,93,48,25,-34,-113,6,-82,-83,-62,-3,91,124,116,-37,-123,67,-56,50,12,3,-23,-102,-19,-72,-106,-125,92,56,-25,-64,39,-16,99,-76,-51,54,-37,18,-28,106,-123,87,-60,-117,23,67,-126,-93,-76,27,1,-100,-36,-29,-68,-108,41,-86,-118,-78,18,41,94,-53,71,-75,8,91,46,-80,-50,-21,20,-74,58,-108,-32,-25,-37,-51,-2,-127,93,-82,-93,-16,69,46,20,10,94,-43,-99,127,-74,-84,84,0,31,-40,12,59,41,76,90,127,-58,-77,64,-46,112,83,-106,10,-29,105,-105,57,-73,-49,64,-107,-91,-62,-95,-55,109,-69,110,-94,-101,-24,-40,-92,55,-99,-43,76,28,-29,-40,-30,-22,-54,-81,15,-14,-15,112,28,108,-45,97,-50,82,28,-89,120,-50,122,117,9,-55,-105,120,74,-24,75,56,39,-2,19,-90,43,-112,56,-4,-117,51,47,16,-123,111,126,54,-53,-124,-64,-94,80,-78,-24,-122,36,90,-28,-21,-100,71,2,-60,53,-55,42,-65,-59,-64,-63,-63,98,76,-109,100,89,-74,-58,-112,-5,-84,116,-37,-105,-117,65,94,-65,-58,-117,-68,113,-49,-118,46,-88,-54,70,-53,86,72,-77,39,-82,79,-25,117,19,-46,-73,118,81,-39,-42,21,-125,52,-35,66,21,-99,-105,-60,-12,-83,84,6,121,-23,-122,-93,48,43,36,-112,-54,62,43,-91,-65,-66,-101,-125,-68,-64,111,-60,-49,-5,-1,-50,79,112,52,-33,-73,-5,-8,-105,45,-40,15,-20,91,-71,-79,-18,122,67,118,-78,49,-55,97,-10,6,-55,89,-47,-95,80,-18,-113,39,-106,-25,-121,-3,-81,-123,-3,107,-118,-86,80,50,-71,-34,99,-65,-27,11,123,-113,59,94,112,59,-101,-98,121,65,-5,-84,-43,-71,-13,38,94,-81,-61,-115,-90,25,-68,47,-63,-99,-60,-105,-102,66,-18,75,32,-13,37,-16,-4,
-2,-24,55,93,-71,12,36,-82,92,122,-125,-32,108,-40,-15,120,127,116,-109,31,-94,-35,-110,12,-47,30,120,-83,-50,108,-32,127,110,24,95,6,-53,-9,-90,-3,-65,58,-97,89,-27,-121,-113,-4,-74,-84,81,86,-58,17,101,5,23,-54,33,101,85,-29,20,126,66,89,-63,-33,39,73,43,-3,-41,-92,85,-50,66,125,-98,-76,-54,57,-82,127,69,90,25,123,50,74,117,-10,100,-108,-128,-76,-86,-20,-64,72,64,90,33,70,90,53,-55,89,125,85,-54,7,-23,-4,-3,117,98,-108,-113,-125,-8,119,-27,-87,81,62,22,66,39,78,-7,-24,26,79,124,-98,26,-27,-125,-48,17,113,120,106,-108,-113,-61,111,-28,-109,-93,124,44,62,-117,78,-116,-14,113,-43,-93,26,-9,-28,40,31,75,-27,121,-73,19,-92,124,44,126,51,-97,32,-27,99,-13,-44,-37,9,82,62,38,127,36,-99,32,-27,-29,30,-47,86,95,-97,-14,41,-33,-14,-115,-110,62,-59,-77,-108,39,125,10,-23,79,73,-97,-39,-92,-50,-24,-120,56,-97,-33,-90,-123,12,83,-1,21,45,-92,33,1,115,116,-56,11,25,34,94,-56,-74,63,-59,11,-79,95,43,-72,119,-87,-50,-1,-112,-73,-69,-52,-66,-119,-95,-26,-35,-4,38,-122,-66,-119,-95,-1,27,49,4,-97,31,15,0,-88,84"

是否有更好的选项来减少字符串中的字符数而不将其转换为字节数组和不需要的字符?

提前致谢

最佳答案

你可以先压缩成一个 byte[],然后用 Base64 对结果进行编码。这样得到的字符串只包含字母、数字和少量符号,可以安全地作为文本传输;Base64 正是被广泛用于这一目的。

/**
 * Demo driver: builds a sample text of at least 751396 characters, runs it
 * through deflateBase64 / inflateBase64, and prints the sizes before and
 * after plus whether the round trip reproduced the input.
 */
public static void main(String[] args) {
    final int targetLength = 751396;
    StringBuilder sample = new StringBuilder();
    // Each line records the builder length measured right after its own
    // "Size: " prefix has been appended.
    while (sample.length() < targetLength) {
        sample.append("Size: ");
        sample.append(sample.length());
        sample.append("\n");
    }
    String original = sample.toString();

    String encoded = deflateBase64(original);
    System.out.println("Uncompressed size = " + original.length() + ", compressed size=" + encoded.length());

    String restored = inflateBase64(encoded);
    boolean identical = restored.equals(original);
    System.out.println("Same after inflating is " + identical);
}

/**
 * Deflates the UTF-8 encoding of {@code text} and returns the compressed
 * bytes as a Base64 string (standard alphabet: letters, digits, '+', '/'
 * and '=' padding).
 *
 * <p>The charset is fixed to UTF-8 so the output can be inflated on any
 * machine, regardless of its platform default charset.
 *
 * @param text the text to compress
 * @return the Base64 encoding of the deflated text
 * @throws AssertionError if the in-memory streams report an I/O error
 */
public static String deflateBase64(String text) {
    try {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        // Closing the writer finishes the deflate stream before it is read.
        try (Writer writer = new OutputStreamWriter(new DeflaterOutputStream(baos), "UTF-8")) {
            writer.write(text);
        }
        return Base64.getEncoder().encodeToString(baos.toByteArray());
    } catch (IOException e) {
        throw new AssertionError(e);
    }
}

/**
 * Reverses {@link #deflateBase64(String)}: Base64-decodes {@code base64},
 * inflates the bytes and decodes them as UTF-8 text.
 *
 * @param base64 a string produced by {@code deflateBase64}
 * @return the original text
 * @throws IllegalArgumentException if {@code base64} is not valid Base64
 * @throws AssertionError if the decoded bytes are not a valid deflate stream
 *         or another I/O error occurs
 */
public static String inflateBase64(String base64) {
    try (Reader reader = new InputStreamReader(
            new InflaterInputStream(
                    new ByteArrayInputStream(
                            Base64.getDecoder().decode(base64))), "UTF-8")) {
        StringWriter sw = new StringWriter();
        char[] chars = new char[1024];
        // read() signals end of stream with -1.
        for (int len; (len = reader.read(chars)) != -1; ) {
            sw.write(chars, 0, len);
        }
        return sw.toString();
    } catch (IOException e) {
        throw new AssertionError(e);
    }
}

打印

Uncompressed size = 751400, compressed size=219564
Same after inflating is true

关于java - java中有没有什么压缩方法可以减少字符串中的字符数?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/37755996/

相关文章:

java - Google 语言检测 api 回复错误代码 406

java.lang.NoSuchFieldError : gostR28147_cbc

java - 如何正确检查字符串是否仅包含数字或字母?安卓

javascript - 使用 Express 合并、缩小和提供 JavaScript 文件,但响应未压缩

java - 为什么 GZIP "os" header 在 Java 中被硬编码为 FAT?

python - 处理 gzip 或 bzip2ed 下载而不保留压缩数据

java - Spring数据查询示例

java - 确定 TextView 是否需要滚动

python - u 和 r 前缀如何与 python 中的字符串一起使用?

php - 如果带有条件前缀 [+ 和后缀 +],则获取字符串的一部分