java - 按列迭代二维数据,在 Java 中处理和存储列标题

标签 java arrays list loops

我有一个很大的文本文件,想按列遍历其中的数据,在遍历时对前一个值和下一个值进行比较,然后把与之关联的列标题存入列表以供稍后使用。请就如何高效地解决这个问题给我一些建议。下面是我目前写出的代码,但我卡在了 for 循环这一步,无法继续。谢谢!

import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Scanner;

/**
 * Reads the comma-separated panel {@code study_panel.csv} and reports, row by row,
 * whether each value equals the value in the preceding column.
 *
 * <p>The first non-blank line of the file is treated as the header row and the first
 * column of every row as the row identifier; neither takes part in the comparison.
 * For every data row the headers of the compared columns are collected into a
 * "recombination" list (value differs from the previous column) and a
 * "no recombination" list (value equals the previous column).
 */
public class Projections {

    /**
     * Entry point: loads the file, runs the comparisons and prints the results to
     * standard output.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String fileName = "study_panel.csv";
        File file = new File(fileName);

        // 2-dimensional list of strings: one inner list per non-blank line of the file
        List<List<String>> lines = new ArrayList<>();

        // try-with-resources closes the scanner even if parsing throws
        try (Scanner inputStream = new Scanner(file)) {
            // Read whole lines: next() would cut a row at the first whitespace character
            while (inputStream.hasNextLine()) {
                String line = inputStream.nextLine().trim();
                if (line.isEmpty()) {
                    continue;
                }
                // Adds the currently parsed line to the 2-dimensional string list
                lines.add(Arrays.asList(line.split(",")));
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            return;
        }

        // Compare two specific elements; guarded so short files do not throw
        if (lines.size() > 3 && lines.get(3).size() > 2) {
            String svalue = lines.get(3).get(1);
            String svalue2 = lines.get(3).get(2);
            if (svalue.equals(svalue2)) {
                System.out.println("No recombination");
            } else {
                System.out.println("Recombination");
            }
        }

        if (lines.isEmpty()) {
            return;
        }
        List<String> header = lines.get(0);

        // Iterate through the 2-dimensional data and store column headers
        for (int lineNo = 1; lineNo < lines.size(); lineNo++) {
            List<String> line = lines.get(lineNo);
            if (line.isEmpty()) {
                // A line made only of separators splits into zero values
                continue;
            }
            List<String> recombinationHeaders = new ArrayList<>();
            List<String> noRecombinationHeaders = new ArrayList<>();

            // Never index past the header, even if a row has extra cells
            int columnCount = Math.min(line.size(), header.size());
            String previousValue = null;

            // Column 0 holds the row identifier, so the walk starts at column 1
            for (int columnNo = 1; columnNo < columnCount; columnNo++) {
                String newValue = line.get(columnNo);

                // The first data column has no predecessor to compare with
                if (previousValue != null) {
                    if (previousValue.equals(newValue)) {
                        System.out.println("No recombination");
                        noRecombinationHeaders.add(header.get(columnNo));
                    } else {
                        System.out.println("Recombination");
                        recombinationHeaders.add(header.get(columnNo));
                    }
                }
                previousValue = newValue;
            }

            System.out.println("Individual " + line.get(0)
                    + " - recombination: " + recombinationHeaders
                    + ", no recombination: " + noRecombinationHeaders);
        }
    }
}

1.研究数据示例

ID,S1_577905,S1_1066894,S1_1293038,S1_1491834
ind1,A,A,A,A
ind2,B,B,B,B
ind3,B,B,A,A
ind4,B,A,B,B
ind5,A,A,H,A
ind6,A,-,B,B
ind7,A,B,A,H

  • 引用数据示例
  • ID,S1_570493,S1_592115,S1_604416,S1_614892,S1_618220,S1_636801,S1_654822,S1_655362,S1_723787,S1_723892,S1_858753,S1_867194,S1_923829,S1_925667,S1_1009779,S1_1009843,S1_1010052,S1_1010123,S1_1010298,S1_1010403,S1_1029733,S1_1039046,S1_1040024,S1_1044174,S1_1044355,S1_1049540,S1_1049657,S1_1050097,S1_1050995,S1_1126726,S1_1166956,S1_1177001,S1_1185437,S1_1188610,S1_1191450,S1_1195593,S1_1195669,S1_1195782,S1_1197394,S1_1207757,S1_1207893,S1_1211271,S1_1211343,S1_1223120,S1_1223377,S1_1237046,S1_1251020,S1_1280051,S1_1280124,S1_1284151,S1_1308043,S1_1340776,S1_1341385,S1_1363675,S1_1363753,S1_1407704,S1_1410354,S1_1431655,S1_1433696,S1_1490941,S1_1507081
    A,T,T,A,C,C,T,T,T,G,G,A,A,A,A,G,G,T,G,C,G,C,T,G,C,T,A,G,C,C,C,T,T,A,C,A,G,G,A,G,C,G,T,A,C,C,A,G,A,G,C,C,A,T,T,C,A,T,T,A,G,G
    B,C,G,T,A,T,C,C,A,C,A,C,C,C,G,T,A,C,C,T,A,G,A,T,T,G,G,A,A,T,T,C,C,C,T,G,A,A,C,T,T,A,C,T,A,G,T,A,G,A,T,T,G,C,A,T,G,C,C,C,A,T
    
    
  • 预期结果示例
  • ID,S1_570493,S1_592115,S1_604416,S1_614892,S1_618220,S1_636801,S1_654822,S1_655362,S1_723787,S1_723892,S1_858753,S1_867194,S1_923829,S1_925667,S1_1009779,S1_1009843,S1_1010052,S1_1010123,S1_1010298,S1_1010403,S1_1029733,S1_1039046,S1_1040024,S1_1044174,S1_1044355,S1_1049540,S1_1049657,S1_1050097,S1_1050995,S1_1126726,S1_1166956,S1_1177001,S1_1185437,S1_1188610,S1_1191450,S1_1195593,S1_1195669,S1_1195782,S1_1197394,S1_1207757,S1_1207893,S1_1211271,S1_1211343,S1_1223120,S1_1223377,S1_1237046,S1_1251020,S1_1280051,S1_1280124,S1_1284151,S1_1308043,S1_1340776,S1_1341385,S1_1363675,S1_1363753,S1_1407704,S1_1410354,S1_1431655,S1_1433696,S1_1490941,S1_1507081
    ind1,T,T,A,C,C,T,T,T,G,G,A,A,A,A,G,G,T,G,C,G,C,T,G,C,T,A,G,C,C,C,T,T,A,C,A,G,G,A,G,C,G,T,A,C,C,A,G,A,G,C,C,A,T,T,C,A,T,T,A,G,G
    ind2,C,G,T,A,T,C,C,A,C,A,C,C,C,G,T,A,C,C,T,A,G,A,T,T,G,G,A,A,T,T,C,C,C,T,G,A,A,C,T,T,A,C,T,A,G,T,A,G,A,T,T,G,C,A,T,G,C,C,C,A,T
    ind3,C,G,T,A,T,C,C,A,C,A,C,C,C,G,T,A,C,C,T,A,G,A,T,T,G,G,A,A,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,C,C,A,T,T,C,A,T,T,A,G,G
    ind4,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,C,C,A,T,T,C,A,T,T,A,G,G
    ind5,T,T,A,C,C,T,T,T,G,G,A,A,A,A,G,G,T,G,C,G,C,T,G,C,T,A,G,C,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
    ind6,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,T,T,G,C,A,T,G,C,C,C,A,T
    ind7,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
    
    

    最佳答案

    假设您不想使用 CSV 库(无论如何您的 csv 看起来都很简单),我尝试更新您的代码。

    /**
     * Reads {@code study_panel.csv}, keeps its first non-blank line as the header and,
     * for every following row, compares each cell with its right-hand neighbour.
     * The header of the left-hand column of each comparison is recorded per row in
     * either the "recombination" map (cells differ) or the "no recombination" map
     * (cells equal), keyed by the zero-based data-row index. Both maps are printed
     * at the end.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String fileName = "study_panel.csv";
        File file = new File(fileName);

        // 2-dimensional list of strings (data rows only)
        List<List<String>> lines = new ArrayList<>();
        List<String> header = null; // The header is stored in a separate list
        Map<Integer, List<String>> recombinationM = new HashMap<>();
        Map<Integer, List<String>> noRecombinationM = new HashMap<>();

        // try-with-resources closes the scanner even if parsing throws
        try (Scanner inputStream = new Scanner(file)) {
            // Read whole lines: next() would cut a row at the first whitespace character
            while (inputStream.hasNextLine()) {
                String line = inputStream.nextLine().trim();
                if (line.isEmpty()) {
                    continue;
                }
                List<String> values = Arrays.asList(line.split(","));

                if (header == null) {
                    header = values;
                    continue; // go to the next line as the header is read
                }
                // Adds the currently parsed line to the 2-dimensional string list
                lines.add(values);
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        }

        // Iterate through the 2-dimensional data and store column headers.
        // header is non-null whenever lines is non-empty, because rows are only
        // added after the header has been read.
        for (int i = 0; i < lines.size(); i++) {
            List<String> row = lines.get(i);
            List<String> recombinationHdr = new ArrayList<>();
            List<String> noRecombinationHdr = new ArrayList<>();

            // j + 1 must exist in the row and j must exist in the header
            int limit = Math.min(row.size() - 1, header.size());
            for (int j = 0; j < limit; j++) {
                // Comparison of the current cell with the next cell of the same row
                if (row.get(j).equals(row.get(j + 1))) {
                    System.out.println("No recombination");
                    noRecombinationHdr.add(header.get(j)); // store the current header
                } else {
                    System.out.println("Recombination");
                    recombinationHdr.add(header.get(j)); // store the current header
                }
            }
            recombinationM.put(i, recombinationHdr);
            noRecombinationM.put(i, noRecombinationHdr);
        }

        // Print maps by row index: HashMap iteration order is not guaranteed
        System.out.println("== No Recombination ==");
        for (int i = 0; i < lines.size(); i++) {
            System.out.println("Line: " + i + " - " + noRecombinationM.get(i));
        }

        System.out.println("== Recombination ==");
        for (int i = 0; i < lines.size(); i++) {
            System.out.println("Line: " + i + " - " + recombinationM.get(i));
        }
    }
    

    我引入了标题列表,用来存储 CSV 的第一行(各列的标题),使其与存储在 lines 列表中的其余行分开。另外,我还引入了两个输出 Map,分别保存重组和无重组的标题:Map 的键是行号,Map 的值是标题的字符串列表。

    代码主要分为两部分。第一部分是 Scanner 部分,它读取 CSV 并将内容放入两个列表(标题和行)。第二部分是对列表的迭代和检查。我不确定自己是否正确理解了“基于下一个/上一个值进行比较”的含义,我假设您的意思是在同一行上比较当前列索引和下一个列索引的值: if (lines.get(i).get(j).equals(lines.get(i).get(j + 1))) { 因此,对于第 i 行,它将第 j 个值与下一个值(第 j+1 个)进行比较。

    根据上述比较的结果,行号和标题 header.get(j) 会被存入 recombinationM 或 noRecombinationM 映射中。

    您的样本结果如下:

    == No Recombination ==
    Line: 0 - [S1_577905, S1_1066894, S1_1293038, S1_1491834, S1_1564133]
    Line: 1 - [S1_577905, S1_1066894, S1_1293038, S1_1491834]
    Line: 2 - [S1_1491834, S1_1564133]
    Line: 3 - [S1_577905, S1_1066894, S1_1293038, S1_1491834, S1_1564133]
    Line: 4 - [S1_577905, S1_1491834]
    Line: 5 - [S1_577905, S1_1293038, S1_1491834, S1_1564133]
    Line: 6 - [S1_1293038, S1_1491834, S1_1564133]
    == Recombination ==
    Line: 0 - [ID]
    Line: 1 - [ID, S1_1564133]
    Line: 2 - [ID, S1_577905, S1_1066894, S1_1293038]
    Line: 3 - [ID]
    Line: 4 - [ID, S1_1066894, S1_1293038, S1_1564133]
    Line: 5 - [ID, S1_1066894]
    Line: 6 - [ID, S1_577905, S1_1066894]
    

    如果您不想比较第一列 (ID),您可以从 j=1 开始第二个循环。

    关于java - 按列迭代二维数据,在 Java 中处理和存储列标题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/58725617/

    相关文章:

    Java初学者问题简单图解

    java - Hibernate:为两个不同的类映射同一列

    ios - 合并两个数组的每个元素并附加到一个数组

    arrays - 用 DI 序列排列排列

    python-3.x - 按分隔符将列表拆分为子列表

    java - 电子表格API : Get cell entry from specific cell address

    java - 获取 GPS 位置不起作用

    php - 如何使用 PHP 连接两个数组?

    c++ - 使用条件变量的单生产者多消费者缓冲区 pthread 实现

    list - R 中向量和列表数据类型有什么区别?