我有一个很大的文本文件,想按列循环遍历,同时将每个值与前一个值和下一个值进行比较,然后将与之关联的列标题存储在列表中以供稍后使用。请给我一些关于如何高效解决这个问题的建议。下面是我目前已完成的部分,但在尝试使用“for 循环”时卡住了,无法继续!谢谢。
import java.io.File;
import java.io.FileNotFoundException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Scanner;
/**
 * Reads the comma-separated table in {@code study_panel.csv} and prints, for each pair of
 * adjacent cells on a row, whether the two values are equal ("No recombination") or
 * different ("Recombination").
 */
public class Projections {

    /** Input file name, resolved against the current working directory. */
    private static final String FILE_NAME = "study_panel.csv";

    /**
     * Program entry point.
     *
     * <p>First compares the cells at columns 1 and 2 of the fourth parsed line (when they
     * exist), then walks every parsed line and compares each cell with the cell before it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // 2-dimensional list of strings; index 0 is whatever the first line of the file holds.
        List<List<String>> lines = readRows(new File(FILE_NAME));

        // Compare specific elements in the list, guarded so short input cannot throw.
        if (lines.size() > 3 && lines.get(3).size() > 2) {
            String svalue = lines.get(3).get(1);
            String svalue2 = lines.get(3).get(2);
            report(svalue.equals(svalue2));
        }

        // Iterate through the 2-dimensional data, comparing each cell with its predecessor.
        for (List<String> line : lines) {
            // null means "no previous cell yet", so the first cell of a row is never compared.
            String previousValue = null;
            for (String value : line) {
                if (previousValue != null) {
                    report(previousValue.equals(value));
                }
                previousValue = value;
            }
        }
    }

    /**
     * Parses the file into rows of cells, splitting each non-blank line on commas.
     *
     * @param file the CSV file to read
     * @return the parsed rows in file order; empty when the file cannot be opened
     */
    private static List<List<String>> readRows(File file) {
        List<List<String>> rows = new ArrayList<>();
        // try-with-resources closes the scanner even if parsing throws.
        try (Scanner inputStream = new Scanner(file)) {
            // Read whole lines: Scanner.next() would split on any whitespace, not on line ends.
            while (inputStream.hasNextLine()) {
                String line = inputStream.nextLine();
                if (line.trim().isEmpty()) {
                    continue;
                }
                // Limit -1 keeps trailing empty cells so column positions stay aligned.
                rows.add(Arrays.asList(line.split(",", -1)));
            }
        } catch (FileNotFoundException e) {
            System.err.println("Input file not found: " + e.getMessage());
        }
        return rows;
    }

    /**
     * Prints the outcome of one comparison.
     *
     * @param same {@code true} when the two compared cells were equal
     */
    private static void report(boolean same) {
        if (same) {
            System.out.println("No recombination");
            // TODO: store column's header in list
        } else {
            System.out.println("Recombination");
            // TODO: store column's header in list
        }
    }
}
- 研究数据示例
ID,S1_577905,S1_1066894,S1_1293038,S1_1491834
ind1,A,A,A,A
ind2,B,B,B,B
ind3,B,B,A,A
ind4,B,A,B,B
ind5,A,A,H,A
ind6,A,-,B,B
ind7,A,B,A,H
- 参考数据示例
ID,S1_570493,S1_592115,S1_604416,S1_614892,S1_618220,S1_636801,S1_654822,S1_655362,S1_723787,S1_723892,S1_858753,S1_867194,S1_923829,S1_925667,S1_1009779,S1_1009843,S1_1010052,S1_1010123,S1_1010298,S1_1010403,S1_1029733,S1_1039046,S1_1040024,S1_1044174,S1_1044355,S1_1049540,S1_1049657,S1_1050097,S1_1050995,S1_1126726,S1_1166956,S1_1177001,S1_1185437,S1_1188610,S1_1191450,S1_1195593,S1_1195669,S1_1195782,S1_1197394,S1_1207757,S1_1207893,S1_1211271,S1_1211343,S1_1223120,S1_1223377,S1_1237046,S1_1251020,S1_1280051,S1_1280124,S1_1284151,S1_1308043,S1_1340776,S1_1341385,S1_1363675,S1_1363753,S1_1407704,S1_1410354,S1_1431655,S1_1433696,S1_1490941,S1_1507081
A,T,T,A,C,C,T,T,T,G,G,A,A,A,A,G,G,T,G,C,G,C,T,G,C,T,A,G,C,C,C,T,T,A,C,A,G,G,A,G,C,G,T,A,C,C,A,G,A,G,C,C,A,T,T,C,A,T,T,A,G,G
B,C,G,T,A,T,C,C,A,C,A,C,C,C,G,T,A,C,C,T,A,G,A,T,T,G,G,A,A,T,T,C,C,C,T,G,A,A,C,T,T,A,C,T,A,G,T,A,G,A,T,T,G,C,A,T,G,C,C,C,A,T
- 预期结果示例
ID,S1_570493,S1_592115,S1_604416,S1_614892,S1_618220,S1_636801,S1_654822,S1_655362,S1_723787,S1_723892,S1_858753,S1_867194,S1_923829,S1_925667,S1_1009779,S1_1009843,S1_1010052,S1_1010123,S1_1010298,S1_1010403,S1_1029733,S1_1039046,S1_1040024,S1_1044174,S1_1044355,S1_1049540,S1_1049657,S1_1050097,S1_1050995,S1_1126726,S1_1166956,S1_1177001,S1_1185437,S1_1188610,S1_1191450,S1_1195593,S1_1195669,S1_1195782,S1_1197394,S1_1207757,S1_1207893,S1_1211271,S1_1211343,S1_1223120,S1_1223377,S1_1237046,S1_1251020,S1_1280051,S1_1280124,S1_1284151,S1_1308043,S1_1340776,S1_1341385,S1_1363675,S1_1363753,S1_1407704,S1_1410354,S1_1431655,S1_1433696,S1_1490941,S1_1507081
ind1,T,T,A,C,C,T,T,T,G,G,A,A,A,A,G,G,T,G,C,G,C,T,G,C,T,A,G,C,C,C,T,T,A,C,A,G,G,A,G,C,G,T,A,C,C,A,G,A,G,C,C,A,T,T,C,A,T,T,A,G,G
ind2,C,G,T,A,T,C,C,A,C,A,C,C,C,G,T,A,C,C,T,A,G,A,T,T,G,G,A,A,T,T,C,C,C,T,G,A,A,C,T,T,A,C,T,A,G,T,A,G,A,T,T,G,C,A,T,G,C,C,C,A,T
ind3,C,G,T,A,T,C,C,A,C,A,C,C,C,G,T,A,C,C,T,A,G,A,T,T,G,G,A,A,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,C,C,A,T,T,C,A,T,T,A,G,G
ind4,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,C,C,A,T,T,C,A,T,T,A,G,G
ind5,T,T,A,C,C,T,T,T,G,G,A,A,A,A,G,G,T,G,C,G,C,T,G,C,T,A,G,C,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
ind6,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,T,T,G,C,A,T,G,C,C,C,A,T
ind7,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-,-
最佳答案
假设您不想使用 CSV 库(无论如何您的 csv 看起来都很简单),我尝试更新您的代码。
/**
 * Reads {@code study_panel.csv}, compares every cell with the next cell on the same row, and
 * records the header of the current column as "recombination" (values differ) or
 * "no recombination" (values equal), keyed by zero-based data-row index. Both maps are
 * printed at the end.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String fileName = "study_panel.csv";
    File file = new File(fileName);
    // 2-dimensional list of strings holding the data rows only
    List<List<String>> lines = new ArrayList<>();
    // The first non-blank line is stored separately as the column headers
    List<String> header = null;
    Map<Integer, List<String>> recombinationM = new HashMap<>();
    Map<Integer, List<String>> noRecombinationM = new HashMap<>();

    // try-with-resources closes the scanner even if parsing throws.
    try (Scanner inputStream = new Scanner(file)) {
        // Read whole lines: Scanner.next() would split on any whitespace, not on line ends.
        while (inputStream.hasNextLine()) {
            String line = inputStream.nextLine();
            if (line.trim().isEmpty()) {
                continue;
            }
            // Limit -1 keeps trailing empty cells so cells stay aligned with the header.
            String[] values = line.split(",", -1);
            if (header == null) {
                header = Arrays.asList(values);
                continue; // go to the next line as header is read
            }
            // Adds the currently parsed line to the 2-dimensional string list
            lines.add(Arrays.asList(values));
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    }

    // Iterate through the 2-dimensional data and store column headers
    for (int i = 0; i < lines.size(); i++) {
        List<String> row = lines.get(i);
        List<String> recombinationHdr = new ArrayList<>();
        List<String> noRecombinationHdr = new ArrayList<>();
        // The second condition keeps a row longer than the header from indexing past it.
        for (int j = 0; j < row.size() - 1 && j < header.size(); j++) {
            // Compare the current cell with the next cell on the same row.
            // Use header.get(j + 1) below instead to record the next column's header.
            if (row.get(j).equals(row.get(j + 1))) {
                System.out.println("No recombination");
                noRecombinationHdr.add(header.get(j));
            } else {
                System.out.println("Recombination");
                recombinationHdr.add(header.get(j));
            }
        }
        recombinationM.put(i, recombinationHdr);
        noRecombinationM.put(i, noRecombinationHdr);
    }

    // Print maps by row index; HashMap.entrySet() gives no ordering guarantee.
    System.out.println("== No Recombination ==");
    for (int i = 0; i < lines.size(); i++) {
        System.out.println("Line: " + i + " - " + noRecombinationM.get(i));
    }
    System.out.println("== Recombination ==");
    for (int i = 0; i < lines.size(); i++) {
        System.out.println("Line: " + i + " - " + recombinationM.get(i));
    }
}
我引入了标题列表(header),用来存储 CSV 的第一行(列名),使其与存储在行列表(lines)中的其余行分开。我还引入了两个输出 Map,分别用于存放重组和无重组的标题:Map 的键是行号,值是标题的字符串列表。
代码主要分为两部分:第一部分是 Scanner 部分,读取 CSV 并将其内容存入两个列表(标题和行);第二部分是对列表的迭代和检查。我不确定自己是否正确理解了“基于下一个/上一个值进行比较”的含义,我假设您的意思是在同一行上将当前列索引处的值与下一个列索引处的值进行比较: if (lines.get(i).get(j).equals(lines.get(i).get(j + 1))) {
因此,对于第 i 行,它将 j 值与下一个值 j+1 进行比较。
根据上述比较的结果,行号和标题 header.get(j) 会被存储到 recombinationM 或 noRecombinationM 映射中。
您的样本结果如下(注意:此输出中出现了 S1_1564133 列,而上面列出的研究数据示例并不包含该列,因此该输出应来自列数更多的另一版样本数据):
== No Recombination ==
Line: 0 - [S1_577905, S1_1066894, S1_1293038, S1_1491834, S1_1564133]
Line: 1 - [S1_577905, S1_1066894, S1_1293038, S1_1491834]
Line: 2 - [S1_1491834, S1_1564133]
Line: 3 - [S1_577905, S1_1066894, S1_1293038, S1_1491834, S1_1564133]
Line: 4 - [S1_577905, S1_1491834]
Line: 5 - [S1_577905, S1_1293038, S1_1491834, S1_1564133]
Line: 6 - [S1_1293038, S1_1491834, S1_1564133]
== Recombination ==
Line: 0 - [ID]
Line: 1 - [ID, S1_1564133]
Line: 2 - [ID, S1_577905, S1_1066894, S1_1293038]
Line: 3 - [ID]
Line: 4 - [ID, S1_1066894, S1_1293038, S1_1564133]
Line: 5 - [ID, S1_1066894]
Line: 6 - [ID, S1_577905, S1_1066894]
如果您不想比较第一列 (ID),您可以从 j=1 开始第二个循环。
关于java - 按列迭代二维数据,在 Java 中处理和存储列标题,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/58725617/