在一次采访中,技术负责人说我的 scala 代码就像 java 代码一样,但使用了 scala api,他希望我对此进行改进。
我是一名 3 年的 Java 开发人员,我通过关注 Coursera 上的 MOOC 开始了 scala 编码。
谁能告诉我问题是什么以及如何改进?
我得到这份工作是因为我对 Java 的认可,但这份工作是基于 scala 的,编码风格是在试用期间需要解决的一件事。
object Extraction {
// IntelliJ use .idea/modules as current working directory
val FilePathPre = "../../src/main/resources/"
val UserIdFile = "lookup_user.csv"
val ProductIdFile = "lookup_product.csv"
val RatingFile = "agg_ratings.csv"
def readFile(file: String): Iterator[((String, String), String, String)] = {
val Splitter = ","
Source.fromInputStream(this.getClass.getResourceAsStream(file)).getLines()
.map(_.split(Splitter))
.filter(_.size >= 4) // in case line is not valid
.map(x => ((x(0), x(1)), x(2), x(3))) // (userId, ItemId), rating, time
}
def filePrinter(fileName: String, lines: mutable.Map[String, Int]) = {
val file = new File(fileName)
val bw = new BufferedWriter(new FileWriter(file))
lines.toArray.sortWith((a, b) => a._2 < b._2)
.map(x => x._1 + "," + x._2.toString + "\n")
.foreach(bw.write)
bw.close()
}
def aggFilePrinter(fileName: String, lines: mutable.Map[(Int, Int), Float]) = {
val file = new File(fileName)
val bw = new BufferedWriter(new FileWriter(file))
lines.foreach(x => {
val line = x._1._1.toString + "," + x._1._2.toString + "," + (math.round(x._2 * 100.0) / 100.0).toFloat + "\n"
bw.write(line)
})
bw.close()
}
/**
* * une pénalité multiplicative de 0.95 est appliquée au rating
* pour chaque jour d'écart avec le timestamp maximal de input.csv
*
* @param nowTime maximal timestamp at input.csv
* @param pastTime current rating time
* @param rating original rating
* @return final rating multiplied by 0.95 for every day interval from the maximal timestamp
*/
def finalRating(nowTime: String, pastTime: String, rating: String): Float = {
val now =
LocalDateTime.ofInstant(Instant.ofEpochMilli(nowTime.toLong), ZoneId.systemDefault())
val past =
LocalDateTime.ofInstant(Instant.ofEpochMilli(pastTime.toLong), ZoneId.systemDefault())
val diff = ChronoUnit.DAYS.between(past, now)
(math.pow(0.95, diff) * rating.toFloat).toFloat
}
/**
*
* @param file file to extract
*/
def fileDispatcher(file: String) = {
/**
* get idIndice or increment to idIndice and put it to id map
* @param id id in String
* @param idIndice id in Int
* @param idMap userIdMap or productIdMap
* @return (indice for id, max idIndice)
*/
def getIndice(id: String, idIndice: Int, idMap: mutable.Map[String, Int]): (Int, Int) = {
idMap.get(id) match {
case Some(i) => (i, idIndice)
case None => {
val indice = idIndice + 1
idMap += (id -> indice)
(indice, indice)
}
}
}
// 1. scan the file the find the max time
val maxTime = readFile(file).reduce((a, b) => if(a._3 > b._3) a else b)._3
// 2. apply rating condition, calculate rating and return only valid rating lines
val validLines = readFile(file).map(x => (x._1, finalRating(maxTime.toString, x._3, x._2))).filter(_._2 > 0.01)
// 3. loop file lines, sum ratings by (userId, productId), and combine id_String and id_Int
val userIdMap = mutable.Map[String, Int]() // (userId, userIdAsInt)
val productIdMap = mutable.Map[String, Int]() // (productId, productIdAsInt)
val userProductRatingMap = mutable.Map[(Int, Int), Float]() // (userIdAsInt, productIdAsInt, ratingSum)
var userIdIndice = -1
var productIdIndice = -1
validLines.foreach(x => {
val userIdString = x._1._1
val userId = getIndice(userIdString, userIdIndice, userIdMap)
userIdIndice = userId._2
val productIdString = x._1._2
val productId = getIndice(productIdString, productIdIndice, productIdMap)
productIdIndice = productId._2
val key = (userId._1, productId._1)
userProductRatingMap.get(key) match {
case Some(i) => userProductRatingMap += (key -> (i + x._2))
case None => userProductRatingMap += (key -> x._2)
}
})
filePrinter(FilePathPre + UserIdFile, userIdMap)
filePrinter(FilePathPre + ProductIdFile, productIdMap)
aggFilePrinter(FilePathPre + RatingFile, userProductRatingMap)
}
}
最佳答案
除了javish代码之外,您还存在代码风格问题,建议阅读https://docs.scala-lang.org/style/在开始时(这不是最终指南,但开始是可以的)。避免在元组上使用 ._1
,而是使用 match { case (a, b, c) => ... }
。
主要问题是您使用可变结构,因此在 scala 中,默认情况下每个结构都是不可变的,并且应该保持这种状态,除非您有充分的理由让它可变。它更多地是关于函数式编程,从一个角度来看,它试图避免可变性和副作用,你可以通过谷歌搜索更多这个主题。
因此,从代码中删除 mutable.
并将 foreach
替换为例如。 foldLeft
在每次迭代中获取新创建的 immutable.Map
,而不是修改现有的。
关于java - 如何编写scala代码而不是像java代码一样?,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/47325969/