/**
 * Produces a short text preview of the file(s) at an FTP location, read through Spark.
 *
 * Connection settings are taken from `map`, keyed by the constants in `FtpOptions`:
 * `PATH`, `USERNAME`, `PASSWORD` and `HOST` are looked up directly, while `PORT`
 * falls back to "21" when it is not present.
 *
 * @param spark session whose SparkContext performs the read
 * @param map   connection options keyed by the `FtpOptions` constants
 */
class FtpShow(spark: SparkSession, map: Map[String, String]) {

  // A leading "./" is dropped because the path is appended after "host:port/" below.
  private val remotePath = map(FtpOptions.PATH).stripPrefix("./")
  private val user = map(FtpOptions.USERNAME)
  private val password = map(FtpOptions.PASSWORD)
  private val ftpHost = map(FtpOptions.HOST)
  private val ftpPort = map.getOrElse(FtpOptions.PORT, "21")

  /** Full FTP URI, with the credentials embedded in the authority part. */
  private def ftpUri: String =
    s"ftp://${user}:${password}@${ftpHost}:${ftpPort}/${remotePath}"

  /**
   * Reads the remote file(s) and returns the first `DBShow.Num` lines joined by "\n".
   *
   * `wholeTextFiles` is used rather than `textFile`: with `textFile` the read is
   * reported to fail with "java.io.IOException: Seek not supported", since the FTP
   * input stream cannot seek.
   *
   * @return up to `DBShow.Num` lines of content, separated by newline characters
   */
  def getSampleData: String = {
    // Each record is a (fileName, fileContent) pair; only the content is needed.
    val files = spark.sparkContext.wholeTextFiles(ftpUri)
    val lines = files.flatMap { case (_, content) => content.split("\n") }
    val sample = lines.take(DBShow.Num)
    sample.mkString("\n")
  }
}
这里读取 FTP 文件时采用了 sc.wholeTextFiles() 方法；若改用 textFile() 方法，则会报错 “java.io.IOException: Seek not supported”，因为 FTP 的 InputStream 不支持 seek(pos: Long) 方法。