Decode Packets, Again (#1156)

* fixing issue where the iterator of a closed stream produces no output; added option to include an entire directory of input files; streamlined workflow between gcapy and preprocessed files

* added documentation; restored functionality of the source's iterator; tidied code

* restored functionality/reliability of skip-existing and management of duplicate files and file cleanup; added exclusive log for decode error messages

* collects all literal decode errors falling through the logging cracks

* finally eliminating log spam; tidying up the decoded log files' format

* better subclassing and management of the decoy OutStream

* tightening of the noose

* fixing the readme to account for the -f flag
This commit is contained in:
Fate-JH 2023-12-19 14:52:12 -05:00 committed by GitHub
parent 93c3463985
commit b17d699b5f
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 447 additions and 97 deletions

View file

@ -230,7 +230,7 @@ make install PREFIX=$HOME/.local
```
Now you can run the program like that:
```
psf-decode-packets -o ./output-directory foo.gcap bar.gcap
psf-decode-packets -o ./output-directory -f foo.gcap bar.gcap
```
By default, decodePackets takes in `.gcap` files, but it can also take gcapy ascii files with the
`-p` option. Run `psf-decode-packets --help` to get usage info.

View file

@ -98,8 +98,9 @@ object ObjectCreateBase {
}
} catch {
case ex: Exception =>
log.error(s"Decoding error - ${ex.getClass.toString} - ${ex.toString} ($objectClass)")
Attempt.failure(Err(ex.getMessage))
val msg = s"Decoding error - ${ex.toString} ($objectClass)"
log.error(msg)
Attempt.failure(Err(msg))
}
}

View file

@ -1,9 +1,9 @@
// Copyright (c) 2020 PSForever
package net.psforever.tools.decodePackets
import java.io.{BufferedWriter, File, FileWriter}
import java.io.{File, IOException, OutputStream, PrintStream}
import java.nio.charset.CodingErrorAction
import java.nio.file.{Files, Paths, StandardCopyOption}
import java.nio.file.{Files, Path, Paths, StandardCopyOption}
import net.psforever.packet.PacketCoding
import org.apache.commons.io.FileUtils
import scodec.Attempt.{Failure, Successful}
@ -16,17 +16,23 @@ import scala.sys.process._
import scala.util.Using
// Command line configuration; defaults mirror the current working directory
case class Config(
inDir: String = System.getProperty("user.dir"),   // directory searched for input files (-i/--in-dir)
outDir: String = System.getProperty("user.dir"),  // destination directory for decoded output (-o/--out-dir)
preprocessed: Boolean = false,                    // inputs are already gcapy ASCII transcriptions (-p)
skipExisting: Boolean = false,                    // do not re-decode files already present in the output directory (-s)
errorLogs: Boolean = false,                       // also write decoding errors to a separate file (-e)
files: Seq[File] = Seq()                          // individual input files (-f), plus any collected via inDir
)
object DecodePackets {
private val utf8Decoder = Codec.UTF8.decoder.onMalformedInput(CodingErrorAction.REPORT)
/** important: must be Java's System.out! */
private val normalSystemOut = System.out
/** important: must be composed of Java classes; log4j interacts with the system as a Java library */
private var outCapture: PrintStream = _
def main(args: Array[String]): Unit = {
val builder = OParser.builder[Config]
val parser = {
import builder._
OParser.sequence(
@ -34,16 +40,27 @@ object DecodePackets {
opt[String]('o', "out-dir")
.action((x, c) => c.copy(outDir = x))
.text("Output directory"),
opt[String]('i', "in-dir")
.action { (x, c) =>
c.copy(
files = c.files ++ getAllFilePathsFromDirectory(x).collect { case path => path.toFile },
inDir = x
)
}
.text("Input directory"),
opt[Unit]('p', "preprocessed")
.action((x, c) => c.copy(preprocessed = true))
.text("Files are already preprocessed gcapy ascii files (do not call gcapy)"),
.action((_, c) => c.copy(preprocessed = true))
.text("Files are already preprocessed gcapy ASCII files"),
opt[Unit]('s', "skip-existing")
.action((x, c) => c.copy(skipExisting = true))
.text("Skip files that already exist in out-dir"),
arg[File]("<file>...")
.action((_, c) => c.copy(skipExisting = true))
.text("Skip files that already exist in the output directory"),
opt[Unit]('e', "error-logs")
.action((_, c) => c.copy(errorLogs = true))
.text("Write decoding errors to another file in the output directory"),
opt[File]('f', "file")
.unbounded()
.required()
.action((x, c) => c.copy(files = c.files :+ x))
.text("Individual files to decode ...")
)
}
@ -54,140 +71,383 @@ object DecodePackets {
sys.exit(1)
}
val outDir = new File(opts.outDir);
var skipExisting = opts.skipExisting
val outDir = new File(opts.outDir)
if (!outDir.exists()) {
skipExisting = false
outDir.mkdirs()
} else if (outDir.isFile) {
println(s"error: out-dir is file")
sys.exit(1)
}
opts.files.foreach { file =>
if (!file.exists) {
println(s"file ${file.getAbsolutePath} does not exist")
sys.exit(1)
val files: Seq[File] = {
val (readable, unreadable) = opts.files.partition(_.exists())
if (unreadable.nonEmpty) {
println(s"The following ${unreadable.size} input files may not exist and will be skipped:")
unreadable.foreach { file => println(s"- ${file.getAbsolutePath}") }
}
if (skipExisting) {
val (skipped, decodable) = filesWithSameNameInDirectory(opts.outDir, readable)
if (skipped.nonEmpty) {
println(s"The following ${skipped.size} input files will not be decoded (reason: skip-existing flag set):")
skipped.foreach { file => println(s"- ${file.getAbsolutePath}") }
}
decodable
} else {
readable
}
}
if (files.isEmpty) {
println("No input files are detected. Please set an input directory with files or indicate individual files.")
sys.exit(1)
}
println(s"${files.size} input file(s) detected.")
val tmpFolder = new File(System.getProperty("java.io.tmpdir") + "/psforever-decode-packets")
var deleteTempFolderAfterwards: Boolean = false
val tmpFolderPath = System.getProperty("java.io.tmpdir") + "/psforever-decode-packets"
val tmpFolder = new File(tmpFolderPath)
if (!tmpFolder.exists()) {
deleteTempFolderAfterwards = true
tmpFolder.mkdirs()
} else if (getAllFilePathsFromDirectory(tmpFolderPath).isEmpty) {
deleteTempFolderAfterwards = true
}
opts.files.par.foreach { file =>
val outFilePath = opts.outDir + "/" + file.getName.split(".gcap")(0) + ".txt"
val outFile = new File(outFilePath);
val bufferedWriter: (String, String) => WriterWrapper = if (opts.errorLogs) {
errorWriter
} else {
normalWriter
}
if (outFile.exists() && opts.skipExisting) {
return
}
//create a decoy console output stream (suppress output from the decode process)
outCapture = new PrintStream(new DeafMuteStream())
if (opts.preprocessed) {
decodeFilesUsing(files, extension = ".txt", tmpFolder, opts.outDir, bufferedWriter, preprocessed)
} else {
decodeFilesUsing(files, extension = ".gcap", tmpFolder, opts.outDir, bufferedWriter, gcapy)
}
//close and null the decoy console output stream
outCapture.close()
outCapture = null
val tmpFilePath = tmpFolder.getAbsolutePath + "/" + file.getName.split(".gcap")(0) + ".txt"
val writer = new BufferedWriter(new FileWriter(new File(tmpFilePath), false))
if (deleteTempFolderAfterwards) {
//if the temporary directory only exists because of this script, it should be safe to delete it
FileUtils.forceDelete(tmpFolder)
} else {
//delete just the files that were created (if files were overwrote, nothing we can do)
val (deleteThese, _) = filesWithSameNameAs(
files,
getAllFilePathsFromDirectory(tmpFolder.getAbsolutePath).toIndexedSeq.map(_.toFile)
)
deleteThese.foreach(FileUtils.forceDelete)
}
}
/**
 * Split files into those whose names can be found in a given directory location
 * and those whose names can not.
 * @param directory where the existing files may be found
 * @param files files to test for matching names
 * @see `filesWithSameNameAs`
 * @see `getAllFilePathsFromDirectory`
 * @return a tuple of file lists, comparing the param files against files in the directory;
 *         the first are the files whose names match;
 *         the second are the files whose names do not match
 */
private def filesWithSameNameInDirectory(directory: String, files: Seq[File]): (Seq[File], Seq[File]) = {
  val existingFiles = getAllFilePathsFromDirectory(directory).toIndexedSeq.map(_.toFile)
  filesWithSameNameAs(existingFiles, files)
}
/**
 * Split files into those whose names match the name of some file in a given group
 * and those whose names do not.
 * Comparison is by lowercase file name only; directory structure is ignored.
 * @param existingFiles files whose names are to test against
 * @param files files to test for matching names
 * @see `lowercaseFileNameString`
 * @return a tuple of file lists, comparing the param files against the existing files;
 *         the first are the files whose names match;
 *         the second are the files whose names do not match
 */
private def filesWithSameNameAs(existingFiles: Seq[File], files: Seq[File]): (Seq[File], Seq[File]) = {
  // exact name equality via a Set; the previous endsWith test wrongly matched
  // "foo.txt" against an existing "barfoo.txt" (suffix collision) and was O(n*m)
  // stripPrefix keeps this correct whether or not lowercaseFileNameString retains the separator
  val existingFileNames = existingFiles
    .map { existing => lowercaseFileNameString(existing.toString).stripPrefix(File.separator) }
    .toSet
  files.partition { file =>
    existingFileNames.contains(lowercaseFileNameString(file.getName).stripPrefix(File.separator))
  }
}
/**
 * Isolate a file's name from a file's path and normalize it to lowercase.
 * The path is recognized as the directory structure information,
 * everything to the left of the last file separator character.
 * The file extension is included; the separator itself is not.
 * @param filename file path of questionable content and length, but including the file name
 * @return lowercase file name only
 */
private def lowercaseFileNameString(filename: String): String = {
  (filename.lastIndexOf(File.separator) match {
    case -1 => filename
    case n  => filename.substring(n + 1) // n + 1 skips past the separator so only the name remains
  }).toLowerCase()
}
/**
 * Enumerate over files found in the given directory for later.
 * @param directory where the files are found
 * @see `Files.isDirectory`
 * @see `Files.exists`
 * @see `Paths.get`
 * @return the discovered file paths;
 *         empty when the directory is missing, is a regular file, or can not be read
 */
private def getAllFilePathsFromDirectory(directory: String): Array[Path] = {
  val dir = Paths.get(directory)
  val exists = Files.exists(dir)
  if (exists && Files.isDirectory(dir)) {
    // File.listFiles() returns null on an I/O error; guard so callers always receive an array
    Option(dir.toFile.listFiles()) match {
      case Some(entries) => entries.map(_.toPath)
      case None          => Array.empty
    }
  } else if (!exists) {
    println(s"error: in-dir does not exist")
    Array.empty
  } else {
    println(s"error: in-dir is file")
    Array.empty
  }
}
/**
 * The primary entry point into the process of parsing the packet capture files
 * and producing the decoded packet data.
 * Should be configurable for whatever state that the packet capture file can be structured.
 * Files are written into a temporary directory first, then moved into the output directory
 * so that partially-written files never appear in the output.
 * @param files all of the discovered files for consideration
 * @param extension file extension being concatenated
 * @param temporaryDirectory destination directory where files temporarily exist while being written
 * @param outDirectory destination directory where the files are stored after being written
 * @param writerConstructor produces the writer wrapper for one input file
 * @param readDecodeAndWrite next step of the file decoding process
 * @see `Files.move`
 * @see `Paths.get`
 * @see `System.setOut`
 */
private def decodeFilesUsing(
  files: Seq[File],
  extension: String,
  temporaryDirectory: File,
  outDirectory: String,
  writerConstructor: (String, String) => WriterWrapper,
  readDecodeAndWrite: (File, WriterWrapper) => Unit
): Unit = {
  files.par.foreach { file =>
    val fileName = file.getName.split(extension)(0)
    val outDirPath = outDirectory + File.separator
    val tmpDirPath = temporaryDirectory.getAbsolutePath + File.separator
    val writer = writerConstructor(tmpDirPath, fileName)
    try {
      // suppress console output from the decode process, then restore it
      System.setOut(outCapture)
      readDecodeAndWrite(file, writer)
      System.setOut(normalSystemOut)
      writer.close()
      // promote every file the wrapper produced from the temporary directory to the output directory
      writer.getFileNames.foreach { fileNameWithExt =>
        Files.move(
          Paths.get(tmpDirPath + fileNameWithExt),
          Paths.get(outDirPath + fileNameWithExt),
          StandardCopyOption.REPLACE_EXISTING
        )
      }
    } catch {
      case e: Throwable =>
        println(s"File ${file.getName} threw an exception because ${e.getMessage}")
        writer.close()
        e.printStackTrace()
    }
  }
}
/**
 * Read data from ASCII transcribed gcapy files.
 * @param file file to read
 * @param writer writer for output
 * @see `decodeFileContents`
 * @see `File.getAbsolutePath`
 * @see `Source.fromFile`
 * @see `Source.getLines`
 * @see `Using`
 */
private def preprocessed(file: File, writer: WriterWrapper): Unit = {
  Using(Source.fromFile(file.getAbsolutePath)(utf8Decoder)) { source =>
    println(s"${decodeFileContents(writer, source.getLines())} lines read from file ${file.getName}")
  }.get // Using returns a Try; rethrow so failures reach the caller instead of being silently dropped
}
/**
 * Read data from gcapy files by shelling out to the `gcapy` tool.
 * @param file file to read
 * @param writer writer for output
 * @see `decodeFileContents`
 * @see `File.getAbsolutePath`
 * @see `Source.fromString`
 * @see `Source.getLines`
 * @see `Using`
 */
private def gcapy(file: File, writer: WriterWrapper): Unit = {
  Using(Source.fromString(s"gcapy -xa '${file.getAbsolutePath}'" !!)) { source =>
    println(s"${decodeFileContents(writer, source.getLines())} lines read from file ${file.getName}")
  }.get // Using returns a Try; rethrow so failures reach the caller instead of being silently dropped
}
/**
 * Decode each line of raw packet data from the source, then write it to the output file.
 * Gcapy output duplicates the lines that follow a `SlottedMetaPacket`,
 * so those duplicates are counted and skipped to reduce noise.
 * @param writer writer for output
 * @param lines raw packet data from the source
 * @throws java.io.IOException if writing data goes incorrectly
 * @see `decodePacket`
 * @see `isNestedPacket`
 * @see `recursivelyHandleNestedPacket`
 * @see `shortGcapyString`
 * @return number of lines read from the source
 */
@throws(classOf[IOException])
private def decodeFileContents(writer: WriterWrapper, lines: Iterator[String]): Int = {
  var skipRemaining = 0
  var readCount: Int = 0
  val dataLines = lines.drop(1) // the first line is not packet data
  while (dataLines.hasNext) {
    val line = dataLines.next()
    readCount += 1
    if (skipRemaining > 0) {
      skipRemaining -= 1
    } else {
      val header = shortGcapyString(line)
      val decoded = decodePacket(header, line.drop(line.lastIndexOf(' ')))
      writer.write(decoded)
      val decodedText = decoded.text
      if (isNestedPacket(decodedText)) {
        // packet with nested packets, possibly several layers deep,
        // e.g. a SlottedMetaPacket containing a MultiPacketEx
        val duplicates = recursivelyHandleNestedPacket(header, decodedText, writer)
        // the duplicated lines only differ by a slight processing-timestamp difference
        skipRemaining =
          if (decodedText.indexOf("SlottedMetaPacket") >= 0 && duplicates > 0) {
            writer.write(str = s"Skipping $duplicates duplicate lines")
            writer.newLine()
            duplicates
          } else {
            0
          }
      }
      writer.newLine()
    }
  }
  readCount
}
/**
 * Traverse down any nested packets such as `SlottedMetaPacket`, `MultiPacket`, and `MultiPacketEx`
 * and add indent for each layer down.
 * A number of lines to skip will be returned so duplicate lines following the nested packet can be filtered out.
 * @param header reformatted gcapy header for the enclosing line
 * @param decodedLine decoded packet data
 * @param writer writer for output
 * @param depth the number of layers to indent
 * @throws java.io.IOException if writing data goes incorrectly
 * @see `decodePacket`
 * @see `IOException`
 * @see `isNestedPacket`
 * @see `nested`
 * @see `Regex.findAllIn`
 * @return number of nested lines written (lines the caller should skip)
 */
@throws(classOf[IOException])
private def recursivelyHandleNestedPacket(
  header: String,
  decodedLine: String,
  writer: WriterWrapper,
  depth: Int = 0
): Int = {
  // a parse failure carries no hexadecimal payloads worth descending into
  if (decodedLine.indexOf("Failed to parse") >= 0) return depth
  val regex = "(0x[a-f0-9]+)".r
  val matches = regex.findAllIn(decodedLine)
  var linesToSkip = 0
  while (matches.hasNext) {
    val packet = matches.next()
    // one dash per layer of nesting, then the marker
    for (_ <- depth until 0 by -1) {
      writer.write(str = "-")
    }
    writer.write(str = "> ")
    val nextDecoded = nested(decodePacket(header, packet))
    val nextDecodedLine = nextDecoded.text
    writer.write(nextDecoded)
    if (isNestedPacket(nextDecodedLine)) {
      linesToSkip += recursivelyHandleNestedPacket(header, nextDecodedLine, writer, depth + 1)
    }
    linesToSkip += 1
  }
  linesToSkip
}
/**
 * Reformat data common to gcapy packet data files and their derivation form of ASCII transcription.
 * @param line original string
 * @return transformed string; lines that do not match the gcapy record format pass through unchanged
 */
private def shortGcapyString(line: String): String = {
  val regex = "Game record ([0-9]+) at ([0-9.]+s) is from ([S|C]).* to ([S|C]).*contents (.*)".r
  line match {
    case regex(index, time, from, _, contents) =>
      val direction = if (from == "S") "<<<" else ">>>"
      s"#$index @ $time C $direction S ($contents)"
    case _ =>
      // previously a MatchError here aborted the whole file; pass unrecognized lines through
      line
  }
}
/**
 * A nested packet contains more packets.
 * @param decodedLine decoded packet data
 * @return `true`, if the packet is nested; `false`, otherwise
 */
private def isNestedPacket(decodedLine: String): Boolean = {
  // "MultiPacket" also matches MultiPacketEx
  decodedLine.indexOf("MultiPacket") >= 0 || decodedLine.indexOf("SlottedMetaPacket") >= 0
}
/**
 * Actually decode the packet data.
 * @param header reformatted gcapy header to attach to the output
 * @param hexString raw packet data
 * @see `ByteVector.fromValidHex`
 * @see `DecodeError`
 * @see `DecodedPacket`
 * @see `PacketCoding.decodePacket`
 * @return decoded packet data, or an error entry when decoding fails
 */
private def decodePacket(header: String, hexString: String): PacketOutput = {
  val byteVector = ByteVector.fromValidHex(hexString)
  PacketCoding.decodePacket(byteVector) match {
    // extra whitespace after commas for readability
    case Successful(value) => DecodedPacket(Some(header), value.toString.replace(",", ", "))
    case Failure(cause)    => DecodeError(Some(header), s"Decoding error '${cause.toString}'")
  }
}
/** Produce a wrapper that writes decoded packet data to a single output file. */
private def normalWriter(directoryPath: String, fileName: String): WriterWrapper = {
DecodeWriter(directoryPath, fileName)
}
/**
 * When nested, a normal properly decoded packet does not print header information.
 * The header usually contains the original encoded hexadecimal string.
 * @param in decoded packet data
 * @return the same packet data, with the header removed when decoding succeeded cleanly
 */
private def nested(in: PacketOutput): PacketOutput = {
  in match {
    case DecodedPacket(_, text) if !text.contains("Decoding error") => DecodedPacket(header = None, text)
    case unchanged                                                  => unchanged
  }
}
/** Produce a wrapper that writes decoded packet data and copies decoding errors to a second file. */
private def errorWriter(directoryPath: String, fileName: String): WriterWrapper = {
DecodeErrorWriter(directoryPath, fileName)
}
}
/**
 * I thought what I'd do was, I'd pretend I was one of those deaf-mutes.
 * That way I wouldn't have to have any goddam stupid useless conversations with anybody.
 * (An `OutputStream` that silently discards everything written to it.)
 */
private class DeafMuteStream() extends OutputStream {
  override def write(byte: Int): Unit = ()
  override def write(bytes: Array[Byte]): Unit = ()
  override def write(bytes: Array[Byte], offset: Int, length: Int): Unit = ()
}

View file

@ -0,0 +1,11 @@
// Copyright (c) 2023 PSForever
package net.psforever.tools.decodePackets
/** Output produced by decoding one packet: an optional header line plus the body text. */
trait PacketOutput {
def header: Option[String]
def text: String
}
/** A packet that decoded successfully. */
final case class DecodedPacket(header: Option[String], text: String) extends PacketOutput
/** A packet that failed to decode; `text` carries the error message. */
final case class DecodeError(header: Option[String], text: String) extends PacketOutput

View file

@ -0,0 +1,78 @@
// Copyright (c) 2023 PSForever
package net.psforever.tools.decodePackets
import java.io.{BufferedWriter, File, FileWriter}
/**
 * Common interface over the buffered file writers used by the decoding process.
 * Implementations may fan a single logical write out to more than one file.
 */
trait WriterWrapper {
def write(str: String): Unit // write raw text
def write(str: PacketOutput): Unit // write decoded packet output; header handling is implementation-defined
def newLine(): Unit
def close(): Unit
def getFileNames: Seq[String] // names (with extension) of every file this wrapper writes
}
/** Writes decoded packet output to `fileName.txt` in the given directory. */
final case class DecodeWriter(directoryPath: String, fileName: String) extends WriterWrapper {
  // append = false: always start from a fresh file
  private val log: BufferedWriter = new BufferedWriter(
    new FileWriter(new File(directoryPath + fileName + ".txt"), false)
  )

  def write(str: String): Unit = log.write(str)

  /**
   * Write a decoded packet: the header line first, when one is attached, then the body text.
   * (Replaces a `collect`/`orElse` construction that abused `Option` for control flow
   * and duplicated the body-writing lines.)
   */
  def write(data: PacketOutput): Unit = {
    data.header.foreach { str =>
      log.write(str)
      log.newLine()
    }
    log.write(data.text)
    log.newLine()
  }

  def newLine(): Unit = log.newLine()

  def close(): Unit = log.close()

  def getFileNames: Seq[String] = Seq(fileName + ".txt")
}
/**
 * Writes decoded packet output like `DecodeWriter`, and additionally copies
 * any decoding-error output into a companion `fileName.error.txt` file.
 */
final case class DecodeErrorWriter(directoryPath: String, fileName: String)
extends WriterWrapper {
  private val mainLog: DecodeWriter = DecodeWriter(directoryPath, fileName)
  private val errLog: DecodeWriter = DecodeWriter(directoryPath, fileName + ".error")

  def write(str: String): Unit = {
    mainLog.write(str)
    // mirror raw error text into the dedicated error log
    if (str.contains("Decoding error")) {
      errLog.write(str)
      errLog.newLine()
    }
  }

  def write(data: PacketOutput): Unit = {
    mainLog.write(data)
    // an explicit DecodeError, or any output whose text carries an error message
    val isError = data match {
      case _: DecodeError => true
      case other          => other.text.contains("Decoding error")
    }
    if (isError) {
      errLog.write(data)
      errLog.newLine()
    }
  }

  def newLine(): Unit = mainLog.newLine()

  def close(): Unit = {
    mainLog.close()
    errLog.close()
  }

  def getFileNames: Seq[String] = mainLog.getFileNames ++ errLog.getFileNames
}