Tuesday, November 24, 2020

Ubuntu 20.04: give a user sudo permissions

 sudo usermod -aG sudo <username>


Install NVIDIA drivers on Ubuntu 20.04

 https://www.itzgeek.com/post/how-to-install-nvidia-drivers-on-ubuntu-20-04-ubuntu-18-04.html

Wednesday, November 18, 2020

Anaconda Ubuntu 20.04 setup

https://linuxize.com/post/how-to-install-anaconda-on-ubuntu-20-04/

 
Download the installer from:
https://repo.anaconda.com/archive/Anaconda3-5.3.1-Linux-x86_64.sh

Get the hash from:
https://docs.anaconda.com/anaconda/install/hashes/Anaconda3-5.3.1-Linux-x86_64.sh-hash/
d4c4256a8f46173b675dd6a62d12f566ed3487f932bab6bb7058f06c124bcc27

# Command line
wget https://repo.anaconda.com/archive/Anaconda3-5.3.1-Linux-x86_64.sh

sha256sum Anaconda3-5.3.1-Linux-x86_64.sh 
#d4c4256a8f46173b675dd6a62d12f566ed3487f932bab6bb7058f06c124bcc27 Anaconda3-5.3.1-Linux-x86_64.sh
 
chmod +x Anaconda3-5.3.1-Linux-x86_64.sh 
./Anaconda3-5.3.1-Linux-x86_64.sh  
 

Tuesday, September 29, 2020

Spark DataFrames example




https://medium.com/expedia-group-tech/deep-dive-into-apache-spark-array-functions-720b8fbfa729
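
The snippets below were run in spark-shell, where a SparkSession and the implicits are already in scope. In a standalone application you would need roughly the following boilerplate first (a sketch; the app name is made up):

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions._   // collect_list, array_contains, array_distinct, ...

val spark = SparkSession.builder
  .appName("array-functions-demo")        // hypothetical name
  .master("local[*]")
  .getOrCreate()

import spark.implicits._                  // enables Seq(...).toDF and the $"col" column syntax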



val initial_df = Seq(
  ("x", 4, 1),
  ("x", 6, 2),
  ("z", 7, 3),
  ("a", 3, 4),
  ("z", 5, 2),
  ("x", 7, 3),
  ("x", 9, 7),
  ("z", 1, 8),
  ("z", 4, 9),
  ("z", 7, 4),
  ("a", 8, 5),
  ("a", 5, 2),
  ("a", 3, 8),
  ("x", 2, 7),
  ("z", 1, 9)
).toDF("col1", "col2", "col3")
// Generate Array columns 

scala> initial_df
res58: org.apache.spark.sql.DataFrame = [col1: string, col2: int ... 1 more field]

scala> initial_df.show
+----+----+----+
|col1|col2|col3|
+----+----+----+
|   x|   4|   1|
|   x|   6|   2|
|   z|   7|   3|
|   a|   3|   4|
|   z|   5|   2|
|   x|   7|   3|
|   x|   9|   7|
|   z|   1|   8|
|   z|   4|   9|
|   z|   7|   4|
|   a|   8|   5|
|   a|   5|   2|
|   a|   3|   8|
|   x|   2|   7|
|   z|   1|   9|
+----+----+----+

scala> val full_df = (initial_df.groupBy("col1").agg(collect_list($"col2")))
full_df: org.apache.spark.sql.DataFrame = [col1: string, collect_list(col2): array<int>]

scala> val full_df = (initial_df.groupBy("col1").agg(collect_list($"col2"))).show
+----+------------------+
|col1|collect_list(col2)|
+----+------------------+
|   x|   [4, 6, 7, 9, 2]|
|   z|[7, 5, 1, 4, 7, 1]|
|   a|      [3, 8, 5, 3]|
+----+------------------+

scala> initial_df.schema
res61: org.apache.spark.sql.types.StructType = StructType(StructField(col1,StringType,true), StructField(col2,IntegerType,false), StructField(col3,IntegerType,false))

scala> initial_df.toJSON
res62: org.apache.spark.sql.Dataset[String] = [value: string]

scala> initial_df.toJSON.show
+--------------------+
|               value|
+--------------------+
|{"col1":"x","col2...|
|{"col1":"x","col2...|
|{"col1":"z","col2...|
|{"col1":"a","col2...|
|{"col1":"z","col2...|
|{"col1":"x","col2...|
|{"col1":"x","col2...|
|{"col1":"z","col2...|
|{"col1":"z","col2...|
|{"col1":"z","col2...|
|{"col1":"a","col2...|
|{"col1":"a","col2...|
|{"col1":"a","col2...|
|{"col1":"x","col2...|
|{"col1":"z","col2...|
+--------------------+

scala> val full_df = (initial_df.groupBy("col1")).agg(collect_list($"col2"))
full_df: org.apache.spark.sql.DataFrame = [col1: string, collect_list(col2): array<int>]

scala> val full_df = (initial_df.groupBy("col1")).agg(collect_list($"col2")).show
+----+------------------+
|col1|collect_list(col2)|
+----+------------------+
|   x|   [4, 6, 7, 9, 2]|
|   z|[7, 5, 1, 4, 7, 1]|
|   a|      [3, 8, 5, 3]|
+----+------------------+

scala> val full_df = (initial_df.groupBy("col1").agg(collect_list($"col2").as("array_col1"),collect_list($"col3").as("array_col2")))
full_df: org.apache.spark.sql.DataFrame = [col1: string, array_col1: array<int> ... 1 more field]

scala> full_df.show
+----+------------------+------------------+                                    
|col1|        array_col1|        array_col2|
+----+------------------+------------------+
|   x|   [4, 6, 7, 9, 2]|   [1, 2, 3, 7, 7]|
|   z|[7, 5, 1, 4, 7, 1]|[3, 2, 8, 9, 4, 9]|
|   a|      [3, 8, 5, 3]|      [4, 5, 2, 8]|
+----+------------------+------------------+


scala> initial_df.show
+----+----+----+
|col1|col2|col3|
+----+----+----+
|   x|   4|   1|
|   x|   6|   2|
|   z|   7|   3|
|   a|   3|   4|
|   z|   5|   2|
|   x|   7|   3|
|   x|   9|   7|
|   z|   1|   8|
|   z|   4|   9|
|   z|   7|   4|
|   a|   8|   5|
|   a|   5|   2|
|   a|   3|   8|
|   x|   2|   7|
|   z|   1|   9|
+----+----+----+


scala> val df = full_df.drop("array_col1")
df: org.apache.spark.sql.DataFrame = [col1: string, array_col2: array<int>]

scala> df.show
+----+------------------+
|col1|        array_col2|
+----+------------------+
|   x|   [1, 2, 3, 7, 7]|
|   z|[3, 2, 8, 9, 4, 9]|
|   a|      [4, 5, 2, 8]|
+----+------------------+

scala> val arr_contains_df = df.withColumn("result", array_contains($"array_col2", 3))
arr_contains_df: org.apache.spark.sql.DataFrame = [col1: string, array_col2: array<int> ... 1 more field]

scala> 

scala> arr_contains_df.show()
+----+------------------+------+
|col1|        array_col2|result|
+----+------------------+------+
|   x|   [1, 2, 3, 7, 7]|  true|
|   z|[3, 2, 8, 9, 4, 9]|  true|
|   a|      [4, 5, 2, 8]| false|
+----+------------------+------+


scala> val arr_distinct_df = df.withColumn("result", array_distinct($"array_col2"))
arr_distinct_df: org.apache.spark.sql.DataFrame = [col1: string, array_col2: array<int> ... 1 more field]

scala> 

scala> arr_distinct_df.show()
+----+------------------+---------------+
|col1|        array_col2|         result|
+----+------------------+---------------+
|   x|   [1, 2, 3, 7, 7]|   [1, 2, 3, 7]|
|   z|[3, 2, 8, 9, 4, 9]|[3, 2, 8, 9, 4]|
|   a|      [4, 5, 2, 8]|   [4, 5, 2, 8]|
+----+------------------+---------------+









[cmakefileapi-parser] Code model version (2.1) of cmake-file-api is unexpected. Expecting (2.0). IntelliSense configuration may be incorrect.

https://www.codenong.com/cs107130032/ 

[cmakefileapi-parser] Code model version (2.1) of cmake-file-api is unexpected. Expecting (2.0). IntelliSense configuration may be incorrect.
[cmakefileapi-parser] Code model version (2.1) of cmake-file-api is unexpected. Expecting (2.0). IntelliSense configuration may be incorrect.
[build] Build finished with exit code 0


After running the CMake configuration, I opened this file -> "build\.cmake\api\v1\reply\codemodel-v2-ab6f9cacd31dc7acf0c6.json", scrolled all the way down, and changed the value of "minor" from 1 to 0. When I clicked Build again, the warnings were gone. FYI, I don't know whether this breaks something else; I just figured it out by trial and error.
    "version" :
    {
        "major" : 2,
        "minor" : 0
    }

CMake: globbing the file list

http://derekmolloy.ie/hello-world-introductions-to-cmake/ 


cmake_minimum_required(VERSION 3.0.0)

project(csim_2020 VERSION 0.1.0)


include(CTest)
enable_testing()

file(GLOB SOURCES "*.cpp")
add_executable(testStudent ${SOURCES})

set(CPACK_PROJECT_NAME ${PROJECT_NAME})
set(CPACK_PROJECT_VERSION ${PROJECT_VERSION})
include(CPack)

Sunday, September 13, 2020

MLIR Toy Tutorial - How to simplify the AST so it is easier to read.

 https://mlir.llvm.org/docs/Tutorials/Toy/Ch-1/

# User defined generic function that operates on unknown shaped arguments.
def multiply_transpose(a, b) {
  return transpose(a) * transpose(b);
}

def main() {
  # Define a variable `a` with shape <2, 3>, initialized with the literal value.
  var a = [[1, 2, 3], [4, 5, 6]];
  var b<2, 3> = [1, 2, 3, 4, 5, 6];

  # This call will specialize `multiply_transpose` with <2, 3> for both
  # arguments and deduce a return type of <3, 2> in initialization of `c`.
  var c = multiply_transpose(a, b);

  # A second call to `multiply_transpose` with <2, 3> for both arguments will
  # reuse the previously specialized and inferred version and return <3, 2>.
  var d = multiply_transpose(b, a);

  # A new call with <3, 2> (instead of <2, 3>) for both dimensions will
  # trigger another specialization of `multiply_transpose`.
  var e = multiply_transpose(c, d);

  # Finally, calling into `multiply_transpose` with incompatible shape will
  # trigger a shape inference error.
  var f = multiply_transpose(transpose(a), c);
}

 ~/llvm-project/build/bin/toyc-ch1 test/Examples/Toy/Ch1/ast.toy -emit=ast > ast.ir 2>&1 

# Strip the source-location annotations (@test/Examples/... references)
sed 's/\@test.*//g' ast.ir

# Let's get rid of the fractional floating-point suffixes to reduce the "noise"
 sed 's/\@test.*//g' ast.ir | sed 's/.000000e+00//g'

  Module:
    Function 
      Proto 'multiply_transpose' 
      Params: [a, b]
      Block {
        Return
          BinOp: * 
            Call 'transpose' [ 
              var: a 
            ]
            Call 'transpose' [ 
              var: b 
            ]
      } // Block
    Function 
      Proto 'main' 
      Params: []
      Block {
        VarDecl a<> 
          Literal: <2, 3>[ <3>[ 1, 2, 3], <3>[ 4, 5, 6]] 
        VarDecl b<2, 3> 
          Literal: <6>[ 1, 2, 3, 4, 5, 6] 
        VarDecl c<> 
          Call 'multiply_transpose' [ 
            var: a 
            var: b 
          ]
        VarDecl d<> 
          Call 'multiply_transpose' [ 
            var: b 
            var: a 
          ]
        VarDecl e<> 
          Call 'multiply_transpose' [ 
            var: b 
            var: c 
          ]
        VarDecl f<> 
          Call 'multiply_transpose' [ 
            Call 'transpose' [ 
              var: a 
            ]
            var: c 
          ]
      } // Block

Perl's -00 option turns on paragraph slurp mode, so the regexes can now work across multiple lines.

$ sed 's/\@test.*//g' ast.ir | sed 's/.000000e+00//g' | perl -00pe 's/\n[ \t]*var://gi' | perl -00pe 's/\n[ \t]*]/]/gi'
  Module:
    Function 
      Proto 'multiply_transpose' 
      Params: [a, b]
      Block {
        Return
          BinOp: * 
            Call 'transpose' [  a ]
            Call 'transpose' [  b ]
      } // Block
    Function 
      Proto 'main' 
      Params: []
      Block {
        VarDecl a<> 
          Literal: <2, 3>[ <3>[ 1, 2, 3], <3>[ 4, 5, 6]] 
        VarDecl b<2, 3> 
          Literal: <6>[ 1, 2, 3, 4, 5, 6] 
        VarDecl c<> 
          Call 'multiply_transpose' [  a  b ]
        VarDecl d<> 
          Call 'multiply_transpose' [  b  a ]
        VarDecl e<> 
          Call 'multiply_transpose' [  b  c ]
        VarDecl f<> 
          Call 'multiply_transpose' [ 
            Call 'transpose' [  a ] c ]
      } // Block





Exception in thread "main" org.apache.spark.sql.AnalysisException: Failed to find data source: avro.

class ProcessAvro(sparkSession: SparkSession, fileName: String) {
  val df = readAvro()

  def readAvro(): sql.DataFrame = {
    val df = sparkSession.read.format("avro")
      .load(fileName)

    df
  }
}

Exception in thread "main" org.apache.spark.sql.AnalysisException: Failed to find data source: avro. Avro is built-in but external data source module since Spark 2.4. Please deploy the application as per the deployment section of "Apache Avro Data Source Guide".;

Problem: Spark does not know where to find the Avro data source.

Solution: https://bmwieczorek.wordpress.com/tag/avro/


scala> val df = spark.read.format("avro").load("myclass_avro")
org.apache.spark.sql.AnalysisException: Failed to find data source: avro. Please find an Avro package at http://spark.apache.org/third-party-projects.html;
at org.apache.spark.sql.execution.datasources.DataSource$.lookupDataSource(DataSource.scala:634)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:190)
at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:174)
... 49 elided

scala> val df = spark.read.format("com.databricks.spark.avro").load("myclass_avro")
df: org.apache.spark.sql.DataFrame = [mystring: string, myboolean: boolean ... 4 more fields]

scala> df.show
+--------+---------+-----+------+----------+------------------+
|mystring|myboolean|myint|myLong|   myfloat|          mydouble|
+--------+---------+-----+------+----------+------------------+
|       a|     true|    1|     1|0.14285715|0.1428571492433548|
|       b|    false|    2|     2|       2.0|               2.0|
+--------+---------+-----+------+----------+------------------+

Fix: qualify the format.

val df = sparkSession.read.format("com.databricks.spark.avro")
  .load(fileName)
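
An alternative I have not tried here: the exception above says Avro has been a built-in but external module since Spark 2.4, so format("avro") should also work once the spark-avro module is on the classpath (the coordinates below are the standard ones for Spark 3.0 / Scala 2.12):

// Launch with the module on the classpath, e.g.:
//   spark-shell --packages org.apache.spark:spark-avro_2.12:3.0.0
// and then the built-in short name resolves:
val df = sparkSession.read.format("avro").load(fileName)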


package org.example

import org.apache.spark
import org.apache.spark.sql
import org.apache.spark.sql.SparkSession

class ProcessAvro(sparkSession: SparkSession, fileName: String) {
  val df = readAvro()

  def readAvro(): sql.DataFrame = {
    val df = sparkSession.read.format("com.databricks.spark.avro")
      .load(fileName)

    df
  }
}

/**
 * Hello world!
 *
 */
object Main extends App {
  println("Hello World!")

  val sparkSession = SparkSession.builder
    .appName("Simple Application")
    .config("spark.master", "local")
    .getOrCreate()

  val processAvro = new ProcessAvro(sparkSession, "file.avro")

  val df = processAvro.readAvro()
}






SBT file for Spark 3.0, Scala 2.12.4, and JUnit

 name := "reporting"


version := "0.1"

scalaVersion := "2.12.4"

resolvers += "Junit Repository" at "https://mvnrepository.com/artifact/com.novocode/junit-interface"
resolvers += "SPARK Repository" at "https://mvnrepository.com/artifact/org.apache.spark/spark-core"

// https://mvnrepository.com/artifact/com.novocode/junit-interface
libraryDependencies += "com.novocode" % "junit-interface" % "0.11" % Test
// https://mvnrepository.com/artifact/org.apache.spark/spark-core
libraryDependencies += "org.apache.spark" %% "spark-core" % "3.0.0"
// https://mvnrepository.com/artifact/com.novocode/junit-interface/0.11
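
The DataFrame and Avro examples above also need more than spark-core; something like the following would likely be required (versions are an assumption, matched to Spark 3.0.0):

// SparkSession / DataFrame API
libraryDependencies += "org.apache.spark" %% "spark-sql" % "3.0.0"
// Built-in Avro data source (Spark 2.4+)
libraryDependencies += "org.apache.spark" %% "spark-avro" % "3.0.0"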


Friday, September 11, 2020

How to negate specific word in regex?

 https://stackoverflow.com/questions/1240275/how-to-negate-specific-word-in-regex

A great way to do this is to use negative lookahead:
^(?!.*bar).*$
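
A quick sketch in Scala (the strings are made up) to see the lookahead in action:

// Matches only strings that do NOT contain "bar" anywhere.
val noBar = "^(?!.*bar).*$"

Seq("foo baz", "foo bar baz", "barista").foreach { s =>
  println(s"'$s' matches: ${s.matches(noBar)}")
}
// 'foo baz' matches: true
// 'foo bar baz' matches: false
// 'barista' matches: false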

Tuesday, September 8, 2020

How to manage Erlang and Elixir versions on Mac or Linux to fix a vscode debugger incompatibility issue with elixir-ls

VSCode issue:

Started ElixirLS debugger v0.5.0

Elixir version: "1.10.2 (compiled with Erlang/OTP 21)"

Erlang version: "22"

ElixirLS compiled with Elixir 1.7.4 and erlang 20

WARNING: Debugging is not supported on Elixir 1.10.2. Please upgrade to at least 1.10.3

more info: https://github.com/elixir-lsp/elixir-ls/issues/158


asdf Version/Package manager
https://medium.com/juq/how-to-manage-elixir-versions-on-mac-or-linux-getting-started-with-elixir-12308e7b6451

https://thinkingelixir.com/install-elixir-using-asdf/

https://alchemist.camp/episodes/asdf-language-versions

Compatible Erlang/Elixir versions 

https://hexdocs.pm/elixir/compatibility-and-deprecations.html

ELIXIR VERSION    SUPPORTED ERLANG/OTP VERSIONS
1.10              21 - 22 (and Erlang/OTP 23 from v1.10.3)


Linux Version/Package manager
git clone https://github.com/asdf-vm/asdf.git ~/.asdf
echo -e '\n. $HOME/.asdf/asdf.sh' >> ~/.bashrc
echo -e '\n. $HOME/.asdf/completions/asdf.bash' >> ~/.bashrc
asdf update
source ~/.bashrc
asdf update
sudo apt install automake autoconf libreadline-dev libncurses-dev libssl-dev libyaml-dev libxslt-dev libffi-dev libtool unixodbc-dev
asdf plugin-add erlang
asdf plugin-add elixir
asdf list-all erlang
asdf install erlang 22.3.4.10
asdf list-all elixir
# Select an Elixir version that is compatible with the Erlang version and
# the OTP release of the Erlang install you selected above.
# In this case we selected OTP 22.
asdf install elixir 1.10.3-otp-22
asdf global erlang 22.3.4.10
asdf global elixir 1.10.3-otp-22

VSCode works!

Started ElixirLS debugger v0.5.0

Elixir version: "1.10.3 (compiled with Erlang/OTP 22)"

Erlang version: "22"

ElixirLS compiled with Elixir 1.7.4 and erlang 20

Compiling 1 file (.ex)

Generated pm app

PmTest

  * doctest Pm.hello/0 (1)

  * doctest Pm.hello/0 (1) (0.1ms)

  * test greets the world

  * test greets the world (0.1ms)

Finished in 0.00 seconds

1 doctest, 1 test, 0 failures

Randomized with seed 285439



Wednesday, February 12, 2020

How to install an older version of scala

https://stackoverflow.com/questions/32767204/how-to-install-an-older-version-of-scala/42908943#42908943

brew search scala
You will then see all the available versions, like below:
scala                     scala@2.10                scala@2.11                scalaenv                  scalariform               scalastyle
homebrew/science/scalapack                                                     Caskroom/cask/scala-ide
Then choose whichever version you want to install. Say you want to install Scala 2.11; you can do that by running the command below:
brew install scala@2.11

Tuesday, January 28, 2020

Friday, January 24, 2020

ScalaTest in sbt: is there a way to run a single test without tags?


A single test can be run in sbt with:
testOnly *class -- -n Tag
To run only the tests whose name includes the substring "foo":
testOnly *MySuite -- -z foo
For an exact match rather than a substring, use -t instead of -z.
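
For example, with a minimal suite like this sketch (suite and test names are made up; ScalaTest 3.1+ syntax, older versions use org.scalatest.FunSuite):

import org.scalatest.funsuite.AnyFunSuite

class MySuite extends AnyFunSuite {
  test("foo sums an empty list to zero") {
    assert(List.empty[Int].sum == 0)
  }
  test("bar sums a non-empty list") {
    assert(List(1, 2, 3).sum == 6)
  }
}

Only the first test runs with:
testOnly *MySuite -- -z foo
and the exact-name form would be:
testOnly *MySuite -- -t "foo sums an empty list to zero"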

Updating a mutable map counter value


val symTab = collection.mutable.HashMap[String, Int]()
def update0(s: String): Unit = {
  if (symTab.contains(s)) {
    symTab(s) = symTab(s) + 1
  }
  else {
    symTab(s) = 1
  }
}

// Do this instead
def update1(s: String): Unit = symTab(s) = symTab.getOrElse(s, 0) + 1

scala> update1("x")

scala> symTab("x")
res2: Int = 1

scala> update1("x")

scala> symTab("x")

res4: Int = 2